* [PATCH v3 01/13] verification/rvgen: Switch LTL parser to Lark
From: Nam Cao @ 2026-06-08 8:56 UTC (permalink / raw)
To: Gabriele Monaco, Steven Rostedt, Wander Lairson Costa,
linux-trace-kernel, linux-kernel
Cc: Nam Cao
In-Reply-To: <cover.1780908661.git.namcao@linutronix.de>
The LTL parser is built using Ply. However, Ply is no longer
maintained [1].
Switch to use Lark instead. In addition to being actively maintained, Lark
also offers additional features (namely, automatically creating the
abstract syntax tree) which make the parser simpler.
Link: https://github.com/dabeaz/ply/commit/9d7c40099e23ff78f9d86ef69a26c1e8a83e706a [1]
Reviewed-by: Gabriele Monaco <gmonaco@redhat.com>
Signed-off-by: Nam Cao <namcao@linutronix.de>
---
tools/verification/rvgen/__main__.py | 5 +-
tools/verification/rvgen/rvgen/ltl2ba.py | 202 +++++++++--------------
2 files changed, 82 insertions(+), 125 deletions(-)
diff --git a/tools/verification/rvgen/__main__.py b/tools/verification/rvgen/__main__.py
index 5c923dc10d0f..0915cf86e43b 100644
--- a/tools/verification/rvgen/__main__.py
+++ b/tools/verification/rvgen/__main__.py
@@ -14,6 +14,7 @@ if __name__ == '__main__':
from rvgen.container import Container
from rvgen.ltl2k import ltl2k
from rvgen.automata import AutomataError
+ from rvgen.ltl2ba import LTLError
import argparse
import sys
@@ -57,8 +58,8 @@ if __name__ == '__main__':
sys.exit(1)
else:
monitor = Container(vars(params))
- except AutomataError as e:
- print(f"There was an error processing {params.spec}: {e}", file=sys.stderr)
+ except (AutomataError, LTLError) as e:
+ print(f"There was an error processing {params.spec}:\n{e}", file=sys.stderr)
sys.exit(1)
print(f"Writing the monitor into the directory {monitor.name}")
diff --git a/tools/verification/rvgen/rvgen/ltl2ba.py b/tools/verification/rvgen/rvgen/ltl2ba.py
index 016e7cf93bbb..7cebda61bce8 100644
--- a/tools/verification/rvgen/rvgen/ltl2ba.py
+++ b/tools/verification/rvgen/rvgen/ltl2ba.py
@@ -7,9 +7,7 @@
# https://doi.org/10.1007/978-0-387-34892-6_1
# With extra optimizations
-from ply.lex import lex
-from ply.yacc import yacc
-from .automata import AutomataError
+import lark
# Grammar:
# ltl ::= opd | ( ltl ) | ltl binop ltl | unop ltl
@@ -30,42 +28,41 @@ from .automata import AutomataError
# imply
# equivalent
-tokens = (
- 'AND',
- 'OR',
- 'IMPLY',
- 'UNTIL',
- 'ALWAYS',
- 'EVENTUALLY',
- 'NEXT',
- 'VARIABLE',
- 'LITERAL',
- 'NOT',
- 'LPAREN',
- 'RPAREN',
- 'ASSIGN',
-)
-
-t_AND = r'and'
-t_OR = r'or'
-t_IMPLY = r'imply'
-t_UNTIL = r'until'
-t_ALWAYS = r'always'
-t_NEXT = r'next'
-t_EVENTUALLY = r'eventually'
-t_VARIABLE = r'[A-Z_0-9]+'
-t_LITERAL = r'true|false'
-t_NOT = r'not'
-t_LPAREN = r'\('
-t_RPAREN = r'\)'
-t_ASSIGN = r'='
-t_ignore_COMMENT = r'\#.*'
-t_ignore = ' \t\n'
-
-def t_error(t):
- raise AutomataError(f"Illegal character '{t.value[0]}'")
-
-lexer = lex()
+GRAMMAR = r'''
+start: assign+
+
+assign: VARIABLE "=" _ltl
+
+_ltl: _opd | binop | unop
+
+_opd : VARIABLE
+ | LITERAL
+ | "(" _ltl ")"
+
+unop: UNOP _ltl
+UNOP: "always"
+ | "eventually"
+ | "next"
+ | "not"
+
+binop: _opd BINOP _ltl
+BINOP: "until"
+ | "and"
+ | "or"
+ | "imply"
+
+VARIABLE: /[A-Z_][A-Z0-9_]*/
+LITERAL: "true" | "false"
+
+COMMENT: "#" /.*/ "\n"
+%ignore COMMENT
+
+%import common.WS
+%ignore WS
+'''
+
+class LTLError(Exception):
+ "Exception raised for malformed linear temporal logic"
class GraphNode:
uid = 0
@@ -97,7 +94,7 @@ class GraphNode:
return self.id < other.id
class ASTNode:
- uid = 1
+ uid = 0
def __init__(self, op):
self.op = op
@@ -433,90 +430,49 @@ class Literal:
node.old |= {n}
return node.expand(node_set)
-def p_spec(p):
- '''
- spec : assign
- | assign spec
- '''
- if len(p) == 3:
- p[2].append(p[1])
- p[0] = p[2]
- else:
- p[0] = [p[1]]
-
-def p_assign(p):
- '''
- assign : VARIABLE ASSIGN ltl
- '''
- p[0] = (p[1], p[3])
-
-def p_ltl(p):
- '''
- ltl : opd
- | binop
- | unop
- '''
- p[0] = p[1]
-
-def p_opd(p):
- '''
- opd : VARIABLE
- | LITERAL
- | LPAREN ltl RPAREN
- '''
- if p[1] == "true":
- p[0] = ASTNode(Literal(True))
- elif p[1] == "false":
- p[0] = ASTNode(Literal(False))
- elif p[1] == '(':
- p[0] = p[2]
- else:
- p[0] = ASTNode(Variable(p[1]))
-
-def p_unop(p):
- '''
- unop : ALWAYS ltl
- | EVENTUALLY ltl
- | NEXT ltl
- | NOT ltl
- '''
- if p[1] == "always":
- op = AlwaysOp(p[2])
- elif p[1] == "eventually":
- op = EventuallyOp(p[2])
- elif p[1] == "next":
- op = NextOp(p[2])
- elif p[1] == "not":
- op = NotOp(p[2])
- else:
- raise AutomataError(f"Invalid unary operator {p[1]}")
-
- p[0] = ASTNode(op)
-
-def p_binop(p):
- '''
- binop : opd UNTIL ltl
- | opd AND ltl
- | opd OR ltl
- | opd IMPLY ltl
- '''
- if p[2] == "and":
- op = AndOp(p[1], p[3])
- elif p[2] == "until":
- op = UntilOp(p[1], p[3])
- elif p[2] == "or":
- op = OrOp(p[1], p[3])
- elif p[2] == "imply":
- op = ImplyOp(p[1], p[3])
- else:
- raise AutomataError(f"Invalid binary operator {p[2]}")
-
- p[0] = ASTNode(op)
-
-parser = yacc()
+class Transform(lark.visitors.Transformer):
+ def unop(self, node):
+ if node[0] == "always":
+ return ASTNode(AlwaysOp(node[1]))
+ if node[0] == "eventually":
+ return ASTNode(EventuallyOp(node[1]))
+ if node[0] == "next":
+ return ASTNode(NextOp(node[1]))
+ if node[0] == "not":
+ return ASTNode(NotOp(node[1]))
+ raise ValueError("Unknown operator %s" % node[0])
+
+ def binop(self, node):
+ if node[1] == "until":
+ return ASTNode(UntilOp(node[0], node[2]))
+ if node[1] == "and":
+ return ASTNode(AndOp(node[0], node[2]))
+ if node[1] == "or":
+ return ASTNode(OrOp(node[0], node[2]))
+ if node[1] == "imply":
+ return ASTNode(ImplyOp(node[0], node[2]))
+ raise ValueError("Unknown operator %s" % node[1])
+
+ def VARIABLE(self, args):
+ return ASTNode(Variable(args))
+
+ def LITERAL(self, args):
+ return ASTNode(Literal(args == "true"))
+
+ def start(self, node):
+ return node
+
+ def assign(self, node):
+ return node[0].op.name, node[1]
+
+parser = lark.Lark(GRAMMAR)
def parse_ltl(s: str) -> ASTNode:
- spec = parser.parse(s)
+ try:
+ spec = parser.parse(s)
+ except lark.exceptions.UnexpectedInput as e:
+ raise LTLError(str(e))
+ spec = Transform().transform(spec)
rule = None
subexpr = {}
@@ -528,7 +484,7 @@ def parse_ltl(s: str) -> ASTNode:
subexpr[assign[0]] = assign[1]
if rule is None:
- raise AutomataError("Please define your specification in the \"RULE = <LTL spec>\" format")
+ raise LTLError("Please define your specification in the \"RULE = <LTL spec>\" format")
for node in rule:
if not isinstance(node.op, Variable):
--
2.47.3
^ permalink raw reply related
* [PATCH v3 03/13] verification/rvgen: Implement state and transition parser based on Lark
From: Nam Cao @ 2026-06-08 8:56 UTC (permalink / raw)
To: Gabriele Monaco, Steven Rostedt, Wander Lairson Costa,
linux-trace-kernel, linux-kernel
Cc: Nam Cao
In-Reply-To: <cover.1780908661.git.namcao@linutronix.de>
The DOT parsing scripts directly parse the raw text and they are quite
fragile. If the input dot files' formats are slightly changed (for
instance, by breaking some long lines, which is allowed by the DOT
language), the scripts would fail.
To prepare for moving away from the raw text processing, implement parsers
based on Lark which parse states, transitions and constraints.
The parse results are not used yet. The existing scripts will be converted
one by one to use them, and the raw text processing will eventually be removed.
Reviewed-by: Gabriele Monaco <gmonaco@redhat.com>
Signed-off-by: Nam Cao <namcao@linutronix.de>
---
tools/verification/rvgen/rvgen/automata.py | 216 +++++++++++++++++++++
1 file changed, 216 insertions(+)
diff --git a/tools/verification/rvgen/rvgen/automata.py b/tools/verification/rvgen/rvgen/automata.py
index 8649d982383d..b86275e7bf28 100644
--- a/tools/verification/rvgen/rvgen/automata.py
+++ b/tools/verification/rvgen/rvgen/automata.py
@@ -198,6 +198,164 @@ class ParseTree:
self.node_attrs = attributes_parser.node_attrs
self.edge_attrs = attributes_parser.edge_attrs
+class ConstraintCondition:
+ def __init__(self, env: str, op: str, val: str, unit=None):
+ self.env = env
+ self.op = op
+ self.val = val
+ self.unit = unit
+ if unit is None:
+ # try to infer unit from constants or parameters
+ val_for_unit = val.lower().replace("()", "")
+ if val_for_unit.endswith("_ns"):
+ self.unit = "ns"
+ if val_for_unit.endswith("_jiffies"):
+ self.unit = "j"
+
+class ConstraintRule:
+ grammar = r'''
+ rule: condition (OP condition)*
+
+ OP: "&&" | "||"
+
+ condition: ENV CMP_OP VAL UNIT?
+
+ ENV: CNAME
+
+ CMP_OP: "==" | "<=" | "<" | ">=" | ">"
+
+ VAL: /[0-9]+/
+ | /[A-Z_]+\(\)/
+ | /[A-Z_]+/
+ | /[a-z_]+\(\)/
+ | /[a-z_]+/
+
+ UNIT: "ns" | "us" | "ms" | "s"
+ '''
+
+ def __init__(self, c: ConstraintCondition):
+ '''
+ A list of pairs of
+ - the condition (e.g. is_constr_dl == 1)
+ - the logical operator ("||" or "&&") combining this
+ condition with the next one if it exists, otherwise None
+
+ TODO: Perhaps use an abstract syntax tree instead, because
+ this representation cannot capture precedence
+ '''
+ self.rules = [[c, None]]
+
+ def chain(self, op: str, c: ConstraintCondition):
+ self.rules[-1][1] = op
+ self.rules.append([c, None])
+
+class ConstraintReset:
+ def __init__(self, env):
+ self.env = env
+
+class StateLabelParser:
+ grammar = r'''
+ label: CNAME ("\\n" condition)?
+
+ %import common.CNAME
+ %import common.WS
+ %ignore WS
+ ''' + ConstraintRule.grammar
+
+ parser = lark.Lark(grammar, parser='lalr', start="label")
+
+ def __init__(self, label: str):
+ try:
+ tree = self.parser.parse(label)
+ except lark.exceptions.UnexpectedInput as exc:
+ raise(AutomataError(f"Unrecognised state \"{label}\"\n{exc}"))
+
+ self.state = tree.children[0]
+ self.constraint = None
+
+ if len(tree.children) == 2:
+ self.constraint = ConstraintCondition(*tree.children[1].children)
+ if self.constraint.op not in ("<", "<="):
+ raise AutomataError("State constraints must be clock expirations like"
+ f" clk<N ({label})")
+
+class EventLabelParser:
+ grammar = r'''
+ events: event ("\\n" event)*
+
+ event: name (";" guard)?
+
+ guard: reset
+ | rule
+ | rule ";" reset
+ | reset ";" rule
+
+ name: CNAME
+
+ reset: "reset" "(" ENV ")"
+
+ %import common.CNAME
+ %import common.WS
+ %ignore WS
+ ''' + ConstraintRule.grammar
+
+ parser = lark.Lark(grammar, parser='lalr', start="events")
+
+ class GetEvents(lark.visitors.Transformer):
+ def guard(self, args):
+ reset = None
+ rule = None
+ for arg in args:
+ if arg.data == "reset":
+ reset = ConstraintReset(arg.children[0])
+ elif arg.data == "rule":
+ conditions = arg.children
+ rule = ConstraintRule(conditions[0])
+ for i in range(1, len(conditions), 2):
+ rule.chain(conditions[i], conditions[i + 1])
+ return reset, rule
+
+ def OP(self, args):
+ return args
+
+ def condition(self, args):
+ return ConstraintCondition(*args)
+
+ def event(self, args):
+ assert(len(args) <= 2)
+ name = args[0]
+ rule, reset = None, None
+ if len(args) == 2:
+ reset, rule = args[1]
+ return name, reset, rule
+
+ def events(self, args):
+ return args
+
+ def name(self, args):
+ return args[0]
+
+ def __init__(self, label: str):
+ try:
+ tree = self.parser.parse(label)
+ self.events = self.GetEvents().transform(tree)
+ except lark.exceptions.UnexpectedInput as exc:
+ raise(AutomataError(f"Unrecognised event \"{label}\"\n{exc}"))
+
+class Transition:
+ def __init__(self, src: str, dst: str, event: str,
+ reset: ConstraintReset, rule: ConstraintRule):
+ self.src = src
+ self.dst = dst
+ self.event = event
+ self.rule = rule
+ self.reset = reset
+
+class State:
+ def __init__(self, name: str, inv: ConstraintCondition):
+ self.name = name
+ self.inv = inv
+
class _ConstraintKey:
"""Base class for constraint keys."""
@@ -252,6 +410,8 @@ class Automata:
self.name = model_name or self.__get_model_name()
self.__dot_lines = self.__open_dot()
self.__parse_tree = ParseTree(file_path)
+ self.transitions = self.__parse_transitions()
+ self._states, self._initial_state, self._final_states = self.__parse_states()
self.states, self.initial_state, self.final_states = self.__get_state_variables()
self.env_types = {}
self.env_stored = set()
@@ -327,6 +487,62 @@ class Automata:
return cursor
+ def __parse_transitions(self):
+ transitions = []
+
+ for edge in self.__parse_tree.edges:
+ attr = self.__parse_tree.edge_attrs.get(edge)
+ if not attr:
+ continue
+
+ label = attr.get("label")
+
+ src, dst = edge
+
+ parser = EventLabelParser(label)
+ for event, reset, rule in parser.events:
+ transitions.append(Transition(src, dst, event, reset, rule))
+
+ transitions.sort(key=lambda t : (t.src, t.event))
+ return transitions
+
+ def __parse_states(self):
+ initial_state = ""
+ states = []
+ final_states = []
+
+ for node in self.__parse_tree.nodes:
+ attr = self.__parse_tree.node_attrs[node]
+ label = attr.get("label")
+
+ if node.startswith(Automata.init_marker):
+ initial_state = node[len(Automata.init_marker):]
+
+ if not label:
+ continue
+
+ parser = StateLabelParser(label)
+ state = State(parser.state, parser.constraint)
+
+ states.append(state)
+
+ shape = attr.get("shape")
+ if shape in ("doublecircle", "ellipse"):
+ final_states.append(state)
+
+
+ initial_state = next((s for s in states if s.name == initial_state), None)
+ if not initial_state:
+ raise AutomataError("The automaton doesn't have an initial state")
+
+ if not final_states:
+ final_states.append(initial_state)
+
+ states.remove(initial_state)
+ states.sort(key=lambda s : s.name)
+ states.insert(0, initial_state)
+ return states, initial_state, final_states
+
def __get_state_variables(self) -> tuple[list[str], str, list[str]]:
# wait for node declaration
states = []
--
2.47.3
^ permalink raw reply related
* [PATCH v3 02/13] verification/rvgen: Introduce a parse tree for automata using Lark
From: Nam Cao @ 2026-06-08 8:56 UTC (permalink / raw)
To: Gabriele Monaco, Steven Rostedt, Wander Lairson Costa,
linux-trace-kernel, linux-kernel
Cc: Nam Cao
In-Reply-To: <cover.1780908661.git.namcao@linutronix.de>
The DOT parsing scripts directly parse the raw text and they are quite
fragile. If the input dot files' formats are slightly changed (for
instance, by breaking some long lines, which is allowed by the DOT language
defined by graphviz), the scripts would fail.
To make the scripts robust, the parser should be implemented based on the
dot language specification, not based on how the existing dot files look.
As a first step, use Lark to implement a Parser based on the graphviz dot
language specification. The resulting parse tree is not used yet, but the
existing scripts will be converted one by one to use this new parse tree in
the follow-up commits.
Reviewed-by: Gabriele Monaco <gmonaco@redhat.com>
Signed-off-by: Nam Cao <namcao@linutronix.de>
---
tools/verification/rvgen/rvgen/automata.py | 186 +++++++++++++++++++++
1 file changed, 186 insertions(+)
diff --git a/tools/verification/rvgen/rvgen/automata.py b/tools/verification/rvgen/rvgen/automata.py
index b9f8149f7118..8649d982383d 100644
--- a/tools/verification/rvgen/rvgen/automata.py
+++ b/tools/verification/rvgen/rvgen/automata.py
@@ -13,6 +13,191 @@ import re
from typing import Iterator
from itertools import islice
+import lark
+
+class ParseTree:
+ # based on https://graphviz.org/doc/info/lang.html
+ # with the irrelevant stuffs (port and compass) removed
+ grammar = r'''
+ start: "strict"? ("graph" | "digraph") ID? "{" stmt_list "}"
+
+ stmt_list: (stmt ";"? stmt_list)?
+
+ stmt: node_stmt
+ | edge_stmt
+ | attr_stmt
+ | ID "=" ID
+ | subgraph
+
+ attr_stmt: attr_type attr_list
+
+ attr_type: "graph" -> graph
+ | "node" -> node
+ | "edge" -> edge
+
+ attr_list: "[" a_list? "]" attr_list?
+
+ a_list: ID "=" ID (";" | ",")? a_list?
+
+ edge_stmt: (node_id | subgraph) edgerhs attr_list?
+
+ edgerhs: edgeop (node_id | subgraph) edgerhs?
+
+ edgeop: "->" | "--"
+
+ node_stmt: node_id attr_list?
+
+ node_id: ID
+
+ subgraph: ("subgraph" ID?)? "{" stmt_list "}"
+
+ ID: CNAME
+ | /-?(\.[0-9]+|[0-9]+(\.[0-9]*))/
+ | ESCAPED_STRING
+
+ %import common.CNAME
+ %import common.ESCAPED_STRING
+ %import common.WS
+ %ignore WS
+ '''
+
+ @staticmethod
+ def parse_edge(tree: lark.Tree) -> tuple[str, str]:
+ # only support a simple node-to-node edge
+ nodes = []
+ for node in tree.iter_subtrees_topdown():
+ if node.data == "node_id":
+ nodes.append(node.children[0].strip('"'))
+
+ if len(nodes) != 2:
+ raise AutomataError("Only state-to-state transition is supported")
+
+ return tuple(nodes)
+
+ class ParseNodes(lark.visitors.Visitor):
+ def __init__(self, *args, **kwargs):
+ self.nodes = set()
+ super().__init__(*args, **kwargs)
+
+ def node_stmt(self, tree):
+ node_id = tree.children[0]
+ node = node_id.children[0].strip('"')
+ self.nodes.add(node)
+
+ class ParseEdges(lark.visitors.Visitor):
+ def __init__(self, *args, **kwargs):
+ self.edges = set()
+ super().__init__(*args, **kwargs)
+
+ def edge_stmt(self, tree):
+ edge = ParseTree.parse_edge(tree)
+ self.edges.add(edge)
+
+ class ParseAttributes(lark.visitors.Interpreter):
+ def __init__(self, *args, **kwargs):
+ '''
+ Stacks of default attributes. [0] is the default
+ attributes for the outermost scope, while [-1] is the
+ default attributes for the current scope.
+ '''
+ self.default_node_attrs = [{}]
+ self.default_edge_attrs = [{}]
+
+ self.node_attrs = {}
+ self.edge_attrs = {}
+
+ super().__init__(*args, **kwargs)
+
+ @staticmethod
+ def __get_attrs(stmt: lark.Tree) -> dict[str, str]:
+ attrs = {}
+
+ for node in stmt.iter_subtrees():
+ if node.data == "a_list":
+ attrs[node.children[0]] = node.children[1].strip('"')
+
+ return attrs
+
+
+ def subgraph(self, tree):
+ # We are entering a new scope, inherit the default
+ # attributes of the outer scope
+ self.default_node_attrs.append(self.default_node_attrs[-1].copy())
+ self.default_edge_attrs.append(self.default_edge_attrs[-1].copy())
+
+ children = self.visit_children(tree)
+
+ # Exiting the scope
+ del self.default_node_attrs[-1]
+ del self.default_edge_attrs[-1]
+
+ return children
+
+ def node_stmt(self, tree):
+ node_id = tree.children[0]
+ node = node_id.children[0].strip('"')
+
+ attrs = self.default_node_attrs[-1].copy()
+ attrs |= self.__get_attrs(tree)
+
+ if attrs:
+ if node in self.node_attrs:
+ self.node_attrs[node] = attrs | self.node_attrs[node]
+ else:
+ self.node_attrs[node] = attrs
+
+ return self.visit_children(tree)
+
+ def edge_stmt(self, tree):
+ edge = ParseTree.parse_edge(tree)
+
+ attrs = self.default_edge_attrs[-1].copy()
+ attrs |= self.__get_attrs(tree)
+
+ if attrs:
+ if edge in self.edge_attrs:
+ self.edge_attrs[edge] = attrs | self.edge_attrs[edge]
+ else:
+ self.edge_attrs[edge] = attrs
+
+ return self.visit_children(tree)
+
+ def attr_stmt(self, tree):
+ attr_type = tree.children[0].data
+ attrs = self.__get_attrs(tree)
+
+ if attr_type == "node":
+ self.default_node_attrs[-1] |= attrs
+ elif attr_type == "edge":
+ self.default_edge_attrs[-1] |= attrs
+ else:
+ # graph attributes are irrelevant
+ pass
+
+ self.visit_children(tree)
+
+ def __init__(self, dot_file):
+ parser = lark.Lark(self.grammar, parser='lalr')
+ node_parser = self.ParseNodes()
+ edge_parser = self.ParseEdges()
+ attributes_parser = self.ParseAttributes()
+
+ try:
+ with open(dot_file, "r") as f:
+ tree = parser.parse(f.read())
+ attributes_parser.visit(tree)
+ node_parser.visit(tree)
+ edge_parser.visit(tree)
+ except OSError as exc:
+ raise AutomataError(exc.strerror) from exc
+ except lark.exceptions.UnexpectedInput as exc:
+ raise AutomataError(str(exc))
+
+ self.nodes = node_parser.nodes
+ self.edges = edge_parser.edges
+ self.node_attrs = attributes_parser.node_attrs
+ self.edge_attrs = attributes_parser.edge_attrs
+
class _ConstraintKey:
"""Base class for constraint keys."""
@@ -66,6 +251,7 @@ class Automata:
self.__dot_path = file_path
self.name = model_name or self.__get_model_name()
self.__dot_lines = self.__open_dot()
+ self.__parse_tree = ParseTree(file_path)
self.states, self.initial_state, self.final_states = self.__get_state_variables()
self.env_types = {}
self.env_stored = set()
--
2.47.3
^ permalink raw reply related
* [PATCH v3 00/13] rv: Convert rvgen to Lark
From: Nam Cao @ 2026-06-08 8:56 UTC (permalink / raw)
To: Gabriele Monaco, Steven Rostedt, Wander Lairson Costa,
linux-trace-kernel, linux-kernel
Cc: Nam Cao
This series converts the linear temporal logic parser and the automata
parser to use Lark.
The LTL parser has been using ply - a parsing library. However, ply
was recently announced to be abandoned. Furthermore, ply does not
offer the features that lark has.
On the other hand, the automata parser is mostly raw text processing
which is quite fragile. For instance, by slightly deforming wwnr.dot
(without making it an invalid dot file):
digraph state_automaton {
{node [shape = plaintext, style=invis, label=""] "__init_not_running"};
{node [shape = ellipse]
"not_running"};
{node [shape=plaintext] "not_running"};
{node [shape = plaintext] "running"};
"__init_not_running"
-> "not_running";
"not_running" [label = "not_running", color = green3];
"not_running" ->
"not_running" [ label = "wakeup" ];
"not_running" -> "running" [ label = "switch_in" ];
"running" [label = "running"];
"running" -> "not_running" [ label = "switch_out" ];
}
the parser would be broken. Furthermore, the code is a bit hard to
follow with raw text being stored in lots of variables and sometimes
it is hard to figure out what sort of text is stored in the variables
while reading the code.
This motivates me to convert the automata parser as well. The plan is:
- Introduce Lark and prepare the parsed states, transitions and
constraints
- Convert the parser piece by piece to the parsed results from Lark
- Delete the old code
I struggled with converting __find_inv_conflicts(). So I decided to
remove the dual clock representation in the HA monitors, which allows
me to delete __find_inv_conflicts() entirely. This makes the code
simpler overall.
After the series, the generated HA monitors are mostly unchanged,
except:
- Clock representation conversion is gone and
ha_check_invariant_[ns|jiffy]() takes a new argument
- The ordering in ha_verify_guards() is changed, but still
equivalent. This is because it is now sorted lexically.
The generated LTL monitors are sadly significantly different, but were
shown to be equivalent by runtime testing. Further work will make LTL
monitor generation more consistent.
v3..v2: https://lore.kernel.org/lkml/cover.1779956342.git.namcao@linutronix.de/
- remove some redundant imports
- fix build failure due to passing wrong parameters to ha_invariant_passed_jiffy()
v2..v1: https://lore.kernel.org/lkml/cover.1777962130.git.namcao@linutronix.de/
- address human's reviews and sashiko's reviews
- handle lark's exception, yielding a much better error message
Nam Cao (13):
verification/rvgen: Switch LTL parser to Lark
verification/rvgen: Introduce a parse tree for automata using Lark
verification/rvgen: Implement state and transition parser based on
Lark
verification/rvgen: Convert __fill_verify_invariants_func() to Lark
verification/rvgen: Convert __fill_setup_invariants_func() to Lark
verification/rvgen: Convert __fill_verify_guards_func() to Lark
rv: Simplify hybrid automata monitors's clock variables
verification/rvgen: Simplify the generation for clock variables
verification/rvgen: Delete __parse_constraint()
verification/rvgen: Switch __get_event_variables() to Lark
verification/rvgen: Switch __create_matrix() to Lark
verification/rvgen: Remove the old state variables
verification/rvgen: Remove dead code
include/rv/ha_monitor.h | 64 +-
kernel/trace/rv/monitors/nomiss/nomiss.c | 18 +-
kernel/trace/rv/monitors/stall/stall.c | 2 +-
tools/verification/rvgen/__main__.py | 5 +-
tools/verification/rvgen/rvgen/automata.py | 643 +++++++++++++--------
tools/verification/rvgen/rvgen/dot2c.py | 10 +-
tools/verification/rvgen/rvgen/dot2k.py | 290 +++-------
tools/verification/rvgen/rvgen/ltl2ba.py | 202 +++----
8 files changed, 604 insertions(+), 630 deletions(-)
--
2.47.3
^ permalink raw reply
* Re: [PATCH v7 10/42] KVM: guest_memfd: Ensure pages are not in use before conversion
From: Vlastimil Babka (SUSE) @ 2026-06-08 8:55 UTC (permalink / raw)
To: ackerleytng, aik, andrew.jones, binbin.wu, brauner, chao.p.peng,
david, ira.weiny, jmattson, jthoughton, michael.roth, oupton,
pankaj.gupta, qperret, rick.p.edgecombe, rientjes, shivankg,
steven.price, tabba, willy, wyihan, yan.y.zhao, forkloop,
pratyush, suzuki.poulose, aneesh.kumar, liam, Paolo Bonzini,
Sean Christopherson, Thomas Gleixner, Ingo Molnar,
Borislav Petkov, Dave Hansen, x86, H. Peter Anvin, Steven Rostedt,
Masami Hiramatsu, Mathieu Desnoyers, Jonathan Corbet, Shuah Khan,
Shuah Khan, Vishal Annapurve, Andrew Morton, Chris Li,
Kairui Song, Kemeng Shi, Nhat Pham, Baoquan He, Barry Song,
Axel Rasmussen, Yuanchu Xie, Wei Xu, Youngjun Park, Qi Zheng,
Shakeel Butt, Kiryl Shutsemau, Jason Gunthorpe
Cc: kvm, linux-kernel, linux-trace-kernel, linux-doc, linux-kselftest,
linux-mm, linux-coco
In-Reply-To: <20260522-gmem-inplace-conversion-v7-10-2f0fae496530@google.com>
On 5/23/26 02:17, Ackerley Tng via B4 Relay wrote:
> From: Ackerley Tng <ackerleytng@google.com>
>
> When converting memory to private in guest_memfd, it is necessary to ensure
> that the pages are not currently being accessed by any other part of the
> kernel or userspace to avoid any current user writing to guest private
> memory.
>
> guest_memfd checks for unexpected refcounts to determine whether a page is
> still in use. The only expected refcounts after unmapping the range
> requested for conversion are those that are held by guest_memfd itself.
Is it sufficient to only check, and not also freeze the refcount (i.e.
using folio_ref_freeze())? Without freezing, anything (e.g.
compaction's pfn-based scanner) could do a speculative folio_try_get() and
the checked refcount becomes stale.
Might be ok if we know that no such speculative increment can result in
actually touching the page contents, and the extra refcount and something
inspecting the struct folio won't interfere with anything else. Then it
could be just a comment mentioning why it's safe.
IIRC the compaction's scanning can result in a migration here so it's
probably ok?
> Update the kvm_memory_attributes2 structure to include an error_offset
> field. This allows KVM to report the exact offset where a conversion
> failed to userspace. If the safety check fails, return -EAGAIN and copy
> the error_offset back to userspace so that it can potentially retry the
> operation or handle the failure gracefully.
>
> Suggested-by: David Hildenbrand <david@kernel.org>
> Co-developed-by: Vishal Annapurve <vannapurve@google.com>
> Signed-off-by: Vishal Annapurve <vannapurve@google.com>
> Reviewed-by: Fuad Tabba <tabba@google.com>
> Signed-off-by: Ackerley Tng <ackerleytng@google.com>
> ---
> include/uapi/linux/kvm.h | 3 ++-
> virt/kvm/guest_memfd.c | 68 ++++++++++++++++++++++++++++++++++++++++++++----
> 2 files changed, 65 insertions(+), 6 deletions(-)
>
> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
> index e6bbf68a83813..0b55258573d3d 100644
> --- a/include/uapi/linux/kvm.h
> +++ b/include/uapi/linux/kvm.h
> @@ -1658,7 +1658,8 @@ struct kvm_memory_attributes2 {
> __u64 size;
> __u64 attributes;
> __u64 flags;
> - __u64 reserved[12];
> + __u64 error_offset;
> + __u64 reserved[11];
> };
>
> #define KVM_MEMORY_ATTRIBUTE_PRIVATE (1ULL << 3)
> diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c
> index 426917d22a2b6..2767992955752 100644
> --- a/virt/kvm/guest_memfd.c
> +++ b/virt/kvm/guest_memfd.c
> @@ -572,9 +572,45 @@ static int kvm_gmem_mas_preallocate(struct ma_state *mas, u64 attributes,
> return mas_preallocate(mas, xa_mk_value(attributes), GFP_KERNEL);
> }
>
> +static bool kvm_gmem_is_safe_for_conversion(struct inode *inode, pgoff_t start,
> + size_t nr_pages, pgoff_t *err_index)
> +{
> + struct address_space *mapping = inode->i_mapping;
> + const int filemap_get_folios_refcount = 1;
> + pgoff_t last = start + nr_pages - 1;
> + struct folio_batch fbatch;
> + bool safe = true;
> + pgoff_t next;
> + int i;
> +
> + folio_batch_init(&fbatch);
> +
> + next = start;
> + while (safe && filemap_get_folios(mapping, &next, last, &fbatch)) {
> +
> + for (i = 0; i < folio_batch_count(&fbatch); ++i) {
> + struct folio *folio = fbatch.folios[i];
> +
> + if (folio_ref_count(folio) !=
> + folio_nr_pages(folio) + filemap_get_folios_refcount) {
> + safe = false;
> + *err_index = max(start, folio->index);
> + break;
> + }
> + }
> +
> + folio_batch_release(&fbatch);
> + cond_resched();
> + }
> +
> + return safe;
> +}
> +
> static int __kvm_gmem_set_attributes(struct inode *inode, pgoff_t start,
> - size_t nr_pages, uint64_t attrs)
> + size_t nr_pages, uint64_t attrs,
> + pgoff_t *err_index)
> {
> + bool to_private = attrs & KVM_MEMORY_ATTRIBUTE_PRIVATE;
> struct address_space *mapping = inode->i_mapping;
> struct gmem_inode *gi = GMEM_I(inode);
> pgoff_t end = start + nr_pages;
> @@ -588,8 +624,21 @@ static int __kvm_gmem_set_attributes(struct inode *inode, pgoff_t start,
>
> mas_init(&mas, mt, start);
> r = kvm_gmem_mas_preallocate(&mas, attrs, start, nr_pages);
> - if (r)
> + if (r) {
> + *err_index = start;
> goto out;
> + }
> +
> + if (to_private) {
> + unmap_mapping_pages(mapping, start, nr_pages, false);
> +
> + if (!kvm_gmem_is_safe_for_conversion(inode, start, nr_pages,
> + err_index)) {
> + mas_destroy(&mas);
> + r = -EAGAIN;
> + goto out;
> + }
> + }
>
> /*
> * From this point on guest_memfd has performed necessary
> @@ -609,9 +658,10 @@ static long kvm_gmem_set_attributes(struct file *file, void __user *argp)
> struct gmem_file *f = file->private_data;
> struct inode *inode = file_inode(file);
> struct kvm_memory_attributes2 attrs;
> + pgoff_t err_index;
> size_t nr_pages;
> pgoff_t index;
> - int i;
> + int i, r;
>
> if (copy_from_user(&attrs, argp, sizeof(attrs)))
> return -EFAULT;
> @@ -635,8 +685,16 @@ static long kvm_gmem_set_attributes(struct file *file, void __user *argp)
>
> nr_pages = attrs.size >> PAGE_SHIFT;
> index = attrs.offset >> PAGE_SHIFT;
> - return __kvm_gmem_set_attributes(inode, index, nr_pages,
> - attrs.attributes);
> + r = __kvm_gmem_set_attributes(inode, index, nr_pages, attrs.attributes,
> + &err_index);
> + if (r) {
> + attrs.error_offset = ((uint64_t)err_index) << PAGE_SHIFT;
> +
> + if (copy_to_user(argp, &attrs, sizeof(attrs)))
> + return -EFAULT;
> + }
> +
> + return r;
> }
>
> static long kvm_gmem_ioctl(struct file *file, unsigned int ioctl,
>
^ permalink raw reply
* Re: [PATCH v7 14/42] KVM: guest_memfd: Handle lru_add fbatch refcounts during conversion safety check
From: Vlastimil Babka (SUSE) @ 2026-06-08 8:45 UTC (permalink / raw)
To: ackerleytng, aik, andrew.jones, binbin.wu, brauner, chao.p.peng,
david, ira.weiny, jmattson, jthoughton, michael.roth, oupton,
pankaj.gupta, qperret, rick.p.edgecombe, rientjes, shivankg,
steven.price, tabba, willy, wyihan, yan.y.zhao, forkloop,
pratyush, suzuki.poulose, aneesh.kumar, liam, Paolo Bonzini,
Sean Christopherson, Thomas Gleixner, Ingo Molnar,
Borislav Petkov, Dave Hansen, x86, H. Peter Anvin, Steven Rostedt,
Masami Hiramatsu, Mathieu Desnoyers, Jonathan Corbet, Shuah Khan,
Shuah Khan, Vishal Annapurve, Andrew Morton, Chris Li,
Kairui Song, Kemeng Shi, Nhat Pham, Baoquan He, Barry Song,
Axel Rasmussen, Yuanchu Xie, Wei Xu, Youngjun Park, Qi Zheng,
Shakeel Butt, Kiryl Shutsemau, Jason Gunthorpe
Cc: kvm, linux-kernel, linux-trace-kernel, linux-doc, linux-kselftest,
linux-mm, linux-coco
In-Reply-To: <20260522-gmem-inplace-conversion-v7-14-2f0fae496530@google.com>
On 5/23/26 02:17, Ackerley Tng via B4 Relay wrote:
> From: Ackerley Tng <ackerleytng@google.com>
>
> When checking if a guest_memfd folio is safe for conversion, its refcount
> is examined. A folio may be present in a per-CPU lru_add fbatch, which
> temporarily increases its refcount. This can lead to a false positive,
> incorrectly indicating that the folio is in use and preventing the
> conversion, even if it is otherwise safe. The conversion process might not
> be on the same CPU that holds the folio in its fbatch, making a simple
> per-CPU check insufficient.
>
> To address this, drain all CPUs' lru_add fbatches if an unexpectedly high
> refcount is encountered during the safety check. This is performed at most
> once per conversion request. Drain only if the folio in question may be
> lru cached.
>
> guest_memfd folios are unevictable, so they can only reside in the lru_add
> fbatch. If the folio's refcount is still unsafe after draining, then the
> conversion is truly deemed unsafe.
>
> Reviewed-by: Fuad Tabba <tabba@google.com>
> Signed-off-by: Ackerley Tng <ackerleytng@google.com>
Acked-by: Vlastimil Babka (SUSE) <vbabka@kernel.org>
^ permalink raw reply
* Re: [PATCH] rethook: Use tsk->on_cpu to check task execution state
From: Tengda Wu @ 2026-06-08 8:31 UTC (permalink / raw)
To: Masami Hiramatsu
Cc: Peter Zijlstra, Josh Poimboeuf, Steven Rostedt, Mathieu Desnoyers,
Alexei Starovoitov, linux-trace-kernel, linux-kernel
In-Reply-To: <20260608115646.97d80d30aed182d468496449@kernel.org>
On 2026/6/8 10:56, Masami Hiramatsu wrote:
> On Mon, 8 Jun 2026 09:52:37 +0800
> Tengda Wu <wutengda@huaweicloud.com> wrote:
>
>>
>>
>> On 2026/6/5 21:43, Masami Hiramatsu wrote:
>>> On Thu, 4 Jun 2026 11:34:45 +0200
>>> Peter Zijlstra <peterz@infradead.org> wrote:
>>>
>>>> On Mon, Jun 01, 2026 at 08:40:01AM +0900, Masami Hiramatsu wrote:
>>>>
>>>>> Peter, is it OK to drop @rq from task_on_cpu()?
>>>>
>>>> Sure.
>>>>
>>>>> Then we can use it from rethook.
>>>>
>>>> Well, it is in sched/sched.h, which is an internal header, and no you
>>>> cannot use that header in rethook.
>>>
>>> Ah, OK. Hmm, then we should not use it. Maybe ->on_cpu is also internal
>>> state?
>>>
>>>>
>>>> But lets step back first, what is the actual problem here, why are we
>>>> looking at ->on_cpu at all?
>>>
>>> Tengda, can you explain it?
>>> I think you want to take a stacktrace on !current process, and
>>> rethook_find_ret_addr() is rejected if the task is in the running state.
>>>
>>> But if you can share actual situation what you need, it is
>>> helpful for us to understand.
>>>
>>> Thank you,
>>>
>>>
>>
>>
>> Sure.
>>
>> Background: We are verifying the support of live patches for functions that
>> have a kretprobe. The specific verification method is as follows:
>>
>> We construct a function foo() that calls bar():
>>
>> void bar(void)
>> {
>> for (;;) {
>> schedule();
>> }
>> }
>>
>> void foo(void)
>> {
>> bar();
>> }
>>
>> A kretprobe is attached to bar():
>>
>> echo 'r:rp1 bar' > /sys/kernel/tracing/kprobe_events
>> echo 1 > /sys/kernel/tracing/events/kprobes/rp1/enable
>>
>> Then foo() is triggered. The expected behavior is that bar() will call
>> schedule() and yield the CPU.
>>
>> After that, the live patch is activated to attempt replacing the implementation
>> of foo(). The expectation is that this should succeed.
>>
>> However, in reality, because the task that called schedule() is still in the
>> RUNNING state, the condition task_is_running(tsk) inside rethook_find_ret_addr()
>> is not satisfied, causing the function to return early. This, in turn,
>> prevents stack_trace_save_tsk_reliable() from determining the stack as
>> reliable, leading to a failure in activating the live patch.
>
> Hmm is the bar() doing infinite loop, or limited loop but take a long time
> so just yield a while? Anyway, it seems like a non-good design pattern.
> Is it possible to avoid busy loops and instead use Workers, or wait for
> something to complete or for input within a loop?
>
>>
>> **Not sure if this is correct:**
>>
>> We believe that after a task voluntarily calls schedule(), when the stack
>> is expected to be reliable, it is a safe time to activate a live patch.
>
> In this case, I don't know how to block the loop inside the bar.
> Even if !tsk->on_cpu, the tsk can restart running right after checking
> the flag.
>
The infinite loop in bar() is indeed a poor design pattern. This test
case is only artificial, not from real-world code. It is merely
intended to verify live patch support for various cases.
However, the point you raised has indeed made me think. I realize that
checking only tsk->on_cpu is not sufficient -- there is also a race
condition where the task could be scheduled back onto a CPU right after
the check. I need to re-examine the validity of this test case and
whether it represents a safe live patch activation scenario.
Thank you again for your patience and for pointing out these
fundamental issues. Your guidance is much appreciated.
Best regards,
Tengda
^ permalink raw reply
* Re: [PATCH v2 13/13] verification/rvgen: Remove dead code
From: Nam Cao @ 2026-06-08 8:29 UTC (permalink / raw)
To: Gabriele Monaco, Wander Lairson Costa, Steven Rostedt,
linux-trace-kernel, linux-kernel
In-Reply-To: <310e485ce394c7d258e142e14d5e51c5e15e1d30.camel@redhat.com>
Gabriele Monaco <gmonaco@redhat.com> writes:
> You might want to remove unused imports (linters should help you with
> that too):
> * re, typing.Iterator, and itertools.islice from automata.py
> * deque and ConstraintRule from dot2k
Thanks, I overlooked those warnings due to the noise from the existing
warnings :(
Let me clean up the existing pylint issues, so that new warnings are
easily noticed.
Nam
^ permalink raw reply
* Re: [PATCH next] kernel/trace/trace_printk: Use kstrdup() instead of kmalloc() and strcpy()
From: Masami Hiramatsu @ 2026-06-08 8:27 UTC (permalink / raw)
To: david.laight.linux
Cc: Kees Cook, linux-hardening, Arnd Bergmann, linux-kernel,
linux-trace-kernel, Masami Hiramatsu, Steven Rostedt
In-Reply-To: <20260606202633.5018-34-david.laight.linux@gmail.com>
On Sat, 6 Jun 2026 21:26:28 +0100
david.laight.linux@gmail.com wrote:
> From: David Laight <david.laight.linux@gmail.com>
>
> Signed-off-by: David Laight <david.laight.linux@gmail.com>
> ---
> This is one of a group of patches that remove potentially unbounded
> strcpy() calls.
>
> They are mostly replaced by strscpy() or, when strlen() has just been
> called, with memcpy() (usually including the '\0').
>
> Calls that copy string literals into arrays are left unchanged.
> They are safe and easily detected as such.
>
> The changes were made by getting the compiler to detect the calls and
> then fixing the code by hand.
>
> Note that all the changes are only compile tested.
>
> Some Makefiles were changed to allow files to contain strcpy().
> As well as 'difficult to fix' files, this included 'show' functions
> as they really need to use sysfs_emit() or seq_printf().
>
> All the patches are being sent individually to avoid very long cc lists.
> Apologies for the terse commit messages and likely unexpected tags.
> (There are about 100 patches in total.)
>
This looks good to me.
Reviewed-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
Thanks,
> kernel/trace/trace_printk.c | 3 +--
> 1 file changed, 1 insertion(+), 2 deletions(-)
>
> diff --git a/kernel/trace/trace_printk.c b/kernel/trace/trace_printk.c
> index 3ea17af60169..98171a2398e4 100644
> --- a/kernel/trace/trace_printk.c
> +++ b/kernel/trace/trace_printk.c
> @@ -71,10 +71,9 @@ void hold_module_trace_bprintk_format(const char **start, const char **end)
> fmt = NULL;
> tb_fmt = kmalloc_obj(*tb_fmt);
> if (tb_fmt) {
> - fmt = kmalloc(strlen(*iter) + 1, GFP_KERNEL);
> + fmt = kstrdup(*iter, GFP_KERNEL);
> if (fmt) {
> list_add_tail(&tb_fmt->list, &trace_bprintk_fmt_list);
> - strcpy(fmt, *iter);
> tb_fmt->fmt = fmt;
> } else
> kfree(tb_fmt);
> --
> 2.39.5
>
--
Masami Hiramatsu (Google) <mhiramat@kernel.org>
^ permalink raw reply
* Re: [PATCHv8 bpf-next 28/29] selftests/bpf: Add tracing multi attach benchmark test
From: Jiri Olsa @ 2026-06-08 8:25 UTC (permalink / raw)
To: Alexei Starovoitov
Cc: Alexei Starovoitov, Daniel Borkmann, Andrii Nakryiko, bpf,
linux-trace-kernel, Martin KaFai Lau, Eduard Zingerman, Song Liu,
Yonghong Song, Menglong Dong, Steven Rostedt
In-Reply-To: <DJ30RPYRXESZ.2P3LP98X55VMG@gmail.com>
On Sun, Jun 07, 2026 at 11:13:32AM -0700, Alexei Starovoitov wrote:
> On Sat Jun 6, 2026 at 5:39 AM PDT, Jiri Olsa wrote:
> > Adding benchmark test that attaches to (almost) all allowed tracing
> > functions and displays attach/detach times.
> >
> > # ./test_progs -t tracing_multi_bench_attach -v
> > bpf_testmod.ko is already unloaded.
> > Loading bpf_testmod.ko...
> > Successfully loaded bpf_testmod.ko.
> > serial_test_tracing_multi_bench_attach:PASS:btf__load_vmlinux_btf 0 nsec
> > serial_test_tracing_multi_bench_attach:PASS:tracing_multi_bench__open_and_load 0 nsec
> > serial_test_tracing_multi_bench_attach:PASS:get_syms 0 nsec
> > serial_test_tracing_multi_bench_attach:PASS:bpf_program__attach_tracing_multi 0 nsec
> > serial_test_tracing_multi_bench_attach: found 51186 functions
> > serial_test_tracing_multi_bench_attach: attached in 1.295s
> > serial_test_tracing_multi_bench_attach: detached in 0.243s
>
> ...
>
> > + if (!ASSERT_OK(bpf_get_ksyms(&ksyms, true), "get_syms"))
> > + goto cleanup;
> > +
> > + /* Get all ftrace 'safe' symbols.. */
> > + for (i = 0; i < ksyms->filtered_cnt; i++) {
> > + if (!tsearch(&ksyms->filtered_syms[i], &root, compare)) {
> > + ASSERT_FAIL("tsearch failed");
> > + goto cleanup;
> > + }
> > + }
> > +
> > + /* ..and filter them through BTF and btf_type_is_traceable_func. */
> > + nr = btf__type_cnt(btf);
> > + for (type_id = 1; type_id < nr; type_id++) {
> > + const struct btf_type *type;
> > + const char *str;
> > +
> > + type = btf__type_by_id(btf, type_id);
> > + if (!type)
> > + break;
> > +
> > + if (BTF_INFO_KIND(type->info) != BTF_KIND_FUNC)
> > + continue;
> > +
> > + str = btf__name_by_offset(btf, type->name_off);
> > + if (!str)
> > + break;
> > +
> > + if (!tfind(&str, &root, compare))
> > + continue;
> > +
> > + if (!btf_type_is_traceable_func(btf, type))
> > + continue;
> > +
> > + err = libbpf_ensure_mem((void **) &ids, &cap, sizeof(*ids), cnt + 1);
> > + if (err)
> > + goto cleanup;
> > +
> > + ids[cnt++] = type_id;
> > + }
>
> This filtering wasn't enough.
> I've added removal of duplicates here while applying:
>
> + /*
> + * Collect names that are not unique in kallsyms. The kernel resolves a
> + * tracing-multi BTF id to an address with kallsyms_lookup_name(), which
> + * returns the first symbol of that name. For a duplicate name that may
> + * be a different (non-ftrace-able) instance than the ftrace-able one in
> + * available_filter_functions, so attaching to it by BTF id fails with
> + * -ENOENT (e.g. t_start/t_next/t_stop). ksyms->syms is sorted by name,
> + * so equal names are adjacent.
> + */
> + for (i = 1; i < ksyms->sym_cnt; i++) {
> + if (strcmp(ksyms->syms[i].name, ksyms->syms[i - 1].name))
> + continue;
> + if (!tsearch(&ksyms->syms[i].name, &dups, compare)) {
> + ASSERT_FAIL("tsearch failed");
> + goto cleanup;
> + }
> + }
>
>
> + /* Skip names that are not unique in kallsyms, see above. */
> + if (tfind(&str, &dups, compare))
> + continue;
>
>
> As claude explains it:
> ----
> 1. The kernel attaches tracing_multi by BTF id. To get an address it resolves
> the BTF function name via kallsyms_lookup_name(tname) and requires
> ftrace_location(addr) — kernel/bpf/verifier.c:19380:
> addr = kallsyms_lookup_name(tname);
> ...
> if (!addr || !ftrace_location(addr))
> return -ENOENT;
> 2. t_start/t_next/t_stop each have 5 instances in this kernel. Only one is
> ftrace-able — the copies in kernel/trace/* are built notrace (ftrace's Makefile
> strips -pg), so only the unrelated copy is in available_filter_functions:
> 3. kallsyms_lookup_name() returns the lowest-address instance among equal names
> (exact strcmp, lowest seq). That instance has no fentry → ftrace_location()
> returns 0 → -ENOENT, which aborts the whole all-or-nothing bench attach.
>
> Why the bench includes them: it intersects BTF FUNC names with
> available_filter_functions names. Since some t_start is ftrace-able, the name
> passes the filter — but the kernel resolves the wrong (non-ftrace-able)
> t_start. The author's kernel apparently had the ftrace-able copy at the lowest
> address, so it passed there.
>
> This is a pre-existing limitation, not multi-specific: single fentry attach by
> BTF id uses the same kallsyms_lookup_name(tname) path (verifier.c:19120) — you
> can't reliably fentry-attach to any duplicate-named function on this kernel
> either.
> ----
strange I never triggered that.. but makes sense
>
> Maybe we should adjust bpf_get_ksyms() instead. Not sure.
the only other user is test_kprobe_multi_bench_attach which does
not care about this, so I think at this point keeping this in the
serial_test_tracing_multi_bench_attach is enough for now
thanks,
jirka
^ permalink raw reply
* Re: [PATCH mm-unstable v19 14/14] Documentation: mm: update the admin guide for mTHP collapse
From: Lance Yang @ 2026-06-08 7:41 UTC (permalink / raw)
To: npache
Cc: linux-doc, linux-kernel, linux-mm, linux-trace-kernel, aarcange,
akpm, anshuman.khandual, apopple, baohua, baolin.wang, byungchul,
catalin.marinas, cl, corbet, dave.hansen, david, dev.jain, gourry,
hannes, hughd, jack, jackmanb, jannh, jglisse, joshua.hahnjy, kas,
lance.yang, liam, ljs, mathieu.desnoyers, matthew.brost, mhiramat,
mhocko, peterx, pfalcato, rakie.kim, raquini, rdunlap,
richard.weiyang, rientjes, rostedt, rppt, ryan.roberts, shivankg,
sunnanyong, surenb, thomas.hellstrom, tiwai, usamaarif642, vbabka,
vishal.moola, wangkefeng.wang, will, willy, yang, ying.huang, ziy,
zokeefe, bagasdotme
In-Reply-To: <20260605161422.213817-15-npache@redhat.com>
On Fri, Jun 05, 2026 at 10:14:21AM -0600, Nico Pache wrote:
>Now that we can collapse to mTHPs let's update the admin guide to
>reflect these changes and provide proper guidance on how to utilize it.
>
>Reviewed-by: Lorenzo Stoakes <ljs@kernel.org>
>Reviewed-by: Bagas Sanjaya <bagasdotme@gmail.com>
>Signed-off-by: Nico Pache <npache@redhat.com>
>---
Reviewed-by: Lance Yang <lance.yang@linux.dev>
^ permalink raw reply
* Re: [PATCH mm-unstable v19 12/14] mm/khugepaged: avoid unnecessary mTHP collapse attempts
From: Lance Yang @ 2026-06-08 7:36 UTC (permalink / raw)
To: npache
Cc: linux-doc, linux-kernel, linux-mm, linux-trace-kernel, aarcange,
akpm, anshuman.khandual, apopple, baohua, baolin.wang, byungchul,
catalin.marinas, cl, corbet, dave.hansen, david, dev.jain, gourry,
hannes, hughd, jack, jackmanb, jannh, jglisse, joshua.hahnjy, kas,
lance.yang, liam, ljs, mathieu.desnoyers, matthew.brost, mhiramat,
mhocko, peterx, pfalcato, rakie.kim, raquini, rdunlap,
richard.weiyang, rientjes, rostedt, rppt, ryan.roberts, shivankg,
sunnanyong, surenb, thomas.hellstrom, tiwai, usamaarif642, vbabka,
vishal.moola, wangkefeng.wang, will, willy, yang, ying.huang, ziy,
zokeefe, usama.arif
In-Reply-To: <20260605161422.213817-13-npache@redhat.com>
On Fri, Jun 05, 2026 at 10:14:19AM -0600, Nico Pache wrote:
>There are cases where, if an attempted collapse fails, all subsequent
>orders are guaranteed to also fail. Avoid these collapse attempts by
>bailing out early.
>
>Reviewed-by: Lorenzo Stoakes <ljs@kernel.org>
>Acked-by: Usama Arif <usama.arif@linux.dev>
>Acked-by: David Hildenbrand (Arm) <david@kernel.org>
>Signed-off-by: Nico Pache <npache@redhat.com>
>---
Reviewed-by: Lance Yang <lance.yang@linux.dev>
^ permalink raw reply
* Re: [PATCH mm-unstable v19 10/14] mm/khugepaged: introduce collapse_possible_orders helper functions
From: Lance Yang @ 2026-06-08 7:27 UTC (permalink / raw)
To: npache
Cc: linux-doc, linux-kernel, linux-mm, linux-trace-kernel, aarcange,
akpm, anshuman.khandual, apopple, baohua, baolin.wang, byungchul,
catalin.marinas, cl, corbet, dave.hansen, david, dev.jain, gourry,
hannes, hughd, jack, jackmanb, jannh, jglisse, joshua.hahnjy, kas,
lance.yang, liam, ljs, mathieu.desnoyers, matthew.brost, mhiramat,
mhocko, peterx, pfalcato, rakie.kim, raquini, rdunlap,
richard.weiyang, rientjes, rostedt, rppt, ryan.roberts, shivankg,
sunnanyong, surenb, thomas.hellstrom, tiwai, usamaarif642, vbabka,
vishal.moola, wangkefeng.wang, will, willy, yang, ying.huang, ziy,
zokeefe
In-Reply-To: <20260605161422.213817-11-npache@redhat.com>
On Fri, Jun 05, 2026 at 10:14:17AM -0600, Nico Pache wrote:
>Add collapse_possible_orders() to generalize THP order eligibility. The
>function determines which THP orders are permitted based on collapse
>context (khugepaged vs madv_collapse). We also add collapse_possible()
>as a thin wrapper around collapse_possible_orders() that returns a bool
>rather than the whole bitmap.
>
>This consolidates collapse configuration logic and provides a clean
>interface for future mTHP collapse support where the orders may be
>different.
>
>Acked-by: David Hildenbrand (Arm) <david@kernel.org>
>Reviewed-by: Baolin Wang <baolin.wang@linux.alibaba.com>
>Signed-off-by: Nico Pache <npache@redhat.com>
>---
Reviewed-by: Lance Yang <lance.yang@linux.dev>
^ permalink raw reply
* Re: [PATCH mm-unstable v19 09/14] mm/khugepaged: improve tracepoints for mTHP orders
From: Lance Yang @ 2026-06-08 7:19 UTC (permalink / raw)
To: npache
Cc: linux-doc, linux-kernel, linux-mm, linux-trace-kernel, aarcange,
akpm, anshuman.khandual, apopple, baohua, baolin.wang, byungchul,
catalin.marinas, cl, corbet, dave.hansen, david, dev.jain, gourry,
hannes, hughd, jack, jackmanb, jannh, jglisse, joshua.hahnjy, kas,
lance.yang, liam, ljs, mathieu.desnoyers, matthew.brost, mhiramat,
mhocko, peterx, pfalcato, rakie.kim, raquini, rdunlap,
richard.weiyang, rientjes, rostedt, rppt, ryan.roberts, shivankg,
sunnanyong, surenb, thomas.hellstrom, tiwai, usamaarif642, vbabka,
vishal.moola, wangkefeng.wang, will, willy, yang, ying.huang, ziy,
zokeefe
In-Reply-To: <20260605161422.213817-10-npache@redhat.com>
On Fri, Jun 05, 2026 at 10:14:16AM -0600, Nico Pache wrote:
>Add the order to the mm_collapse_huge_page<_swapin,_isolate> tracepoints to
>give better insight into what order is being operated on.
>
>Reviewed-by: Lorenzo Stoakes <ljs@kernel.org>
>Reviewed-by: Baolin Wang <baolin.wang@linux.alibaba.com>
>Acked-by: David Hildenbrand (Arm) <david@kernel.org>
>Signed-off-by: Nico Pache <npache@redhat.com>
>---
Reviewed-by: Lance Yang <lance.yang@linux.dev>
^ permalink raw reply
* Re: [PATCH mm-unstable v19 08/14] mm/khugepaged: add per-order mTHP collapse failure statistics
From: Lance Yang @ 2026-06-08 7:13 UTC (permalink / raw)
To: npache
Cc: linux-doc, linux-kernel, linux-mm, linux-trace-kernel, aarcange,
akpm, anshuman.khandual, apopple, baohua, baolin.wang, byungchul,
catalin.marinas, cl, corbet, dave.hansen, david, dev.jain, gourry,
hannes, hughd, jack, jackmanb, jannh, jglisse, joshua.hahnjy, kas,
lance.yang, liam, ljs, mathieu.desnoyers, matthew.brost, mhiramat,
mhocko, peterx, pfalcato, rakie.kim, raquini, rdunlap,
richard.weiyang, rientjes, rostedt, rppt, ryan.roberts, shivankg,
sunnanyong, surenb, thomas.hellstrom, tiwai, usamaarif642, vbabka,
vishal.moola, wangkefeng.wang, will, willy, yang, ying.huang, ziy,
zokeefe
In-Reply-To: <20260605161422.213817-9-npache@redhat.com>
On Fri, Jun 05, 2026 at 10:14:15AM -0600, Nico Pache wrote:
>Add three new mTHP statistics to track collapse failures for different
>orders when encountering swap PTEs, excessive none PTEs, and shared PTEs:
>
>- collapse_exceed_swap_pte: Increment when mTHP collapse fails due to
> encountering a swap PTE.
>
>- collapse_exceed_none_pte: Counts when mTHP collapse fails due to
> exceeding the none PTE threshold for the given order
>
>- collapse_exceed_shared_pte: Counts when mTHP collapse fails due to
> encountering a shared PTE.
>
>These statistics complement the existing THP_SCAN_EXCEED_* events by
>providing per-order granularity for mTHP collapse attempts. The stats are
>exposed via sysfs under
>`/sys/kernel/mm/transparent_hugepage/hugepages-*/stats/` for each
>supported hugepage size.
>
>As we currently do not support collapsing mTHPs that contain a swap or
>shared entry, those statistics keep track of how often we are
>encountering failed mTHP collapses due to these restrictions.
>
>We will add support for mTHP collapse for anonymous pages next; let's also
>track when this happens at the PMD level within the per-mTHP stats.
>
>Reviewed-by: Lorenzo Stoakes <ljs@kernel.org>
>Acked-by: David Hildenbrand (Arm) <david@kernel.org>
>Signed-off-by: Nico Pache <npache@redhat.com>
>---
Reviewed-by: Lance Yang <lance.yang@linux.dev>
^ permalink raw reply
* Re: [PATCH mm-unstable v19 07/14] mm/khugepaged: skip collapsing mTHP to smaller orders
From: Lance Yang @ 2026-06-08 6:59 UTC (permalink / raw)
To: npache
Cc: linux-doc, linux-kernel, linux-mm, linux-trace-kernel, aarcange,
akpm, anshuman.khandual, apopple, baohua, baolin.wang, byungchul,
catalin.marinas, cl, corbet, dave.hansen, david, dev.jain, gourry,
hannes, hughd, jack, jackmanb, jannh, jglisse, joshua.hahnjy, kas,
lance.yang, liam, ljs, mathieu.desnoyers, matthew.brost, mhiramat,
mhocko, peterx, pfalcato, rakie.kim, raquini, rdunlap,
richard.weiyang, rientjes, rostedt, rppt, ryan.roberts, shivankg,
sunnanyong, surenb, thomas.hellstrom, tiwai, usamaarif642, vbabka,
vishal.moola, wangkefeng.wang, will, willy, yang, ying.huang, ziy,
zokeefe, usama.arif
In-Reply-To: <20260605161422.213817-8-npache@redhat.com>
On Fri, Jun 05, 2026 at 10:14:14AM -0600, Nico Pache wrote:
>khugepaged may try to collapse a mTHP to a folio of equal or smaller size,
>possibly resulting in a partially mapped source folio, which is undesired.
>Skip these cases until we have a way to check if it's ok to collapse to a
>smaller mTHP size (like in the case of a partially mapped folio). This
>check is not done during the scan phase as the current collapse order is
>unknown at that time.
>
>This patch is inspired by Dev Jain's work on khugepaged mTHP support [1].
>
>[1] https://lore.kernel.org/lkml/20241216165105.56185-11-dev.jain@arm.com/
>
>Reviewed-by: Lorenzo Stoakes <ljs@kernel.org>
>Reviewed-by: Baolin Wang <baolin.wang@linux.alibaba.com>
>Acked-by: David Hildenbrand (arm) <david@kernel.org>
>Acked-by: Usama Arif <usama.arif@linux.dev>
>Co-developed-by: Dev Jain <dev.jain@arm.com>
>Signed-off-by: Dev Jain <dev.jain@arm.com>
>Signed-off-by: Nico Pache <npache@redhat.com>
>---
> mm/khugepaged.c | 8 ++++++++
> 1 file changed, 8 insertions(+)
>
>diff --git a/mm/khugepaged.c b/mm/khugepaged.c
>index c2769d82a719..191e529c185c 100644
>--- a/mm/khugepaged.c
>+++ b/mm/khugepaged.c
>@@ -697,6 +697,14 @@ static enum scan_result __collapse_huge_page_isolate(struct vm_area_struct *vma,
> goto out;
> }
> }
>+ /*
>+ * TODO: In some cases of partially-mapped folios, we'd actually
>+ * want to collapse.
>+ */
Partially mapped folios can be handled later :)
Reviewed-by: Lance Yang <lance.yang@linux.dev>
>+ if (!is_pmd_order(order) && folio_order(folio) >= order) {
>+ result = SCAN_PTE_MAPPED_HUGEPAGE;
>+ goto out;
>+ }
>
> if (folio_test_large(folio)) {
> struct folio *f;
>--
>2.54.0
>
>
^ permalink raw reply
* Re: [PATCH mm-unstable v19 06/14] mm/khugepaged: generalize collapse_huge_page for mTHP collapse
From: Lance Yang @ 2026-06-08 4:54 UTC (permalink / raw)
To: npache
Cc: linux-doc, linux-kernel, linux-mm, linux-trace-kernel, aarcange,
akpm, anshuman.khandual, apopple, baohua, baolin.wang, byungchul,
catalin.marinas, cl, corbet, dave.hansen, david, dev.jain, gourry,
hannes, hughd, jack, jackmanb, jannh, jglisse, joshua.hahnjy, kas,
lance.yang, liam, ljs, mathieu.desnoyers, matthew.brost, mhiramat,
mhocko, peterx, pfalcato, rakie.kim, raquini, rdunlap,
richard.weiyang, rientjes, rostedt, rppt, ryan.roberts, shivankg,
sunnanyong, surenb, thomas.hellstrom, tiwai, usamaarif642, vbabka,
vishal.moola, wangkefeng.wang, will, willy, yang, ying.huang, ziy,
zokeefe
In-Reply-To: <20260605161422.213817-7-npache@redhat.com>
On Fri, Jun 05, 2026 at 10:14:13AM -0600, Nico Pache wrote:
>Pass an order to collapse_huge_page to support collapsing anon memory to
>arbitrary orders within a PMD. order indicates what mTHP size we are
>attempting to collapse to.
>
>For non-PMD collapse we must leave the anon VMA write locked until after
>we collapse the mTHP-- in the PMD case all the pages are isolated, but in
>the mTHP case this is not true, and we must keep the lock to prevent
>access/changes to the page tables. This can happen if the rmap walkers hit
>a pmd_none while the PMD entry is currently unavailable due to being
>temporarily removed during the collapse phase.
>
>To properly establish the page table hierarchy without violating any
>expectations from certain architectures (e.g. MIPS), we must make sure to
>have the PMD reinstalled before the PTEs, and hold both PTE/PMD locks
>before calling update_mmu_cache_range() (if they are distinct locks).
>
>Signed-off-by: Nico Pache <npache@redhat.com>
>---
Nothing else jumped out at me. Anything left can be sorted out later, as
David and Lorenzo said :)
Reviewed-by: Lance Yang <lance.yang@linux.dev>
^ permalink raw reply
* Re: [PATCH mm-unstable v19 05/14] mm/khugepaged: require collapse_huge_page to enter/exit with the lock dropped
From: Lance Yang @ 2026-06-08 4:34 UTC (permalink / raw)
To: npache
Cc: linux-doc, linux-kernel, linux-mm, linux-trace-kernel, aarcange,
akpm, anshuman.khandual, apopple, baohua, baolin.wang, byungchul,
catalin.marinas, cl, corbet, dave.hansen, david, dev.jain, gourry,
hannes, hughd, jack, jackmanb, jannh, jglisse, joshua.hahnjy, kas,
lance.yang, liam, ljs, mathieu.desnoyers, matthew.brost, mhiramat,
mhocko, peterx, pfalcato, rakie.kim, raquini, rdunlap,
richard.weiyang, rientjes, rostedt, rppt, ryan.roberts, shivankg,
sunnanyong, surenb, thomas.hellstrom, tiwai, usamaarif642, vbabka,
vishal.moola, wangkefeng.wang, will, willy, yang, ying.huang, ziy,
zokeefe
In-Reply-To: <20260605161422.213817-6-npache@redhat.com>
On Fri, Jun 05, 2026 at 10:14:12AM -0600, Nico Pache wrote:
>Currently the collapse_huge_page function must be entered with the
>mmap_read_lock held, and exits with it dropped. This patch moves the
>unlock into its parent caller, and changes the semantics to require that
>the function is always entered and exited with the lock dropped.
>
>In future patches, we need this expectation, because in mTHP collapse we
>may have already dropped the lock, and do not want to conditionally
>check for this by passing through the lock_dropped variable.
>
>No functional change is expected as one of the first things the
>collapse_huge_page function does is drop this lock before allocating the
>hugepage.
>
>Reviewed-by: Lorenzo Stoakes <ljs@kernel.org>
>Acked-by: David Hildenbrand (Arm) <david@kernel.org>
>Signed-off-by: Nico Pache <npache@redhat.com>
>---
Reviewed-by: Lance Yang <lance.yang@linux.dev>
^ permalink raw reply
* Re: [PATCH] rethook: Use tsk->on_cpu to check task execution state
From: Masami Hiramatsu @ 2026-06-08 2:56 UTC (permalink / raw)
To: Tengda Wu, Josh Poimboeuf
Cc: Peter Zijlstra, Steven Rostedt, Mathieu Desnoyers,
Alexei Starovoitov, linux-trace-kernel, linux-kernel
In-Reply-To: <679a1c8f-1e4d-4ae5-83e1-d0068e6de1a6@huaweicloud.com>
On Mon, 8 Jun 2026 09:52:37 +0800
Tengda Wu <wutengda@huaweicloud.com> wrote:
>
>
> On 2026/6/5 21:43, Masami Hiramatsu wrote:
> > On Thu, 4 Jun 2026 11:34:45 +0200
> > Peter Zijlstra <peterz@infradead.org> wrote:
> >
> >> On Mon, Jun 01, 2026 at 08:40:01AM +0900, Masami Hiramatsu wrote:
> >>
> >>> Peter, is it OK to drop @rq from task_on_cpu()?
> >>
> >> Sure.
> >>
> >>> Then we can use it from rethook.
> >>
> >> Well, it is in sched/sched.h, which is an internal header, and no you
> >> cannot use that header in rethook.
> >
> > Ah, OK. Hmm, then we should not use it. Maybe ->on_cpu is also internal
> > state?
> >
> >>
> >> But lets step back first, what is the actual problem here, why are we
> >> looking at ->on_cpu at all?
> >
> > Tengda, can you explain it?
> > I think you want to take a stacktrace on !current process, and
> > rethook_find_ret_addr() is rejected if the task is in running state.
> >
> > But if you can share actual situation what you need, it is
> > helpful for us to understand.
> >
> > Thank you,
> >
> >
>
>
> Sure.
>
> Background: We are verifying the support of live patches for functions that
> have a kretprobe. The specific verification method is as follows:
>
> We construct a function foo() that calls bar():
>
> void bar(void)
> {
> for (;;) {
> schedule();
> }
> }
>
> void foo(void)
> {
> bar();
> }
>
> A kretprobe is attached to bar():
>
> echo 'r:rp1 bar' > /sys/kernel/tracing/kprobe_events
> echo 1 > /sys/kernel/tracing/events/kprobes/rp1/enable
>
> Then foo() is triggered. The expected behavior is that bar() will call
> schedule() and yield the CPU.
>
> After that, the live patch is activated to attempt replacing the implementation
> of foo(). The expectation is that this should succeed.
>
> However, in reality, because the task that called schedule() is still in the
> RUNNING state, the condition task_is_running(tsk) inside rethook_find_ret_addr()
> is not satisfied, causing the function to return early. This, in turn,
> prevents stack_trace_save_tsk_reliable() from determining the stack as
> reliable, leading to a failure in activating the live patch.
Hmm, is bar() doing an infinite loop, or a bounded loop that takes a long time
and so just yields for a while? Anyway, it does not seem like a good design pattern.
Is it possible to avoid busy loops and instead use workers, or wait for
something to complete or for input within the loop?
>
> **Not sure if this is correct:**
>
> We believe that after a task voluntarily calls schedule(), when the stack
> is expected to be reliable, it is a safe time to activate a live patch.
In this case, I don't know how to block the loop inside the bar.
Even if !tsk->on_cpu, the tsk can restart running right after checking
the flag.
> Additionally, a similar tsk->on_cpu check can be found elsewhere in the
> kernel (See task_on_another_cpu() in arch/x86/include/asm/unwind.h).
> Therefore, we propose changing the task_is_running(tsk) condition to
> tsk->on_cpu.
Yes, but the comment at the call site says there are other checks that cover the race.
/*
* Refuse to unwind the stack of a task while it's executing on another
* CPU. This check is racy, but that's ok: the unwinder has other
* checks to prevent it from going off the rails.
*/
if (task_on_another_cpu(task))
goto err;
Josh, do you know how this avoids the race?
Thank you,
>
> Thanks,
> Tengda
>
--
Masami Hiramatsu (Google) <mhiramat@kernel.org>
^ permalink raw reply
* Re: [PATCH v2 3/6] bootconfig: render embedded bootconfig as a kernel cmdline at build time
From: Masami Hiramatsu @ 2026-06-08 2:29 UTC (permalink / raw)
To: Breno Leitao
Cc: Andrew Morton, Nathan Chancellor, paulmck, Nicolas Schier,
Thomas Gleixner, Ingo Molnar, Borislav Petkov, Dave Hansen, x86,
H. Peter Anvin, linux-kernel, linux-trace-kernel, linux-kbuild,
bpf, kernel-team
In-Reply-To: <20260605-bootconfig_using_tools-v2-3-d309f544b5f7@debian.org>
Hi Breno,
On Fri, 05 Jun 2026 05:03:34 -0700
Breno Leitao <leitao@debian.org> wrote:
> diff --git a/tools/bootconfig/Makefile b/tools/bootconfig/Makefile
> index 90eb47c9d8de..aa75a7828685 100644
> --- a/tools/bootconfig/Makefile
> +++ b/tools/bootconfig/Makefile
> @@ -15,10 +15,14 @@ override CFLAGS += -Wall -g -I$(CURDIR)/include
> ALL_TARGETS := bootconfig
> ALL_PROGRAMS := $(patsubst %,$(OUTPUT)%,$(ALL_TARGETS))
>
> -all: $(ALL_PROGRAMS) test
> +all: $(ALL_PROGRAMS)
>
> +# bootconfig is a build host tool: Kbuild's prepare hook runs it on the
> +# build machine to render the embedded cmdline, so always compile it with
> +# $(HOSTCC). Using $(CC) would cross-compile it under ARCH=... builds and
> +# fail to exec on the host ("Exec format error").
> $(OUTPUT)bootconfig: main.c include/linux/bootconfig.h $(LIBSRC)
> - $(CC) $(filter %.c,$^) $(CFLAGS) $(LDFLAGS) -o $@
> + $(HOSTCC) $(filter %.c,$^) $(CFLAGS) $(LDFLAGS) -o $@
> Is it safe to pass $(CFLAGS) and $(LDFLAGS) to $(HOSTCC) here?
> When cross-compiling, $(CFLAGS) and $(LDFLAGS) often contain target-specific
> flags. Passing these target flags to the host compiler might cause it to fail,
> or incorrectly generate binaries for the target architecture that fail to
> execute on the build host.
Sashiko found a problem here. Hmm, I would like to build the bootconfig
tool just as a tool. (like `cd tools/bootconfig; make`)
Can we detect whether the build (make) is invoked from the kernel build process
or not, and switch CC/CFLAGS/LDFLAGS accordingly?
>
> Additionally, since bootconfig is an administrative utility meant to be
> deployed on the target system, will permanently hardcoding $(HOSTCC) prevent
> users from cross-compiling it for their target devices?
This is also a good point. We need both a cross-built binary and a host binary.
Thank you,
--
Masami Hiramatsu (Google) <mhiramat@kernel.org>
^ permalink raw reply
* Re: [PATCH] rethook: Use tsk->on_cpu to check task execution state
From: Tengda Wu @ 2026-06-08 1:52 UTC (permalink / raw)
To: Masami Hiramatsu, Peter Zijlstra
Cc: Steven Rostedt, Mathieu Desnoyers, Alexei Starovoitov,
linux-trace-kernel, linux-kernel
In-Reply-To: <20260605224341.c926299d613b6102912c9a3f@kernel.org>
On 2026/6/5 21:43, Masami Hiramatsu wrote:
> On Thu, 4 Jun 2026 11:34:45 +0200
> Peter Zijlstra <peterz@infradead.org> wrote:
>
>> On Mon, Jun 01, 2026 at 08:40:01AM +0900, Masami Hiramatsu wrote:
>>
>>> Peter, is it OK to drop @rq from task_on_cpu()?
>>
>> Sure.
>>
>>> Then we can use it from rethook.
>>
>> Well, it is in sched/sched.h, which is an internal header, and no you
>> cannot use that header in rethook.
>
> Ah, OK. Hmm, then we should not use it. Maybe ->on_cpu is also internal
> state?
>
>>
>> But lets step back first, what is the actual problem here, why are we
>> looking at ->on_cpu at all?
>
> Tengda, can you explain it?
> I think you want to take a stacktrace on !current process, and
> rethook_find_ret_addr() is rejected if the task is in running state.
>
> But if you can share actual situation what you need, it is
> helpful for us to understand.
>
> Thank you,
>
>
Sure.
Background: We are verifying the support of live patches for functions that
have a kretprobe. The specific verification method is as follows:
We construct a function foo() that calls bar():
void bar(void)
{
for (;;) {
schedule();
}
}
void foo(void)
{
bar();
}
A kretprobe is attached to bar():
echo 'r:rp1 bar' > /sys/kernel/tracing/kprobe_events
echo 1 > /sys/kernel/tracing/events/kprobes/rp1/enable
Then foo() is triggered. The expected behavior is that bar() will call
schedule() and yield the CPU.
After that, the live patch is activated to attempt replacing the implementation
of foo(). The expectation is that this should succeed.
However, in reality, because the task that called schedule() is still in the
RUNNING state, the condition task_is_running(tsk) inside rethook_find_ret_addr()
is not satisfied, causing the function to return early. This, in turn,
prevents stack_trace_save_tsk_reliable() from determining the stack as
reliable, leading to a failure in activating the live patch.
**Not sure if this is correct:**
We believe that after a task voluntarily calls schedule(), when the stack
is expected to be reliable, it is a safe time to activate a live patch.
Additionally, a similar tsk->on_cpu check can be found elsewhere in the
kernel (See task_on_another_cpu() in arch/x86/include/asm/unwind.h).
Therefore, we propose changing the task_is_running(tsk) condition to
tsk->on_cpu.
Thanks,
Tengda
^ permalink raw reply
* Re: [RFC PATCH v3 0/3] trace: stack trace deduplication for ftrace ring buffer
From: Li Pengfei @ 2026-06-08 2:06 UTC (permalink / raw)
To: rostedt, mhiramat
Cc: linux-trace-kernel, linux-kernel, cmllamas, zhangbo56, Pengfei Li
In-Reply-To: <cover.1779769138.git.lipengfei28@xiaomi.com>
From: Pengfei Li <lipengfei28@xiaomi.com>
Hi Steven, Masami,
Friendly ping on this v3 series. Let me know if there's anything
I should adjust or any direction concerns you'd like me to address.
Lore link:
https://lore.kernel.org/all/cover.1779769138.git.lipengfei28@xiaomi.com/
Pengfei
^ permalink raw reply
* Re: [PATCHv8 bpf-next 00/29] bpf: tracing_multi link
From: patchwork-bot+netdevbpf @ 2026-06-07 18:20 UTC (permalink / raw)
To: Jiri Olsa
Cc: ast, daniel, andrii, hengqi.chen, bpf, linux-trace-kernel,
martin.lau, eddyz87, songliubraving, yhs, menglong8.dong, rostedt
In-Reply-To: <20260606123955.345967-1-jolsa@kernel.org>
Hello:
This series was applied to bpf/bpf-next.git (master)
by Alexei Starovoitov <ast@kernel.org>:
On Sat, 6 Jun 2026 14:39:25 +0200 you wrote:
> hi,
> adding tracing_multi link support that allows fast attachment
> of tracing program to many functions.
>
> RFC: https://lore.kernel.org/bpf/20260203093819.2105105-1-jolsa@kernel.org/
> v1: https://lore.kernel.org/bpf/20260220100649.628307-1-jolsa@kernel.org/
> v2: https://lore.kernel.org/bpf/20260304222141.497203-1-jolsa@kernel.org/
> v3: https://lore.kernel.org/bpf/20260316075138.465430-1-jolsa@kernel.org/
> v4: https://lore.kernel.org/bpf/20260324081846.2334094-1-jolsa@kernel.org/
> v5: https://lore.kernel.org/bpf/20260417192502.194548-1-jolsa@kernel.org/
> v6: https://lore.kernel.org/bpf/20260527113951.46265-1-jolsa@kernel.org/
> v7: https://lore.kernel.org/bpf/20260603110554.29590-1-jolsa@kernel.org/
>
> [...]
Here is the summary with links:
- [PATCHv8,bpf-next,01/29] ftrace: Add ftrace_hash_count function
https://git.kernel.org/bpf/bpf-next/c/e57f13eaab25
- [PATCHv8,bpf-next,02/29] ftrace: Add ftrace_hash_remove function
https://git.kernel.org/bpf/bpf-next/c/af7c32365090
- [PATCHv8,bpf-next,03/29] ftrace: Add add_ftrace_hash_entry function
https://git.kernel.org/bpf/bpf-next/c/2cd298c106e0
- [PATCHv8,bpf-next,04/29] bpf: Use mutex lock pool for bpf trampolines
https://git.kernel.org/bpf/bpf-next/c/e6abd4cd157b
- [PATCHv8,bpf-next,05/29] bpf: Add struct bpf_trampoline_ops object
https://git.kernel.org/bpf/bpf-next/c/8a35e8db740f
- [PATCHv8,bpf-next,06/29] bpf: Move trampoline image setup into bpf_trampoline_ops callbacks
https://git.kernel.org/bpf/bpf-next/c/bf4bc3e11c41
- [PATCHv8,bpf-next,07/29] bpf: Add bpf_trampoline_add/remove_prog functions
https://git.kernel.org/bpf/bpf-next/c/e6cc9ed677e6
- [PATCHv8,bpf-next,08/29] bpf: Add struct bpf_tramp_node object
https://git.kernel.org/bpf/bpf-next/c/65499074efaf
- [PATCHv8,bpf-next,09/29] bpf: Factor fsession link to use struct bpf_tramp_node
https://git.kernel.org/bpf/bpf-next/c/880db5d4abb2
- [PATCHv8,bpf-next,10/29] bpf: Add multi tracing attach types
https://git.kernel.org/bpf/bpf-next/c/d14e6b4346bf
- [PATCHv8,bpf-next,11/29] bpf: Move sleepable verification code to btf_id_allow_sleepable
https://git.kernel.org/bpf/bpf-next/c/bd06659d3b8a
- [PATCHv8,bpf-next,12/29] bpf: Add bpf_trampoline_multi_attach/detach functions
https://git.kernel.org/bpf/bpf-next/c/aef4dfa790b2
- [PATCHv8,bpf-next,13/29] bpf: Add support for tracing multi link
https://git.kernel.org/bpf/bpf-next/c/c1d32dea5d46
- [PATCHv8,bpf-next,14/29] bpf: Add support for tracing_multi link cookies
https://git.kernel.org/bpf/bpf-next/c/46b42af27d40
- [PATCHv8,bpf-next,15/29] bpf: Add support for tracing_multi link session
https://git.kernel.org/bpf/bpf-next/c/ba042ed6446f
- [PATCHv8,bpf-next,16/29] bpf: Add support for tracing_multi link fdinfo
https://git.kernel.org/bpf/bpf-next/c/8abecdafd575
- [PATCHv8,bpf-next,17/29] libbpf: Add bpf_object_cleanup_btf function
https://git.kernel.org/bpf/bpf-next/c/fe9c8cb2b52b
- [PATCHv8,bpf-next,18/29] libbpf: Add bpf_link_create support for tracing_multi link
https://git.kernel.org/bpf/bpf-next/c/630e85a9f005
- [PATCHv8,bpf-next,19/29] libbpf: Add btf_type_is_traceable_func function
https://git.kernel.org/bpf/bpf-next/c/616a93b473a6
- [PATCHv8,bpf-next,20/29] libbpf: Add support to create tracing multi link
https://git.kernel.org/bpf/bpf-next/c/f2aa370dfe57
- [PATCHv8,bpf-next,21/29] selftests/bpf: Add tracing multi skel/pattern/ids attach tests
https://git.kernel.org/bpf/bpf-next/c/2922dd58413c
- [PATCHv8,bpf-next,22/29] selftests/bpf: Add tracing multi skel/pattern/ids module attach tests
https://git.kernel.org/bpf/bpf-next/c/2863f074f146
- [PATCHv8,bpf-next,23/29] selftests/bpf: Add tracing multi intersect tests
https://git.kernel.org/bpf/bpf-next/c/4309f580a0a6
- [PATCHv8,bpf-next,24/29] selftests/bpf: Add tracing multi cookies test
https://git.kernel.org/bpf/bpf-next/c/1b938f42f5fa
- [PATCHv8,bpf-next,25/29] selftests/bpf: Add tracing multi session test
https://git.kernel.org/bpf/bpf-next/c/69f25d4b0c17
- [PATCHv8,bpf-next,26/29] selftests/bpf: Add tracing multi attach fails test
https://git.kernel.org/bpf/bpf-next/c/1fd832854997
- [PATCHv8,bpf-next,27/29] selftests/bpf: Add tracing multi verifier fails test
https://git.kernel.org/bpf/bpf-next/c/443c91d08c4b
- [PATCHv8,bpf-next,28/29] selftests/bpf: Add tracing multi attach benchmark test
(no matching commit)
- [PATCHv8,bpf-next,29/29] selftests/bpf: Add tracing multi attach rollback tests
https://git.kernel.org/bpf/bpf-next/c/b349efe49a12
You are awesome, thank you!
--
Deet-doot-dot, I am a bot.
https://korg.docs.kernel.org/patchwork/pwbot.html
^ permalink raw reply
* Re: [PATCHv8 bpf-next 28/29] selftests/bpf: Add tracing multi attach benchmark test
From: Alexei Starovoitov @ 2026-06-07 18:13 UTC (permalink / raw)
To: Jiri Olsa, Alexei Starovoitov, Daniel Borkmann, Andrii Nakryiko
Cc: bpf, linux-trace-kernel, Martin KaFai Lau, Eduard Zingerman,
Song Liu, Yonghong Song, Menglong Dong, Steven Rostedt
In-Reply-To: <20260606123955.345967-29-jolsa@kernel.org>
On Sat Jun 6, 2026 at 5:39 AM PDT, Jiri Olsa wrote:
> Adding benchmark test that attaches to (almost) all allowed tracing
> functions and displays attach/detach times.
>
> # ./test_progs -t tracing_multi_bench_attach -v
> bpf_testmod.ko is already unloaded.
> Loading bpf_testmod.ko...
> Successfully loaded bpf_testmod.ko.
> serial_test_tracing_multi_bench_attach:PASS:btf__load_vmlinux_btf 0 nsec
> serial_test_tracing_multi_bench_attach:PASS:tracing_multi_bench__open_and_load 0 nsec
> serial_test_tracing_multi_bench_attach:PASS:get_syms 0 nsec
> serial_test_tracing_multi_bench_attach:PASS:bpf_program__attach_tracing_multi 0 nsec
> serial_test_tracing_multi_bench_attach: found 51186 functions
> serial_test_tracing_multi_bench_attach: attached in 1.295s
> serial_test_tracing_multi_bench_attach: detached in 0.243s
...
> + if (!ASSERT_OK(bpf_get_ksyms(&ksyms, true), "get_syms"))
> + goto cleanup;
> +
> + /* Get all ftrace 'safe' symbols.. */
> + for (i = 0; i < ksyms->filtered_cnt; i++) {
> + if (!tsearch(&ksyms->filtered_syms[i], &root, compare)) {
> + ASSERT_FAIL("tsearch failed");
> + goto cleanup;
> + }
> + }
> +
> + /* ..and filter them through BTF and btf_type_is_traceable_func. */
> + nr = btf__type_cnt(btf);
> + for (type_id = 1; type_id < nr; type_id++) {
> + const struct btf_type *type;
> + const char *str;
> +
> + type = btf__type_by_id(btf, type_id);
> + if (!type)
> + break;
> +
> + if (BTF_INFO_KIND(type->info) != BTF_KIND_FUNC)
> + continue;
> +
> + str = btf__name_by_offset(btf, type->name_off);
> + if (!str)
> + break;
> +
> + if (!tfind(&str, &root, compare))
> + continue;
> +
> + if (!btf_type_is_traceable_func(btf, type))
> + continue;
> +
> + err = libbpf_ensure_mem((void **) &ids, &cap, sizeof(*ids), cnt + 1);
> + if (err)
> + goto cleanup;
> +
> + ids[cnt++] = type_id;
> + }
This filtering wasn't enough.
I've added removal of duplicates here while applying:
+ /*
+ * Collect names that are not unique in kallsyms. The kernel resolves a
+ * tracing-multi BTF id to an address with kallsyms_lookup_name(), which
+ * returns the first symbol of that name. For a duplicate name that may
+ * be a different (non-ftrace-able) instance than the ftrace-able one in
+ * available_filter_functions, so attaching to it by BTF id fails with
+ * -ENOENT (e.g. t_start/t_next/t_stop). ksyms->syms is sorted by name,
+ * so equal names are adjacent.
+ */
+ for (i = 1; i < ksyms->sym_cnt; i++) {
+ if (strcmp(ksyms->syms[i].name, ksyms->syms[i - 1].name))
+ continue;
+ if (!tsearch(&ksyms->syms[i].name, &dups, compare)) {
+ ASSERT_FAIL("tsearch failed");
+ goto cleanup;
+ }
+ }
+ /* Skip names that are not unique in kallsyms, see above. */
+ if (tfind(&str, &dups, compare))
+ continue;
As Claude explains it:
----
1. The kernel attaches tracing_multi by BTF id. To get an address it resolves
the BTF function name via kallsyms_lookup_name(tname) and requires
ftrace_location(addr) — kernel/bpf/verifier.c:19380:
addr = kallsyms_lookup_name(tname);
...
if (!addr || !ftrace_location(addr))
return -ENOENT;
2. t_start/t_next/t_stop each have 5 instances in this kernel. Only one is
ftrace-able — the copies in kernel/trace/* are built notrace (ftrace's Makefile
strips -pg), so only the unrelated copy is in available_filter_functions:
3. kallsyms_lookup_name() returns the lowest-address instance among equal names
(exact strcmp, lowest seq). That instance has no fentry → ftrace_location()
returns 0 → -ENOENT, which aborts the whole all-or-nothing bench attach.
Why the bench includes them: it intersects BTF FUNC names with
available_filter_functions names. Since some t_start is ftrace-able, the name
passes the filter — but the kernel resolves the wrong (non-ftrace-able)
t_start. The author's kernel apparently had the ftrace-able copy at the lowest
address, so it passed there.
This is a pre-existing limitation, not multi-specific: single fentry attach by
BTF id uses the same kallsyms_lookup_name(tname) path (verifier.c:19120) — you
can't reliably fentry-attach to any duplicate-named function on this kernel
either.
----
Maybe we should adjust bpf_get_ksyms() instead. Not sure.
After the fix in my gcc 15 kernel with KASAN:
serial_test_tracing_multi_bench_attach: found 49045 functions
serial_test_tracing_multi_bench_attach: attached in 4.115s
serial_test_tracing_multi_bench_attach: detached in 2.313s
^ permalink raw reply
* [PATCH v3 9/9] selftests/verification: add tlob selftests
From: wen.yang @ 2026-06-07 16:13 UTC (permalink / raw)
To: Gabriele Monaco
Cc: Steven Rostedt, linux-trace-kernel, linux-kernel, Wen Yang
In-Reply-To: <cover.1780847473.git.wen.yang@linux.dev>
From: Wen Yang <wen.yang@linux.dev>
Add selftest coverage for the tlob uprobe monitoring interface under
tools/testing/selftests/verification/.
test.d/tlob/ contains both the helper sources (tlob_target, tlob_sym)
and the seven test scripts so the test suite is self-contained.
tlob_target provides busy-spin, sleep, and preempt workloads; tlob_sym
resolves ELF symbol offsets for uprobe registration.
Seven test scripts exercise uprobe binding management, budget violation
detection, and per-state time accounting (running_ns, waiting_ns,
sleeping_ns).
Signed-off-by: Wen Yang <wen.yang@linux.dev>
---
.../testing/selftests/verification/.gitignore | 2 +
tools/testing/selftests/verification/Makefile | 19 +-
.../verification/test.d/tlob/Makefile | 20 ++
.../verification/test.d/tlob/test.d/functions | 1 +
.../verification/test.d/tlob/tlob_sym.c | 189 ++++++++++++++++++
.../verification/test.d/tlob/tlob_target.c | 138 +++++++++++++
.../verification/test.d/tlob/uprobe_bind.tc | 37 ++++
.../test.d/tlob/uprobe_detail_running.tc | 51 +++++
.../test.d/tlob/uprobe_detail_sleeping.tc | 50 +++++
.../test.d/tlob/uprobe_detail_waiting.tc | 66 ++++++
.../verification/test.d/tlob/uprobe_multi.tc | 64 ++++++
.../test.d/tlob/uprobe_no_event.tc | 19 ++
.../test.d/tlob/uprobe_violation.tc | 67 +++++++
13 files changed, 722 insertions(+), 1 deletion(-)
create mode 100644 tools/testing/selftests/verification/test.d/tlob/Makefile
create mode 100644 tools/testing/selftests/verification/test.d/tlob/test.d/functions
create mode 100644 tools/testing/selftests/verification/test.d/tlob/tlob_sym.c
create mode 100644 tools/testing/selftests/verification/test.d/tlob/tlob_target.c
create mode 100644 tools/testing/selftests/verification/test.d/tlob/uprobe_bind.tc
create mode 100644 tools/testing/selftests/verification/test.d/tlob/uprobe_detail_running.tc
create mode 100644 tools/testing/selftests/verification/test.d/tlob/uprobe_detail_sleeping.tc
create mode 100644 tools/testing/selftests/verification/test.d/tlob/uprobe_detail_waiting.tc
create mode 100644 tools/testing/selftests/verification/test.d/tlob/uprobe_multi.tc
create mode 100644 tools/testing/selftests/verification/test.d/tlob/uprobe_no_event.tc
create mode 100644 tools/testing/selftests/verification/test.d/tlob/uprobe_violation.tc
diff --git a/tools/testing/selftests/verification/.gitignore b/tools/testing/selftests/verification/.gitignore
index 2659417cb2c7..cbbd03ee16c7 100644
--- a/tools/testing/selftests/verification/.gitignore
+++ b/tools/testing/selftests/verification/.gitignore
@@ -1,2 +1,4 @@
# SPDX-License-Identifier: GPL-2.0-only
logs
+test.d/tlob/tlob_sym
+test.d/tlob/tlob_target
diff --git a/tools/testing/selftests/verification/Makefile b/tools/testing/selftests/verification/Makefile
index aa8790c22a71..0b32bdfdb8db 100644
--- a/tools/testing/selftests/verification/Makefile
+++ b/tools/testing/selftests/verification/Makefile
@@ -1,8 +1,25 @@
# SPDX-License-Identifier: GPL-2.0
-all:
TEST_PROGS := verificationtest-ktap
TEST_FILES := test.d settings
EXTRA_CLEAN := $(OUTPUT)/logs/*
+# Subdirectories that provide binaries used by the test runner.
+# Each entry must contain a Makefile that accepts OUTDIR= and
+# deposits its binaries there.
+BUILD_SUBDIRS := test.d/tlob
+
include ../lib.mk
+
+all: $(patsubst %,_build_%,$(BUILD_SUBDIRS))
+
+clean: $(patsubst %,_clean_%,$(BUILD_SUBDIRS))
+
+.PHONY: $(patsubst %,_build_%,$(BUILD_SUBDIRS)) \
+ $(patsubst %,_clean_%,$(BUILD_SUBDIRS))
+
+$(patsubst %,_build_%,$(BUILD_SUBDIRS)): _build_%:
+ $(MAKE) -C $* OUTDIR="$(OUTPUT)" TOOLS_INCLUDES="$(TOOLS_INCLUDES)"
+
+$(patsubst %,_clean_%,$(BUILD_SUBDIRS)): _clean_%:
+ $(MAKE) -C $* OUTDIR="$(OUTPUT)" clean
diff --git a/tools/testing/selftests/verification/test.d/tlob/Makefile b/tools/testing/selftests/verification/test.d/tlob/Makefile
new file mode 100644
index 000000000000..29b3519b255f
--- /dev/null
+++ b/tools/testing/selftests/verification/test.d/tlob/Makefile
@@ -0,0 +1,20 @@
+# SPDX-License-Identifier: GPL-2.0
+# Builds tlob selftest helper binaries in the directory of this Makefile.
+#
+# Invoked by ../../Makefile via BUILD_SUBDIRS; outputs tlob_sym and
+# tlob_target alongside the .tc scripts so they are self-contained.
+
+CFLAGS += $(TOOLS_INCLUDES)
+
+.PHONY: all
+all: tlob_sym tlob_target
+
+tlob_sym: tlob_sym.c
+ $(CC) $(CFLAGS) -o $@ $<
+
+tlob_target: tlob_target.c
+ $(CC) $(CFLAGS) -o $@ $<
+
+.PHONY: clean
+clean:
+ $(RM) tlob_sym tlob_target
diff --git a/tools/testing/selftests/verification/test.d/tlob/test.d/functions b/tools/testing/selftests/verification/test.d/tlob/test.d/functions
new file mode 100644
index 000000000000..0b4c5e4344d2
--- /dev/null
+++ b/tools/testing/selftests/verification/test.d/tlob/test.d/functions
@@ -0,0 +1 @@
+. "${TOP_DIR%/*}/functions"
diff --git a/tools/testing/selftests/verification/test.d/tlob/tlob_sym.c b/tools/testing/selftests/verification/test.d/tlob/tlob_sym.c
new file mode 100644
index 000000000000..1b7ba1c6d95b
--- /dev/null
+++ b/tools/testing/selftests/verification/test.d/tlob/tlob_sym.c
@@ -0,0 +1,189 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * tlob_sym.c - ELF symbol-to-file-offset utility for tlob selftests
+ *
+ * Usage: tlob_sym sym_offset <binary> <symbol>
+ *
+ * Prints the ELF file offset of <symbol> in <binary> to stdout.
+ *
+ * Exit: 0 = found, 1 = error / not found.
+ */
+#include <elf.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+static int sym_offset(const char *binary, const char *symname)
+{
+ int fd;
+ struct stat st;
+ void *map;
+ Elf64_Ehdr *ehdr;
+ Elf32_Ehdr *ehdr32;
+ int is64;
+ uint64_t sym_vaddr = 0;
+ int found = 0;
+ uint64_t file_offset = 0;
+
+ fd = open(binary, O_RDONLY);
+ if (fd < 0) {
+ fprintf(stderr, "open %s: %s\n", binary, strerror(errno));
+ return 1;
+ }
+ if (fstat(fd, &st) < 0) {
+ close(fd);
+ return 1;
+ }
+ map = mmap(NULL, (size_t)st.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
+ close(fd);
+ if (map == MAP_FAILED) {
+ fprintf(stderr, "mmap: %s\n", strerror(errno));
+ return 1;
+ }
+
+ ehdr = (Elf64_Ehdr *)map;
+ ehdr32 = (Elf32_Ehdr *)map;
+ if (st.st_size < 4 ||
+ ehdr->e_ident[EI_MAG0] != ELFMAG0 ||
+ ehdr->e_ident[EI_MAG1] != ELFMAG1 ||
+ ehdr->e_ident[EI_MAG2] != ELFMAG2 ||
+ ehdr->e_ident[EI_MAG3] != ELFMAG3) {
+ fprintf(stderr, "%s: not an ELF file\n", binary);
+ munmap(map, (size_t)st.st_size);
+ return 1;
+ }
+ is64 = (ehdr->e_ident[EI_CLASS] == ELFCLASS64);
+
+ if (is64) {
+ Elf64_Shdr *shdrs = (Elf64_Shdr *)((char *)map + ehdr->e_shoff);
+ Elf64_Shdr *shstrtab_hdr = &shdrs[ehdr->e_shstrndx];
+ const char *shstrtab = (char *)map + shstrtab_hdr->sh_offset;
+ int si;
+
+ for (int pass = 0; pass < 2 && !found; pass++) {
+ const char *target = pass ? ".dynsym" : ".symtab";
+
+ for (si = 0; si < ehdr->e_shnum && !found; si++) {
+ Elf64_Shdr *sh = &shdrs[si];
+ const char *name = shstrtab + sh->sh_name;
+
+ if (strcmp(name, target) != 0)
+ continue;
+
+ Elf64_Shdr *strtab_sh = &shdrs[sh->sh_link];
+ const char *strtab = (char *)map + strtab_sh->sh_offset;
+ Elf64_Sym *syms = (Elf64_Sym *)((char *)map + sh->sh_offset);
+ uint64_t nsyms = sh->sh_size / sizeof(Elf64_Sym);
+ uint64_t j;
+
+ for (j = 0; j < nsyms; j++) {
+ if (strcmp(strtab + syms[j].st_name, symname) == 0) {
+ sym_vaddr = syms[j].st_value;
+ found = 1;
+ break;
+ }
+ }
+ }
+ }
+
+ if (!found) {
+ fprintf(stderr, "symbol '%s' not found in %s\n", symname, binary);
+ munmap(map, (size_t)st.st_size);
+ return 1;
+ }
+
+ Elf64_Phdr *phdrs = (Elf64_Phdr *)((char *)map + ehdr->e_phoff);
+ int pi;
+
+ for (pi = 0; pi < ehdr->e_phnum; pi++) {
+ Elf64_Phdr *ph = &phdrs[pi];
+
+ if (ph->p_type != PT_LOAD)
+ continue;
+ if (sym_vaddr >= ph->p_vaddr &&
+ sym_vaddr < ph->p_vaddr + ph->p_filesz) {
+ file_offset = sym_vaddr - ph->p_vaddr + ph->p_offset;
+ break;
+ }
+ }
+ } else {
+ Elf32_Shdr *shdrs = (Elf32_Shdr *)((char *)map + ehdr32->e_shoff);
+ Elf32_Shdr *shstrtab_hdr = &shdrs[ehdr32->e_shstrndx];
+ const char *shstrtab = (char *)map + shstrtab_hdr->sh_offset;
+ int si;
+ uint32_t sym_vaddr32 = 0;
+
+ for (int pass = 0; pass < 2 && !found; pass++) {
+ const char *target = pass ? ".dynsym" : ".symtab";
+
+ for (si = 0; si < ehdr32->e_shnum && !found; si++) {
+ Elf32_Shdr *sh = &shdrs[si];
+ const char *name = shstrtab + sh->sh_name;
+
+ if (strcmp(name, target) != 0)
+ continue;
+
+ Elf32_Shdr *strtab_sh = &shdrs[sh->sh_link];
+ const char *strtab = (char *)map + strtab_sh->sh_offset;
+ Elf32_Sym *syms = (Elf32_Sym *)((char *)map + sh->sh_offset);
+ uint32_t nsyms = sh->sh_size / sizeof(Elf32_Sym);
+ uint32_t j;
+
+ for (j = 0; j < nsyms; j++) {
+ if (strcmp(strtab + syms[j].st_name, symname) == 0) {
+ sym_vaddr32 = syms[j].st_value;
+ found = 1;
+ break;
+ }
+ }
+ }
+ }
+
+ if (!found) {
+ fprintf(stderr, "symbol '%s' not found in %s\n", symname, binary);
+ munmap(map, (size_t)st.st_size);
+ return 1;
+ }
+
+ Elf32_Phdr *phdrs = (Elf32_Phdr *)((char *)map + ehdr32->e_phoff);
+ int pi;
+
+ for (pi = 0; pi < ehdr32->e_phnum; pi++) {
+ Elf32_Phdr *ph = &phdrs[pi];
+
+ if (ph->p_type != PT_LOAD)
+ continue;
+ if (sym_vaddr32 >= ph->p_vaddr &&
+ sym_vaddr32 < ph->p_vaddr + ph->p_filesz) {
+ file_offset = sym_vaddr32 - ph->p_vaddr + ph->p_offset;
+ break;
+ }
+ }
+ sym_vaddr = sym_vaddr32;
+ }
+
+ munmap(map, (size_t)st.st_size);
+
+ if (!file_offset && sym_vaddr) {
+ fprintf(stderr, "could not map vaddr 0x%lx to file offset\n",
+ (unsigned long)sym_vaddr);
+ return 1;
+ }
+
+ printf("0x%lx\n", (unsigned long)file_offset);
+ return 0;
+}
+
+int main(int argc, char *argv[])
+{
+ if (argc != 4 || strcmp(argv[1], "sym_offset") != 0) {
+ fprintf(stderr, "Usage: %s sym_offset <binary> <symbol>\n", argv[0]);
+ return 1;
+ }
+ return sym_offset(argv[2], argv[3]);
+}
diff --git a/tools/testing/selftests/verification/test.d/tlob/tlob_target.c b/tools/testing/selftests/verification/test.d/tlob/tlob_target.c
new file mode 100644
index 000000000000..0fdbc575d71d
--- /dev/null
+++ b/tools/testing/selftests/verification/test.d/tlob/tlob_target.c
@@ -0,0 +1,138 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * tlob_target.c - uprobe target binary for tlob selftests.
+ *
+ * Provides three start/stop probe pairs, each designed to exercise a
+ * different dominant component of the detail_env_tlob ns breakdown:
+ *
+ * tlob_busy_work / tlob_busy_work_done - busy-spin: running_ns dominates
+ * tlob_sleep_work / tlob_sleep_work_done - nanosleep: sleeping_ns dominates
+ * tlob_preempt_work / tlob_preempt_work_done - busy-spin: waiting_ns dominates
+ * (needs an RT competitor on the same CPU)
+ *
+ * Usage: tlob_target <duration_ms> [mode]
+ *
+ * mode is one of: busy (default), sleep, preempt.
+ * Loops in 200 ms iterations until <duration_ms> has elapsed
+ * (0 = run for ~24 hours).
+ */
+#define _GNU_SOURCE
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+
+#ifndef noinline
+#define noinline __attribute__((noinline))
+#endif
+
+static inline int timespec_before(const struct timespec *a,
+				  const struct timespec *b)
+{
+	/* Returns 1 when *a is strictly earlier than *b; nanoseconds only break a tie. */
+	return a->tv_sec != b->tv_sec ? a->tv_sec < b->tv_sec : a->tv_nsec < b->tv_nsec;
+}
+
+static void timespec_add_ms(struct timespec *ts, unsigned long ms)
+{
+	const long nsec_per_sec = 1000000000L;
+	long nsec = ts->tv_nsec + (long)(ms % 1000) * 1000000L;
+
+	/* Advance *ts by ms; at most one second is carried out of the nanosecond sum. */
+	ts->tv_sec += ms / 1000 + (nsec >= nsec_per_sec);
+	ts->tv_nsec = nsec >= nsec_per_sec ? nsec - nsec_per_sec : nsec;
+}
+
+/* stop probe; noinline keeps the entry point, the asm keeps calls from being elided */
+noinline void tlob_busy_work_done(void)
+{
+	__asm__ __volatile__("" ::: "memory"); /* no code emitted; uprobe fires on entry */
+}
+
+/* start probe; spins on CLOCK_MONOTONIC for duration_ns so running_ns dominates */
+noinline void tlob_busy_work(unsigned long duration_ns)
+{
+	struct timespec t0, t1;
+	unsigned long spent_ns;
+
+	clock_gettime(CLOCK_MONOTONIC, &t0);
+	for (;;) {
+		clock_gettime(CLOCK_MONOTONIC, &t1);
+		spent_ns = (unsigned long)(t1.tv_sec - t0.tv_sec) * 1000000000UL;
+		spent_ns += (unsigned long)(t1.tv_nsec - t0.tv_nsec);
+		if (spent_ns >= duration_ns)
+			break;
+	}
+	tlob_busy_work_done();	/* entry of the stop probe */
+}
+
+/* stop probe; noinline keeps the entry point, the asm keeps calls from being elided */
+noinline void tlob_sleep_work_done(void)
+{
+	__asm__ __volatile__("" ::: "memory"); /* no code emitted; uprobe fires on entry */
+}
+
+/* start probe; blocks in nanosleep for duration_ms so sleeping_ns dominates */
+noinline void tlob_sleep_work(unsigned long duration_ms)
+{
+	struct timespec req;
+
+	req.tv_sec = duration_ms / 1000;
+	req.tv_nsec = (long)(duration_ms % 1000) * 1000000L;
+	nanosleep(&req, NULL);	/* result ignored: an interrupted sleep is not resumed */
+	tlob_sleep_work_done();
+}
+
+/* stop probe; noinline keeps the entry point, the asm keeps calls from being elided */
+noinline void tlob_preempt_work_done(void)
+{
+	__asm__ __volatile__("" ::: "memory"); /* no code emitted; uprobe fires on entry */
+}
+
+/*
+ * start probe; spins on CLOCK_MONOTONIC for duration_ms so that an RT
+ * competitor on the same CPU drives waiting_ns (task runnable but off-CPU).
+ */
+noinline void tlob_preempt_work(unsigned long duration_ms)
+{
+	struct timespec t0, t1;
+	unsigned long budget_ns = duration_ms * 1000000UL, spent_ns;
+
+	clock_gettime(CLOCK_MONOTONIC, &t0);
+	for (;;) {
+		clock_gettime(CLOCK_MONOTONIC, &t1);
+		spent_ns = (unsigned long)(t1.tv_sec - t0.tv_sec) * 1000000000UL;
+		spent_ns += (unsigned long)(t1.tv_nsec - t0.tv_nsec);
+		if (spent_ns >= budget_ns)
+			break;
+	}
+	tlob_preempt_work_done();	/* entry of the stop probe */
+}
+
+int main(int argc, char *argv[])
+{
+	const char *mode = argc >= 3 ? argv[2] : "busy";
+	unsigned long duration_ms = argc >= 2 ? strtoul(argv[1], NULL, 10) : 0;
+	struct timespec deadline, now;
+	int sleep_mode, preempt_mode;
+
+	/* Classify the mode once; anything unrecognised runs the busy probe pair. */
+	sleep_mode = !strcmp(mode, "sleep");
+	preempt_mode = !sleep_mode && !strcmp(mode, "preempt");
+
+	/* A duration of 0 (or no argument) means 86400000 ms, i.e. 24 hours. */
+	clock_gettime(CLOCK_MONOTONIC, &deadline);
+	timespec_add_ms(&deadline, duration_ms ? duration_ms : 86400000UL);
+
+	do {
+		if (sleep_mode)
+			tlob_sleep_work(200);
+		else if (preempt_mode)
+			tlob_preempt_work(200);
+		else
+			tlob_busy_work(200 * 1000000UL);
+		clock_gettime(CLOCK_MONOTONIC, &now);
+	} while (timespec_before(&now, &deadline));
+	return 0;
+}
diff --git a/tools/testing/selftests/verification/test.d/tlob/uprobe_bind.tc b/tools/testing/selftests/verification/test.d/tlob/uprobe_bind.tc
new file mode 100644
index 000000000000..1ac3db6ca7bb
--- /dev/null
+++ b/tools/testing/selftests/verification/test.d/tlob/uprobe_bind.tc
@@ -0,0 +1,37 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0-or-later
+# description: Test tlob monitor uprobe binding (visible in monitor file, removable, duplicate rejected)
+# requires: tlob:monitor
+
+RV_BINDIR="${RV_BINDIR:-$(realpath "$(dirname "${1:-$0}")")}"
+UPROBE_TARGET="${RV_BINDIR}/tlob_target"
+TLOB_SYM="${RV_BINDIR}/tlob_sym"
+[ -x "$UPROBE_TARGET" ] || exit_unsupported
+[ -x "$TLOB_SYM" ] || exit_unsupported
+TLOB_MONITOR=monitors/tlob/monitor
+
+# Skip (not fail) when a lookup fails: with errexit (set -e) a bare failing
+# assignment would abort the test before any later emptiness check runs.
+busy_offset=$("$TLOB_SYM" sym_offset "$UPROBE_TARGET" tlob_busy_work 2>/dev/null) && [ -n "$busy_offset" ] || exit_unsupported
+stop_offset=$("$TLOB_SYM" sym_offset "$UPROBE_TARGET" tlob_busy_work_done 2>/dev/null) && [ -n "$stop_offset" ] || exit_unsupported
+
+"$UPROBE_TARGET" 30000 &
+busy_pid=$!
+sleep 0.05
+
+echo 1 > monitors/tlob/enable
+echo "p ${UPROBE_TARGET}:${busy_offset} ${stop_offset} threshold=5000000000" > "$TLOB_MONITOR"
+
+# Binding must appear in monitor file with canonical hex-offset format.
+grep -qE "^p ${UPROBE_TARGET}:0x[0-9a-f]+ 0x[0-9a-f]+ threshold=[0-9]+$" "$TLOB_MONITOR"
+grep -q "threshold=5000000000" "$TLOB_MONITOR"
+
+# Duplicate offset_start must be rejected ("! cmd" is exempt from errexit, so test explicitly).
+if echo "p ${UPROBE_TARGET}:${busy_offset} ${stop_offset} threshold=9999000" > "$TLOB_MONITOR" 2>/dev/null; then exit_fail; fi
+
+# Remove the binding; it must no longer appear.
+echo "-${UPROBE_TARGET}:${busy_offset}" > "$TLOB_MONITOR"
+if grep -q "^p .*:0x${busy_offset#0x} " "$TLOB_MONITOR"; then exit_fail; fi
+
+kill "$busy_pid" 2>/dev/null || true; wait "$busy_pid" 2>/dev/null || true
+echo 0 > monitors/tlob/enable
diff --git a/tools/testing/selftests/verification/test.d/tlob/uprobe_detail_running.tc b/tools/testing/selftests/verification/test.d/tlob/uprobe_detail_running.tc
new file mode 100644
index 000000000000..2814caa34902
--- /dev/null
+++ b/tools/testing/selftests/verification/test.d/tlob/uprobe_detail_running.tc
@@ -0,0 +1,51 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0-or-later
+# description: Test tlob monitor detail running (running_ns dominates when task busy-spins between probes)
+# requires: tlob:monitor
+
+RV_BINDIR="${RV_BINDIR:-$(realpath "$(dirname "${1:-$0}")")}"
+UPROBE_TARGET="${RV_BINDIR}/tlob_target"
+TLOB_SYM="${RV_BINDIR}/tlob_sym"
+[ -x "$UPROBE_TARGET" ] || exit_unsupported
+[ -x "$TLOB_SYM" ] || exit_unsupported
+TLOB_MONITOR=monitors/tlob/monitor
+
+# Skip (not fail) when a lookup fails: with errexit (set -e) a bare failing
+# assignment would abort the test before any later emptiness check runs.
+start_offset=$("$TLOB_SYM" sym_offset "$UPROBE_TARGET" tlob_busy_work 2>/dev/null) && [ -n "$start_offset" ] || exit_unsupported
+stop_offset=$("$TLOB_SYM" sym_offset "$UPROBE_TARGET" tlob_busy_work_done 2>/dev/null) && [ -n "$stop_offset" ] || exit_unsupported
+
+"$UPROBE_TARGET" 5000 &
+busy_pid=$!
+sleep 0.05
+
+echo 1 > /sys/kernel/tracing/events/rv/detail_env_tlob/enable
+echo 1 > /sys/kernel/tracing/tracing_on
+echo 1 > monitors/tlob/enable
+echo > /sys/kernel/tracing/trace
+
+# 10 µs budget; task busy-spins 200 ms per iteration -> running_ns dominates.
+echo "p ${UPROBE_TARGET}:${start_offset} ${stop_offset} threshold=10000" > "$TLOB_MONITOR"
+
+found=0; i=0
+while [ "$i" -lt 30 ]; do
+	sleep 0.1
+	grep -q "detail_env_tlob" /sys/kernel/tracing/trace && { found=1; break; }
+	i=$((i+1))
+done
+
+echo "-${UPROBE_TARGET}:${start_offset}" > "$TLOB_MONITOR" 2>/dev/null
+kill "$busy_pid" 2>/dev/null || true; wait "$busy_pid" 2>/dev/null || true
+echo 0 > /sys/kernel/tracing/events/rv/detail_env_tlob/enable
+echo 0 > monitors/tlob/enable
+
+[ "$found" = "1" ]
+
+line=$(grep "detail_env_tlob" /sys/kernel/tracing/trace | head -n 1)
+running=$(echo "$line" | sed 's/.*running_ns=\([0-9]*\).*/\1/')
+sleeping=$(echo "$line" | sed 's/.*sleeping_ns=\([0-9]*\).*/\1/')
+# Busy-spin keeps the task on-CPU: running_ns must exceed sleeping_ns.
+# waiting_ns is not part of this check, so it is not extracted.
+[ "$running" -gt "$sleeping" ]
+
+echo > /sys/kernel/tracing/trace
diff --git a/tools/testing/selftests/verification/test.d/tlob/uprobe_detail_sleeping.tc b/tools/testing/selftests/verification/test.d/tlob/uprobe_detail_sleeping.tc
new file mode 100644
index 000000000000..0a6470b4cadb
--- /dev/null
+++ b/tools/testing/selftests/verification/test.d/tlob/uprobe_detail_sleeping.tc
@@ -0,0 +1,50 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0-or-later
+# description: Test tlob monitor detail sleeping (sleeping_ns dominates when task blocks between probes)
+# requires: tlob:monitor
+
+RV_BINDIR="${RV_BINDIR:-$(realpath "$(dirname "${1:-$0}")")}"
+UPROBE_TARGET="${RV_BINDIR}/tlob_target"
+TLOB_SYM="${RV_BINDIR}/tlob_sym"
+[ -x "$UPROBE_TARGET" ] || exit_unsupported
+[ -x "$TLOB_SYM" ] || exit_unsupported
+TLOB_MONITOR=monitors/tlob/monitor
+
+# Skip (not fail) when a lookup fails: with errexit (set -e) a bare failing
+# assignment would abort the test before any later emptiness check runs.
+start_offset=$("$TLOB_SYM" sym_offset "$UPROBE_TARGET" tlob_sleep_work 2>/dev/null) && [ -n "$start_offset" ] || exit_unsupported
+stop_offset=$("$TLOB_SYM" sym_offset "$UPROBE_TARGET" tlob_sleep_work_done 2>/dev/null) && [ -n "$stop_offset" ] || exit_unsupported
+
+"$UPROBE_TARGET" 5000 sleep &
+busy_pid=$!
+sleep 0.05
+
+echo 1 > /sys/kernel/tracing/events/rv/detail_env_tlob/enable
+echo 1 > /sys/kernel/tracing/tracing_on
+echo 1 > monitors/tlob/enable
+echo > /sys/kernel/tracing/trace
+
+# 50 ms budget; task sleeps 200 ms per iteration -> sleeping_ns dominates.
+echo "p ${UPROBE_TARGET}:${start_offset} ${stop_offset} threshold=50000000" > "$TLOB_MONITOR"
+
+found=0; i=0
+while [ "$i" -lt 30 ]; do
+	sleep 0.1
+	grep -q "detail_env_tlob" /sys/kernel/tracing/trace && { found=1; break; }
+	i=$((i+1))
+done
+
+echo "-${UPROBE_TARGET}:${start_offset}" > "$TLOB_MONITOR" 2>/dev/null
+kill "$busy_pid" 2>/dev/null || true; wait "$busy_pid" 2>/dev/null || true
+echo 0 > /sys/kernel/tracing/events/rv/detail_env_tlob/enable
+echo 0 > monitors/tlob/enable
+
+[ "$found" = "1" ]
+
+line=$(grep "detail_env_tlob" /sys/kernel/tracing/trace | head -n 1)
+running=$(echo "$line" | sed 's/.*running_ns=\([0-9]*\).*/\1/')
+waiting=$(echo "$line" | sed 's/.*waiting_ns=\([0-9]*\).*/\1/')
+sleeping=$(echo "$line" | sed 's/.*sleeping_ns=\([0-9]*\).*/\1/')
+[ "$sleeping" -gt "$((running + waiting))" ]
+
+echo > /sys/kernel/tracing/trace
diff --git a/tools/testing/selftests/verification/test.d/tlob/uprobe_detail_waiting.tc b/tools/testing/selftests/verification/test.d/tlob/uprobe_detail_waiting.tc
new file mode 100644
index 000000000000..ef22fce700fc
--- /dev/null
+++ b/tools/testing/selftests/verification/test.d/tlob/uprobe_detail_waiting.tc
@@ -0,0 +1,66 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0-or-later
+# description: Test tlob monitor detail waiting (waiting_ns dominates when task is preempted between probes)
+# requires: tlob:monitor
+
+RV_BINDIR="${RV_BINDIR:-$(realpath "$(dirname "${1:-$0}")")}"
+UPROBE_TARGET="${RV_BINDIR}/tlob_target"
+TLOB_SYM="${RV_BINDIR}/tlob_sym"
+[ -x "$UPROBE_TARGET" ] || exit_unsupported
+[ -x "$TLOB_SYM" ] || exit_unsupported
+TLOB_MONITOR=monitors/tlob/monitor
+
+command -v chrt > /dev/null || exit_unsupported
+command -v taskset > /dev/null || exit_unsupported
+
+# Skip (not fail) when a lookup fails: with errexit (set -e) a bare failing
+# assignment would abort the test before any later emptiness check runs.
+start_offset=$("$TLOB_SYM" sym_offset "$UPROBE_TARGET" tlob_preempt_work 2>/dev/null) && [ -n "$start_offset" ] || exit_unsupported
+stop_offset=$("$TLOB_SYM" sym_offset "$UPROBE_TARGET" tlob_preempt_work_done 2>/dev/null) && [ -n "$stop_offset" ] || exit_unsupported
+
+cpu=0
+
+echo 1 > /sys/kernel/tracing/events/rv/detail_env_tlob/enable
+echo 1 > /sys/kernel/tracing/tracing_on
+echo 1 > monitors/tlob/enable
+echo > /sys/kernel/tracing/trace
+
+# Register probe before the target starts so the start uprobe fires on the
+# first entry to tlob_preempt_work. Budget: 500 ms.
+echo "p ${UPROBE_TARGET}:${start_offset} ${stop_offset} threshold=500000000" > "$TLOB_MONITOR"
+
+# Target starts; start probe fires on tlob_preempt_work entry.
+taskset -c "$cpu" "$UPROBE_TARGET" 5000 preempt &
+busy_pid=$!
+sleep 0.05
+
+# RT hog on the same CPU preempts the target; target stays in waiting state
+# (runnable, off-CPU) until the budget expires -> waiting_ns dominates.
+chrt -f 99 taskset -c "$cpu" sh -c 'while true; do :; done' 2>/dev/null &
+hog_pid=$!
+
+found=0; i=0
+while [ "$i" -lt 30 ]; do
+	sleep 0.1
+	grep -q "detail_env_tlob" /sys/kernel/tracing/trace && { found=1; break; }
+	i=$((i+1))
+done
+
+# Kill the RT hog first so tlob_target can release any in-flight SRCU read
+# section from uprobe_notify_resume; otherwise probe removal blocks in
+# synchronize_srcu with the hog monopolising the CPU at FIFO-99.
+kill "$hog_pid" 2>/dev/null || true; wait "$hog_pid" 2>/dev/null || true
+kill "$busy_pid" 2>/dev/null || true; wait "$busy_pid" 2>/dev/null || true
+echo "-${UPROBE_TARGET}:${start_offset}" > "$TLOB_MONITOR" 2>/dev/null
+echo 0 > /sys/kernel/tracing/events/rv/detail_env_tlob/enable
+echo 0 > monitors/tlob/enable
+
+[ "$found" = "1" ]
+
+line=$(grep "detail_env_tlob" /sys/kernel/tracing/trace | head -n 1)
+running=$(echo "$line" | sed 's/.*running_ns=\([0-9]*\).*/\1/')
+sleeping=$(echo "$line" | sed 's/.*sleeping_ns=\([0-9]*\).*/\1/')
+waiting=$(echo "$line" | sed 's/.*waiting_ns=\([0-9]*\).*/\1/')
+[ "$waiting" -gt "$((running + sleeping))" ]
+
+echo > /sys/kernel/tracing/trace
diff --git a/tools/testing/selftests/verification/test.d/tlob/uprobe_multi.tc b/tools/testing/selftests/verification/test.d/tlob/uprobe_multi.tc
new file mode 100644
index 000000000000..f1bd6c955f1d
--- /dev/null
+++ b/tools/testing/selftests/verification/test.d/tlob/uprobe_multi.tc
@@ -0,0 +1,64 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0-or-later
+# description: Test tlob monitor multiple uprobe bindings (different offsets fire independently)
+# requires: tlob:monitor
+
+RV_BINDIR="${RV_BINDIR:-$(realpath "$(dirname "${1:-$0}")")}"
+UPROBE_TARGET="${RV_BINDIR}/tlob_target"
+TLOB_SYM="${RV_BINDIR}/tlob_sym"
+[ -x "$UPROBE_TARGET" ] || exit_unsupported
+[ -x "$TLOB_SYM" ] || exit_unsupported
+TLOB_MONITOR=monitors/tlob/monitor
+
+# Resolve the start/stop file offsets of both probe pairs.
+# Skip (not fail) when a lookup fails: with errexit (set -e) a bare failing
+# assignment would abort the test before any later emptiness check runs.
+busy_offset=$("$TLOB_SYM" sym_offset "$UPROBE_TARGET" tlob_busy_work 2>/dev/null) && [ -n "$busy_offset" ] || exit_unsupported
+busy_stop=$("$TLOB_SYM" sym_offset "$UPROBE_TARGET" tlob_busy_work_done 2>/dev/null) && [ -n "$busy_stop" ] || exit_unsupported
+
+sleep_offset=$("$TLOB_SYM" sym_offset "$UPROBE_TARGET" tlob_sleep_work 2>/dev/null) && [ -n "$sleep_offset" ] || exit_unsupported
+sleep_stop=$("$TLOB_SYM" sym_offset "$UPROBE_TARGET" tlob_sleep_work_done 2>/dev/null) && [ -n "$sleep_stop" ] || exit_unsupported
+
+"$UPROBE_TARGET" 30000 & # busy mode: tlob_busy_work fires every 200 ms
+busy_pid=$!
+"$UPROBE_TARGET" 30000 sleep & # sleep mode: tlob_sleep_work fires every 200 ms
+sleep_pid=$!
+sleep 0.05
+
+echo 1 > /sys/kernel/tracing/events/rv/error_env_tlob/enable
+echo 1 > /sys/kernel/tracing/events/rv/detail_env_tlob/enable
+echo 1 > /sys/kernel/tracing/tracing_on
+echo 1 > monitors/tlob/enable
+echo > /sys/kernel/tracing/trace
+
+# Binding A: 5 s budget on the busy probe - must not fire in 200 ms loops.
+echo "p ${UPROBE_TARGET}:${busy_offset} ${busy_stop} threshold=5000000000" > "$TLOB_MONITOR"
+# Binding B: 10 µs budget on the sleep probe - fires on first invocation.
+echo "p ${UPROBE_TARGET}:${sleep_offset} ${sleep_stop} threshold=10000" > "$TLOB_MONITOR"
+
+# Wait up to 2 s for error_env_tlob from binding B.
+found=0; i=0
+while [ "$i" -lt 20 ]; do
+	sleep 0.1
+	grep -q "error_env_tlob" /sys/kernel/tracing/trace && { found=1; break; }
+	i=$((i+1))
+done
+
+echo "-${UPROBE_TARGET}:${busy_offset}" > "$TLOB_MONITOR" 2>/dev/null
+echo "-${UPROBE_TARGET}:${sleep_offset}" > "$TLOB_MONITOR" 2>/dev/null
+kill "$sleep_pid" 2>/dev/null || true; wait "$sleep_pid" 2>/dev/null || true
+kill "$busy_pid" 2>/dev/null || true; wait "$busy_pid" 2>/dev/null || true
+
+echo 0 > monitors/tlob/enable
+echo 0 > /sys/kernel/tracing/events/rv/error_env_tlob/enable
+echo 0 > /sys/kernel/tracing/events/rv/detail_env_tlob/enable
+
+[ "$found" = "1" ]
+# error_env_tlob payload: clock variable must be present.
+# The event field can be "budget_exceeded" (hrtimer path) or the DA event
+# name ("sleep", "preempt") depending on which fires first; don't constrain it.
+grep "error_env_tlob" /sys/kernel/tracing/trace | head -n 1 | grep -q "clk_elapsed="
+# detail_env_tlob must appear alongside the error.
+grep -q "detail_env_tlob" /sys/kernel/tracing/trace
+
+echo > /sys/kernel/tracing/trace
diff --git a/tools/testing/selftests/verification/test.d/tlob/uprobe_no_event.tc b/tools/testing/selftests/verification/test.d/tlob/uprobe_no_event.tc
new file mode 100644
index 000000000000..a143635a60ce
--- /dev/null
+++ b/tools/testing/selftests/verification/test.d/tlob/uprobe_no_event.tc
@@ -0,0 +1,19 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0-or-later
+# description: Test tlob monitor no spurious events without active uprobe binding
+# requires: tlob:monitor
+
+echo 1 > /sys/kernel/tracing/events/rv/error_env_tlob/enable
+echo 1 > /sys/kernel/tracing/tracing_on
+echo 1 > monitors/tlob/enable
+echo > /sys/kernel/tracing/trace
+
+sleep 0.5
+
+# Record the outcome first: "! grep" is exempt from errexit and could never fail the test.
+spurious=0; grep -q "error_env_tlob" /sys/kernel/tracing/trace && spurious=1
+
+echo 0 > monitors/tlob/enable
+echo 0 > /sys/kernel/tracing/events/rv/error_env_tlob/enable
+echo > /sys/kernel/tracing/trace
+[ "$spurious" = "0" ]
diff --git a/tools/testing/selftests/verification/test.d/tlob/uprobe_violation.tc b/tools/testing/selftests/verification/test.d/tlob/uprobe_violation.tc
new file mode 100644
index 000000000000..d210d9c3a92d
--- /dev/null
+++ b/tools/testing/selftests/verification/test.d/tlob/uprobe_violation.tc
@@ -0,0 +1,67 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0-or-later
+# description: Test tlob monitor budget violation (error_env_tlob and detail_env_tlob fire with correct fields)
+# requires: tlob:monitor
+
+RV_BINDIR="${RV_BINDIR:-$(realpath "$(dirname "${1:-$0}")")}"
+UPROBE_TARGET="${RV_BINDIR}/tlob_target"
+TLOB_SYM="${RV_BINDIR}/tlob_sym"
+[ -x "$UPROBE_TARGET" ] || exit_unsupported
+[ -x "$TLOB_SYM" ] || exit_unsupported
+TLOB_MONITOR=monitors/tlob/monitor
+
+# Skip (not fail) when a lookup fails: with errexit (set -e) a bare failing
+# assignment would abort the test before any later emptiness check runs.
+busy_offset=$("$TLOB_SYM" sym_offset "$UPROBE_TARGET" tlob_busy_work 2>/dev/null) && [ -n "$busy_offset" ] || exit_unsupported
+stop_offset=$("$TLOB_SYM" sym_offset "$UPROBE_TARGET" tlob_busy_work_done 2>/dev/null) && [ -n "$stop_offset" ] || exit_unsupported
+
+"$UPROBE_TARGET" 30000 &
+busy_pid=$!
+sleep 0.05
+
+echo 1 > /sys/kernel/tracing/events/rv/error_env_tlob/enable
+echo 1 > /sys/kernel/tracing/events/rv/detail_env_tlob/enable
+echo 1 > /sys/kernel/tracing/tracing_on
+echo 1 > monitors/tlob/enable
+echo > /sys/kernel/tracing/trace
+
+# 10 µs budget - fires almost immediately; task is busy-spinning on-CPU.
+echo "p ${UPROBE_TARGET}:${busy_offset} ${stop_offset} threshold=10000" > "$TLOB_MONITOR"
+
+# wait up to 2 s for detail_env_tlob
+found=0; i=0
+while [ "$i" -lt 20 ]; do
+	sleep 0.1
+	grep -q "detail_env_tlob" /sys/kernel/tracing/trace && { found=1; break; }
+	i=$((i+1))
+done
+
+echo "-${UPROBE_TARGET}:${busy_offset}" > "$TLOB_MONITOR" 2>/dev/null
+kill "$busy_pid" 2>/dev/null || true; wait "$busy_pid" 2>/dev/null || true
+echo 0 > /sys/kernel/tracing/events/rv/error_env_tlob/enable
+echo 0 > /sys/kernel/tracing/events/rv/detail_env_tlob/enable
+echo 0 > monitors/tlob/enable
+
+[ "$found" = "1" ]
+
+# error_env_tlob must carry the clk_elapsed environment field.
+# The event label is "budget_exceeded" when detected by the hrtimer callback,
+# or the triggering sched event name when detected by the constraint path on a
+# preemption that races with the timer (common on PREEMPT_RT / VM). Both are
+# valid detections; check the env field instead of the label.
+grep "error_env_tlob" /sys/kernel/tracing/trace | head -n 1 | grep -q "clk_elapsed="
+
+# detail_env_tlob must have all five fields with the correct threshold
+line=$(grep "detail_env_tlob" /sys/kernel/tracing/trace | head -n 1)
+echo "$line" | grep -q "pid="
+echo "$line" | grep -q "threshold_ns=10000"
+echo "$line" | grep -q "running_ns="
+echo "$line" | grep -q "waiting_ns="
+echo "$line" | grep -q "sleeping_ns="
+
+# Busy-spin keeps the task on-CPU: running_ns must exceed sleeping_ns.
+running=$(echo "$line" | sed 's/.*running_ns=\([0-9]*\).*/\1/')
+sleeping=$(echo "$line" | sed 's/.*sleeping_ns=\([0-9]*\).*/\1/')
+[ "$running" -gt "$sleeping" ]
+
+echo > /sys/kernel/tracing/trace
--
2.43.0
^ permalink raw reply related
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox