Linux Trace Kernel
 help / color / mirror / Atom feed
* [PATCH v2 10/17] landlock: Set audit_net.sk for socket access checks
From: Mickaël Salaün @ 2026-04-06 14:37 UTC (permalink / raw)
  To: Christian Brauner, Günther Noack, Steven Rostedt
  Cc: Mickaël Salaün, Jann Horn, Jeff Xu, Justin Suess,
	Kees Cook, Masami Hiramatsu, Mathieu Desnoyers, Matthieu Buffet,
	Mikhail Ivanov, Tingmao Wang, kernel-team, linux-fsdevel,
	linux-security-module, linux-trace-kernel, stable
In-Reply-To: <20260406143717.1815792-1-mic@digikod.net>

Set audit_net.sk in current_check_access_socket() to provide the socket
object to audit_log_lsm_data().  This makes Landlock consistent with
AppArmor, which always sets .sk for socket operations, and with
SELinux's generic socket permission checks.

The socket's local and foreign address information (laddr, lport, faddr,
fport) is logged by the shared lsm_audit.c infrastructure when the
socket has bound or connected state.  Fields with zero values are
suppressed by print_ipv4_addr()/print_ipv6_addr(), so the audit output
is unchanged for the common case of bind denials on unbound sockets.
For connect denials after a prior bind, the bound local address (laddr,
lport) appears before the existing sockaddr fields (daddr, dest).

No existing fields are removed or reordered, and the new field names
(laddr, lport, faddr, fport) are standard audit fields already emitted
by other LSMs through the same lsm_audit.c code path.

Add net_bind and net_connect audit tests.  The net_bind test verifies
basic net denial auditing.  The net_connect test binds to an allowed
port, then connects to a denied port, and verifies that the audit record
includes laddr/lport from the socket state.

Fixes: 9f74411a40ce ("landlock: Log TCP bind and connect denials")
Cc: stable@vger.kernel.org
Cc: Günther Noack <gnoack@google.com>
Signed-off-by: Mickaël Salaün <mic@digikod.net>
---

Changes since v1:
- New patch.
---
 security/landlock/net.c                       |   1 +
 tools/testing/selftests/landlock/audit_test.c | 187 ++++++++++++++++++
 2 files changed, 188 insertions(+)

diff --git a/security/landlock/net.c b/security/landlock/net.c
index a2aefc7967a1..d8bc9e0d012a 100644
--- a/security/landlock/net.c
+++ b/security/landlock/net.c
@@ -225,6 +225,7 @@ static int current_check_access_socket(struct socket *const sock,
 		return 0;
 
 	audit_net.family = address->sa_family;
+	audit_net.sk = sock->sk;
 	landlock_log_denial(subject,
 			    &(struct landlock_request){
 				    .type = LANDLOCK_REQUEST_NET_ACCESS,
diff --git a/tools/testing/selftests/landlock/audit_test.c b/tools/testing/selftests/landlock/audit_test.c
index da0bfd06391e..65dfb272c825 100644
--- a/tools/testing/selftests/landlock/audit_test.c
+++ b/tools/testing/selftests/landlock/audit_test.c
@@ -6,14 +6,17 @@
  */
 
 #define _GNU_SOURCE
+#include <arpa/inet.h>
 #include <errno.h>
 #include <fcntl.h>
 #include <limits.h>
 #include <linux/landlock.h>
+#include <netinet/in.h>
 #include <pthread.h>
 #include <stdlib.h>
 #include <sys/mount.h>
 #include <sys/prctl.h>
+#include <sys/socket.h>
 #include <sys/types.h>
 #include <sys/wait.h>
 #include <unistd.h>
@@ -160,6 +163,190 @@ TEST_F(audit, layers)
 	EXPECT_EQ(0, close(ruleset_fd));
 }
 
+static int matches_log_net_bind(struct __test_metadata *const _metadata,
+				int audit_fd, __u16 port, __u64 *domain_id)
+{
+	/*
+	 * The socket is unbound at bind() time, so laddr/lport/faddr/fport from
+	 * the socket object are zero and not printed.  Only the requested port
+	 * (src) appears: zero-valued fields such as INADDR_ANY are suppressed.
+	 */
+	static const char log_template[] = REGEX_LANDLOCK_PREFIX
+		" blockers=net\\.bind_tcp src=%u$";
+	char log_match[sizeof(log_template) + 10];
+
+	snprintf(log_match, sizeof(log_match), log_template, port);
+	return audit_match_record(audit_fd, AUDIT_LANDLOCK_ACCESS, log_match,
+				  domain_id);
+}
+
+/*
+ * Verifies basic network denial auditing: a denied bind() on an unbound socket
+ * logs only the requested port, without laddr/lport/faddr/fport fields.
+ */
+TEST_F(audit, net_bind)
+{
+	const struct landlock_ruleset_attr ruleset_attr = {
+		.handled_access_net = LANDLOCK_ACCESS_NET_BIND_TCP,
+	};
+	struct landlock_net_port_attr net_port = {
+		.allowed_access = LANDLOCK_ACCESS_NET_BIND_TCP,
+		.port = 1024,
+	};
+	int status, ruleset_fd;
+	pid_t child;
+	__u64 denial_dom = 1;
+
+	ruleset_fd =
+		landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0);
+	ASSERT_LE(0, ruleset_fd);
+
+	/* Allow port 1024 only. */
+	ASSERT_EQ(0, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
+				       &net_port, 0));
+
+	EXPECT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0));
+
+	child = fork();
+	ASSERT_LE(0, child);
+	if (child == 0) {
+		struct sockaddr_in addr = {
+			.sin_family = AF_INET,
+			.sin_port = htons(1025),
+			.sin_addr.s_addr = htonl(INADDR_ANY),
+		};
+		int sock_fd;
+
+		EXPECT_EQ(0, landlock_restrict_self(ruleset_fd, 0));
+		close(ruleset_fd);
+
+		/* Bind to port 1025 (not allowed). */
+		sock_fd = socket(AF_INET, SOCK_STREAM | SOCK_CLOEXEC, 0);
+		ASSERT_LE(0, sock_fd);
+		EXPECT_EQ(-1, bind(sock_fd, (struct sockaddr *)&addr,
+				   sizeof(addr)));
+		EXPECT_EQ(EACCES, errno);
+		close(sock_fd);
+
+		/* Verify audit record without socket address fields. */
+		EXPECT_EQ(0, matches_log_net_bind(_metadata, self->audit_fd,
+						  1025, &denial_dom));
+		EXPECT_NE(denial_dom, 1);
+		EXPECT_NE(denial_dom, 0);
+
+		_exit(_metadata->exit_code);
+		return;
+	}
+
+	ASSERT_EQ(child, waitpid(child, &status, 0));
+	if (WIFSIGNALED(status) || !WIFEXITED(status) ||
+	    WEXITSTATUS(status) != EXIT_SUCCESS)
+		_metadata->exit_code = KSFT_FAIL;
+
+	EXPECT_EQ(0, close(ruleset_fd));
+}
+
+static int matches_log_net_connect(struct __test_metadata *const _metadata,
+				   int audit_fd, __u16 denied_port,
+				   __u16 bound_port, __u64 *domain_id)
+{
+	/*
+	 * After bind(), the socket has local address state.  The audit record
+	 * should include laddr/lport from the socket (via audit_net.sk) and
+	 * daddr/dest from the connect sockaddr.
+	 */
+	static const char log_template[] = REGEX_LANDLOCK_PREFIX
+		" blockers=net\\.connect_tcp"
+		" laddr=127\\.0\\.0\\.1 lport=%u"
+		" daddr=127\\.0\\.0\\.1 dest=%u$";
+	char log_match[sizeof(log_template) + 20];
+
+	snprintf(log_match, sizeof(log_match), log_template, bound_port,
+		 denied_port);
+	return audit_match_record(audit_fd, AUDIT_LANDLOCK_ACCESS, log_match,
+				  domain_id);
+}
+
+/*
+ * Verifies that network denial audit records for connect include enriched
+ * socket information (laddr/lport) from the socket object after a prior bind.
+ * This complements net_bind which tests the unbound case.
+ */
+TEST_F(audit, net_connect)
+{
+	const struct landlock_ruleset_attr ruleset_attr = {
+		.handled_access_net = LANDLOCK_ACCESS_NET_BIND_TCP |
+				      LANDLOCK_ACCESS_NET_CONNECT_TCP,
+	};
+	struct landlock_net_port_attr net_port;
+	int status, ruleset_fd;
+	pid_t child;
+	__u64 denial_dom = 1;
+
+	ruleset_fd =
+		landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0);
+	ASSERT_LE(0, ruleset_fd);
+
+	/* Allow bind to port 1024 and connect to port 1024. */
+	net_port.allowed_access = LANDLOCK_ACCESS_NET_BIND_TCP |
+				  LANDLOCK_ACCESS_NET_CONNECT_TCP;
+	net_port.port = 1024;
+	ASSERT_EQ(0, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
+				       &net_port, 0));
+
+	EXPECT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0));
+
+	child = fork();
+	ASSERT_LE(0, child);
+	if (child == 0) {
+		struct sockaddr_in bind_addr = {
+			.sin_family = AF_INET,
+			.sin_port = htons(1024),
+			.sin_addr.s_addr = htonl(INADDR_LOOPBACK),
+		};
+		struct sockaddr_in conn_addr = {
+			.sin_family = AF_INET,
+			.sin_port = htons(1025),
+			.sin_addr.s_addr = htonl(INADDR_LOOPBACK),
+		};
+		int sock_fd, optval = 1;
+
+		EXPECT_EQ(0, landlock_restrict_self(ruleset_fd, 0));
+		close(ruleset_fd);
+
+		sock_fd = socket(AF_INET, SOCK_STREAM | SOCK_CLOEXEC, 0);
+		ASSERT_LE(0, sock_fd);
+		ASSERT_EQ(0, setsockopt(sock_fd, SOL_SOCKET, SO_REUSEADDR,
+					&optval, sizeof(optval)));
+
+		/* Bind to allowed port 1024 (succeeds). */
+		ASSERT_EQ(0, bind(sock_fd, (struct sockaddr *)&bind_addr,
+				  sizeof(bind_addr)));
+
+		/* Connect to denied port 1025 (fails). */
+		EXPECT_EQ(-1, connect(sock_fd, (struct sockaddr *)&conn_addr,
+				      sizeof(conn_addr)));
+		EXPECT_EQ(EACCES, errno);
+		close(sock_fd);
+
+		/* Verify audit record with laddr/lport from bound socket. */
+		EXPECT_EQ(0, matches_log_net_connect(_metadata, self->audit_fd,
+						     1025, 1024, &denial_dom));
+		EXPECT_NE(denial_dom, 1);
+		EXPECT_NE(denial_dom, 0);
+
+		_exit(_metadata->exit_code);
+		return;
+	}
+
+	ASSERT_EQ(child, waitpid(child, &status, 0));
+	if (WIFSIGNALED(status) || !WIFEXITED(status) ||
+	    WEXITSTATUS(status) != EXIT_SUCCESS)
+		_metadata->exit_code = KSFT_FAIL;
+
+	EXPECT_EQ(0, close(ruleset_fd));
+}
+
 struct thread_data {
 	pid_t parent_pid;
 	int ruleset_fd, pipe_child, pipe_parent;
-- 
2.53.0


^ permalink raw reply related

* [PATCH v2 05/17] tracing: Add __print_untrusted_str()
From: Mickaël Salaün @ 2026-04-06 14:37 UTC (permalink / raw)
  To: Christian Brauner, Günther Noack, Steven Rostedt
  Cc: Mickaël Salaün, Jann Horn, Jeff Xu, Justin Suess,
	Kees Cook, Masami Hiramatsu, Mathieu Desnoyers, Matthieu Buffet,
	Mikhail Ivanov, Tingmao Wang, kernel-team, linux-fsdevel,
	linux-security-module, linux-trace-kernel
In-Reply-To: <20260406143717.1815792-1-mic@digikod.net>

Landlock tracepoints expose filesystem paths and process names
that may contain spaces, equal signs, or other characters that
break ftrace field parsing.

Add a new __print_untrusted_str() helper to safely print strings after
escaping all special characters, including common separators (space,
equal sign), quotes, and backslashes.  This transforms a string from an
untrusted source (e.g. user space) to make it:
- safe to parse,
- easy to read (for simple strings),
- easy to get back the original.

Cc: Günther Noack <gnoack@google.com>
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Tingmao Wang <m@maowtm.org>
Signed-off-by: Mickaël Salaün <mic@digikod.net>
---

Changes since v1:
https://lore.kernel.org/r/20250523165741.693976-4-mic@digikod.net
- Remove WARN_ON() (pointed out by Steven Rostedt).
---
 include/linux/trace_events.h               |  2 ++
 include/trace/stages/stage3_trace_output.h |  4 +++
 include/trace/stages/stage7_class_define.h |  1 +
 kernel/trace/trace_output.c                | 41 ++++++++++++++++++++++
 4 files changed, 48 insertions(+)

diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
index 37eb2f0f3dd8..7f4325d327ee 100644
--- a/include/linux/trace_events.h
+++ b/include/linux/trace_events.h
@@ -57,6 +57,8 @@ trace_print_hex_dump_seq(struct trace_seq *p, const char *prefix_str,
 			 int prefix_type, int rowsize, int groupsize,
 			 const void *buf, size_t len, bool ascii);
 
+const char *trace_print_untrusted_str_seq(struct trace_seq *s, const char *str);
+
 int trace_raw_output_prep(struct trace_iterator *iter,
 			  struct trace_event *event);
 extern __printf(2, 3)
diff --git a/include/trace/stages/stage3_trace_output.h b/include/trace/stages/stage3_trace_output.h
index fce85ea2df1c..62e98babb969 100644
--- a/include/trace/stages/stage3_trace_output.h
+++ b/include/trace/stages/stage3_trace_output.h
@@ -133,6 +133,10 @@
 	trace_print_hex_dump_seq(p, prefix_str, prefix_type,		\
 				 rowsize, groupsize, buf, len, ascii)
 
+#undef __print_untrusted_str
+#define __print_untrusted_str(str)							\
+		trace_print_untrusted_str_seq(p, __get_str(str))
+
 #undef __print_ns_to_secs
 #define __print_ns_to_secs(value)			\
 	({						\
diff --git a/include/trace/stages/stage7_class_define.h b/include/trace/stages/stage7_class_define.h
index fcd564a590f4..1164aacd550f 100644
--- a/include/trace/stages/stage7_class_define.h
+++ b/include/trace/stages/stage7_class_define.h
@@ -24,6 +24,7 @@
 #undef __print_array
 #undef __print_dynamic_array
 #undef __print_hex_dump
+#undef __print_untrusted_str
 #undef __get_buf
 
 /*
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index 1996d7aba038..9d14c7cc654d 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -16,6 +16,7 @@
 #include <linux/btf.h>
 #include <linux/bpf.h>
 #include <linux/hashtable.h>
+#include <linux/string_helpers.h>
 
 #include "trace_output.h"
 #include "trace_btf.h"
@@ -321,6 +322,46 @@ trace_print_hex_dump_seq(struct trace_seq *p, const char *prefix_str,
 }
 EXPORT_SYMBOL(trace_print_hex_dump_seq);
 
+/**
+ * trace_print_untrusted_str_seq - print a string after escaping characters
+ * @s: trace seq struct to write to
+ * @src: The string to print
+ *
+ * Prints a string to a trace seq after escaping all special characters,
+ * including common separators (space, equal sign), quotes, and backslashes.
+ * This makes a string from an untrusted source (e.g. user space) safe to
+ * parse, easy to read (for simple strings), and easy to convert back.
+ *
+ * Return: a pointer to the escaped, NUL-terminated string in @s, or NULL if
+ * @src is NULL or @s has no room left for it.
+ */
+const char *trace_print_untrusted_str_seq(struct trace_seq *s,
+					   const char *src)
+{
+	int escaped_size;
+	char *buf;
+	size_t buf_size = seq_buf_get_buf(&s->seq, &buf);
+	const char *ret = trace_seq_buffer_ptr(s);
+
+	/* Buffer exhaustion is normal when the trace buffer is full. */
+	if (!src || buf_size == 0)
+		return NULL;
+
+	escaped_size = string_escape_mem(src, strlen(src), buf, buf_size,
+		ESCAPE_SPACE | ESCAPE_SPECIAL | ESCAPE_NAP | ESCAPE_APPEND |
+		ESCAPE_OCTAL, " ='\"\\");
+	if (unlikely(escaped_size >= buf_size)) {
+		/* We need some room for the final '\0'. */
+		seq_buf_set_overflow(&s->seq);
+		s->full = 1;
+		return NULL;
+	}
+	seq_buf_commit(&s->seq, escaped_size);
+	trace_seq_putc(s, 0);
+	return ret;
+}
+EXPORT_SYMBOL(trace_print_untrusted_str_seq);
+
 int trace_raw_output_prep(struct trace_iterator *iter,
 			  struct trace_event *trace_event)
 {
-- 
2.53.0


^ permalink raw reply related

* [PATCH v2 04/17] landlock: Split denial logging from audit into common framework
From: Mickaël Salaün @ 2026-04-06 14:37 UTC (permalink / raw)
  To: Christian Brauner, Günther Noack, Steven Rostedt
  Cc: Mickaël Salaün, Jann Horn, Jeff Xu, Justin Suess,
	Kees Cook, Masami Hiramatsu, Mathieu Desnoyers, Matthieu Buffet,
	Mikhail Ivanov, Tingmao Wang, kernel-team, linux-fsdevel,
	linux-security-module, linux-trace-kernel
In-Reply-To: <20260406143717.1815792-1-mic@digikod.net>

Tracepoint emission requires the denial framework (layer identification,
request validation) without depending on CONFIG_AUDIT.  Separate the
denial logging infrastructure from the audit-specific code by
introducing a common log framework.

Create CONFIG_SECURITY_LANDLOCK_LOG, a promptless option enabled by
default when either CONFIG_AUDIT or CONFIG_TRACEPOINTS is enabled.  The
CONFIG_TRACEPOINTS dependency is added proactively alongside the
audit-to-log generalization; a following commit adds the first
tracepoint consumer.

Rename audit.c to log.c and create log.h with the request types and
struct landlock_request moved from audit.h.  Rename the
landlock_log_drop_domain() function to landlock_log_free_domain() to
match the landlock_free_domain tracepoint introduced in a following
commit.

The landlock_log_denial() declaration in log.h remains under
CONFIG_AUDIT in this patch; the guard is widened to
CONFIG_SECURITY_LANDLOCK_LOG in a following commit that adds the first
tracepoint consumer.

Move id.o from CONFIG_AUDIT to CONFIG_SECURITY_LANDLOCK_LOG so that
domain and ruleset IDs are available for tracing without audit support.

Cc: Günther Noack <gnoack@google.com>
Signed-off-by: Mickaël Salaün <mic@digikod.net>
---

Changes since v1:
- New patch.
---
 security/landlock/Kconfig            |  5 ++
 security/landlock/Makefile           |  6 +-
 security/landlock/cred.h             |  8 ++-
 security/landlock/domain.c           |  6 +-
 security/landlock/domain.h           | 16 +++--
 security/landlock/fs.c               | 11 ++--
 security/landlock/{audit.c => log.c} | 88 +++++++++++++++++-----------
 security/landlock/{audit.h => log.h} | 12 ++--
 security/landlock/net.c              |  2 +-
 security/landlock/task.c             |  2 +-
 10 files changed, 96 insertions(+), 60 deletions(-)
 rename security/landlock/{audit.c => log.c} (95%)
 rename security/landlock/{audit.h => log.h} (86%)

diff --git a/security/landlock/Kconfig b/security/landlock/Kconfig
index 3f1493402052..7aeac29160e8 100644
--- a/security/landlock/Kconfig
+++ b/security/landlock/Kconfig
@@ -21,6 +21,11 @@ config SECURITY_LANDLOCK
 	  you should also prepend "landlock," to the content of CONFIG_LSM to
 	  enable Landlock at boot time.
 
+config SECURITY_LANDLOCK_LOG
+	bool
+	depends on SECURITY_LANDLOCK
+	default y if AUDIT || TRACEPOINTS
+
 config SECURITY_LANDLOCK_KUNIT_TEST
 	bool "KUnit tests for Landlock" if !KUNIT_ALL_TESTS
 	depends on KUNIT=y
diff --git a/security/landlock/Makefile b/security/landlock/Makefile
index 23e13644916f..101440da7bcd 100644
--- a/security/landlock/Makefile
+++ b/security/landlock/Makefile
@@ -13,6 +13,6 @@ landlock-y := \
 
 landlock-$(CONFIG_INET) += net.o
 
-landlock-$(CONFIG_AUDIT) += \
-	id.o \
-	audit.o
+landlock-$(CONFIG_SECURITY_LANDLOCK_LOG) += \
+	log.o \
+	id.o
diff --git a/security/landlock/cred.h b/security/landlock/cred.h
index c42b0d3ecec8..38299db6efa2 100644
--- a/security/landlock/cred.h
+++ b/security/landlock/cred.h
@@ -36,13 +36,15 @@ struct landlock_cred_security {
 	 */
 	struct landlock_domain *domain;
 
-#ifdef CONFIG_AUDIT
+#ifdef CONFIG_SECURITY_LANDLOCK_LOG
 	/**
 	 * @domain_exec: Bitmask identifying the domain layers that were enforced by
 	 * the current task's executed file (i.e. no new execve(2) since
 	 * landlock_restrict_self(2)).
 	 */
 	u16 domain_exec;
+#endif /* CONFIG_SECURITY_LANDLOCK_LOG */
+#ifdef CONFIG_AUDIT
 	/**
 	 * @log_subdomains_off: Set if the domain descendants's log_status should be
 	 * set to %LANDLOCK_LOG_DISABLED.  This is not a landlock_hierarchy
@@ -53,14 +55,14 @@ struct landlock_cred_security {
 #endif /* CONFIG_AUDIT */
 } __packed;
 
-#ifdef CONFIG_AUDIT
+#ifdef CONFIG_SECURITY_LANDLOCK_LOG
 
 /* Makes sure all layer executions can be stored. */
 static_assert(BITS_PER_TYPE(typeof_member(struct landlock_cred_security,
 					  domain_exec)) >=
 	      LANDLOCK_MAX_NUM_LAYERS);
 
-#endif /* CONFIG_AUDIT */
+#endif /* CONFIG_SECURITY_LANDLOCK_LOG */
 
 static inline struct landlock_cred_security *
 landlock_cred(const struct cred *cred)
diff --git a/security/landlock/domain.c b/security/landlock/domain.c
index 317fd94d3ccd..0dfd53ae9dd7 100644
--- a/security/landlock/domain.c
+++ b/security/landlock/domain.c
@@ -451,7 +451,7 @@ landlock_merge_ruleset(struct landlock_domain *const parent,
 	return no_free_ptr(new_dom);
 }
 
-#ifdef CONFIG_AUDIT
+#ifdef CONFIG_SECURITY_LANDLOCK_LOG
 
 /**
  * get_current_exe - Get the current's executable path, if any
@@ -561,6 +561,10 @@ int landlock_init_hierarchy_log(struct landlock_hierarchy *const hierarchy)
 	return 0;
 }
 
+#endif /* CONFIG_SECURITY_LANDLOCK_LOG */
+
+#ifdef CONFIG_AUDIT
+
 static deny_masks_t
 get_layer_deny_mask(const access_mask_t all_existing_optional_access,
 		    const unsigned long access_bit, const size_t layer)
diff --git a/security/landlock/domain.h b/security/landlock/domain.h
index df11cb7d4f2b..56f54efb65d1 100644
--- a/security/landlock/domain.h
+++ b/security/landlock/domain.h
@@ -21,7 +21,7 @@
 #include <linux/workqueue.h>
 
 #include "access.h"
-#include "audit.h"
+#include "log.h"
 #include "ruleset.h"
 
 enum landlock_log_status {
@@ -87,7 +87,7 @@ struct landlock_hierarchy {
 	 */
 	refcount_t usage;
 
-#ifdef CONFIG_AUDIT
+#ifdef CONFIG_SECURITY_LANDLOCK_LOG
 	/**
 	 * @log_status: Whether this domain should be logged or not.  Because
 	 * concurrent log entries may be created at the same time, it is still
@@ -117,7 +117,7 @@ struct landlock_hierarchy {
 		 * %LANDLOCK_RESTRICT_SELF_LOG_NEW_EXEC_ON.  Set to false by default.
 		 */
 		log_new_exec : 1;
-#endif /* CONFIG_AUDIT */
+#endif /* CONFIG_SECURITY_LANDLOCK_LOG */
 };
 
 #ifdef CONFIG_AUDIT
@@ -127,6 +127,10 @@ landlock_get_deny_masks(const access_mask_t all_existing_optional_access,
 			const access_mask_t optional_access,
 			const struct layer_access_masks *const masks);
 
+#endif /* CONFIG_AUDIT */
+
+#ifdef CONFIG_SECURITY_LANDLOCK_LOG
+
 int landlock_init_hierarchy_log(struct landlock_hierarchy *const hierarchy);
 
 static inline void
@@ -139,7 +143,7 @@ landlock_free_hierarchy_details(struct landlock_hierarchy *const hierarchy)
 	kfree(hierarchy->details);
 }
 
-#else /* CONFIG_AUDIT */
+#else /* CONFIG_SECURITY_LANDLOCK_LOG */
 
 static inline int
 landlock_init_hierarchy_log(struct landlock_hierarchy *const hierarchy)
@@ -152,7 +156,7 @@ landlock_free_hierarchy_details(struct landlock_hierarchy *const hierarchy)
 {
 }
 
-#endif /* CONFIG_AUDIT */
+#endif /* CONFIG_SECURITY_LANDLOCK_LOG */
 
 static inline void
 landlock_get_hierarchy(struct landlock_hierarchy *const hierarchy)
@@ -166,7 +170,7 @@ static inline void landlock_put_hierarchy(struct landlock_hierarchy *hierarchy)
 	while (hierarchy && refcount_dec_and_test(&hierarchy->usage)) {
 		const struct landlock_hierarchy *const freeme = hierarchy;
 
-		landlock_log_drop_domain(hierarchy);
+		landlock_log_free_domain(hierarchy);
 		landlock_free_hierarchy_details(hierarchy);
 		hierarchy = hierarchy->parent;
 		kfree(freeme);
diff --git a/security/landlock/fs.c b/security/landlock/fs.c
index 3ef453fc14a6..a0b4d0dd261f 100644
--- a/security/landlock/fs.c
+++ b/security/landlock/fs.c
@@ -42,12 +42,12 @@
 #include <uapi/linux/landlock.h>
 
 #include "access.h"
-#include "audit.h"
 #include "common.h"
 #include "cred.h"
 #include "domain.h"
 #include "fs.h"
 #include "limits.h"
+#include "log.h"
 #include "object.h"
 #include "ruleset.h"
 #include "setup.h"
@@ -918,10 +918,11 @@ is_access_to_paths_allowed(const struct landlock_domain *const domain,
 	path_put(&walker_path);
 
 	/*
-	 * Check CONFIG_AUDIT to enable elision of log_request_parent* and
-	 * associated caller's stack variables thanks to dead code elimination.
+	 * Check CONFIG_SECURITY_LANDLOCK_LOG to enable elision of
+	 * log_request_parent* and associated caller's stack variables thanks to
+	 * dead code elimination.
 	 */
-#ifdef CONFIG_AUDIT
+#ifdef CONFIG_SECURITY_LANDLOCK_LOG
 	if (!allowed_parent1 && log_request_parent1) {
 		log_request_parent1->type = LANDLOCK_REQUEST_FS_ACCESS;
 		log_request_parent1->audit.type = LSM_AUDIT_DATA_PATH;
@@ -937,7 +938,7 @@ is_access_to_paths_allowed(const struct landlock_domain *const domain,
 		log_request_parent2->access = access_masked_parent2;
 		log_request_parent2->layer_masks = layer_masks_parent2;
 	}
-#endif /* CONFIG_AUDIT */
+#endif /* CONFIG_SECURITY_LANDLOCK_LOG */
 
 	return allowed_parent1 && allowed_parent2;
 }
diff --git a/security/landlock/audit.c b/security/landlock/log.c
similarity index 95%
rename from security/landlock/audit.c
rename to security/landlock/log.c
index 75438b3cc887..c9b506707af0 100644
--- a/security/landlock/audit.c
+++ b/security/landlock/log.c
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0-only
 /*
- * Landlock - Audit helpers
+ * Landlock - Log helpers
  *
  * Copyright © 2023-2025 Microsoft Corporation
  */
@@ -13,12 +13,13 @@
 #include <uapi/linux/landlock.h>
 
 #include "access.h"
-#include "audit.h"
 #include "common.h"
 #include "cred.h"
 #include "domain.h"
 #include "limits.h"
+#include "log.h"
 #include "ruleset.h"
+#ifdef CONFIG_AUDIT
 
 static const char *const fs_access_strings[] = {
 	[BIT_INDEX(LANDLOCK_ACCESS_FS_EXECUTE)] = "fs.execute",
@@ -134,6 +135,45 @@ static void log_domain(struct landlock_hierarchy *const hierarchy)
 	WRITE_ONCE(hierarchy->log_status, LANDLOCK_LOG_RECORDED);
 }
 
+static void audit_denial(const struct landlock_cred_security *const subject,
+			 const struct landlock_request *const request,
+			 struct landlock_hierarchy *const youngest_denied,
+			 const size_t youngest_layer,
+			 const access_mask_t missing)
+{
+	struct audit_buffer *ab;
+
+	if (!audit_enabled)
+		return;
+
+	/* Checks if the current exec was restricting itself. */
+	if (subject->domain_exec & BIT(youngest_layer)) {
+		/* Ignores denials for the same execution. */
+		if (!youngest_denied->log_same_exec)
+			return;
+	} else {
+		/* Ignores denials after a new execution. */
+		if (!youngest_denied->log_new_exec)
+			return;
+	}
+
+	/* Uses consistent allocation flags wrt common_lsm_audit(). */
+	ab = audit_log_start(audit_context(), GFP_ATOMIC | __GFP_NOWARN,
+			     AUDIT_LANDLOCK_ACCESS);
+	if (!ab)
+		return;
+
+	audit_log_format(ab, "domain=%llx blockers=", youngest_denied->id);
+	log_blockers(ab, request->type, missing);
+	audit_log_lsm_data(ab, &request->audit);
+	audit_log_end(ab);
+
+	/* Logs this domain the first time it shows in log. */
+	log_domain(youngest_denied);
+}
+
+#endif /* CONFIG_AUDIT */
+
 static struct landlock_hierarchy *
 get_hierarchy(const struct landlock_domain *const domain, const size_t layer)
 {
@@ -352,7 +392,7 @@ static bool is_valid_request(const struct landlock_request *const request)
 }
 
 /**
- * landlock_log_denial - Create audit records related to a denial
+ * landlock_log_denial - Log a denied access
  *
  * @subject: The Landlock subject's credential denying an action.
  * @request: Detail of the user space request.
@@ -360,7 +400,6 @@ static bool is_valid_request(const struct landlock_request *const request)
 void landlock_log_denial(const struct landlock_cred_security *const subject,
 			 const struct landlock_request *const request)
 {
-	struct audit_buffer *ab;
 	struct landlock_hierarchy *youngest_denied;
 	size_t youngest_layer;
 	access_mask_t missing;
@@ -403,37 +442,16 @@ void landlock_log_denial(const struct landlock_cred_security *const subject,
 	 */
 	atomic64_inc(&youngest_denied->num_denials);
 
-	if (!audit_enabled)
-		return;
-
-	/* Checks if the current exec was restricting itself. */
-	if (subject->domain_exec & BIT(youngest_layer)) {
-		/* Ignores denials for the same execution. */
-		if (!youngest_denied->log_same_exec)
-			return;
-	} else {
-		/* Ignores denials after a new execution. */
-		if (!youngest_denied->log_new_exec)
-			return;
-	}
-
-	/* Uses consistent allocation flags wrt common_lsm_audit(). */
-	ab = audit_log_start(audit_context(), GFP_ATOMIC | __GFP_NOWARN,
-			     AUDIT_LANDLOCK_ACCESS);
-	if (!ab)
-		return;
-
-	audit_log_format(ab, "domain=%llx blockers=", youngest_denied->id);
-	log_blockers(ab, request->type, missing);
-	audit_log_lsm_data(ab, &request->audit);
-	audit_log_end(ab);
-
-	/* Logs this domain the first time it shows in log. */
-	log_domain(youngest_denied);
+#ifdef CONFIG_AUDIT
+	audit_denial(subject, request, youngest_denied, youngest_layer,
+		     missing);
+#endif /* CONFIG_AUDIT */
 }
 
+#ifdef CONFIG_AUDIT
+
 /**
- * landlock_log_drop_domain - Create an audit record on domain deallocation
+ * landlock_log_free_domain - Create an audit record on domain deallocation
  *
  * @hierarchy: The domain's hierarchy being deallocated.
  *
@@ -443,7 +461,7 @@ void landlock_log_denial(const struct landlock_cred_security *const subject,
  * Called in a work queue scheduled by landlock_put_domain_deferred() called by
  * hook_cred_free().
  */
-void landlock_log_drop_domain(const struct landlock_hierarchy *const hierarchy)
+void landlock_log_free_domain(const struct landlock_hierarchy *const hierarchy)
 {
 	struct audit_buffer *ab;
 
@@ -471,6 +489,8 @@ void landlock_log_drop_domain(const struct landlock_hierarchy *const hierarchy)
 	audit_log_end(ab);
 }
 
+#endif /* CONFIG_AUDIT */
+
 #ifdef CONFIG_SECURITY_LANDLOCK_KUNIT_TEST
 
 static struct kunit_case test_cases[] = {
@@ -483,7 +503,7 @@ static struct kunit_case test_cases[] = {
 };
 
 static struct kunit_suite test_suite = {
-	.name = "landlock_audit",
+	.name = "landlock_log",
 	.test_cases = test_cases,
 };
 
diff --git a/security/landlock/audit.h b/security/landlock/log.h
similarity index 86%
rename from security/landlock/audit.h
rename to security/landlock/log.h
index 50452a791656..4370fff86e45 100644
--- a/security/landlock/audit.h
+++ b/security/landlock/log.h
@@ -1,12 +1,12 @@
 /* SPDX-License-Identifier: GPL-2.0-only */
 /*
- * Landlock - Audit helpers
+ * Landlock - Log helpers
  *
  * Copyright © 2023-2025 Microsoft Corporation
  */
 
-#ifndef _SECURITY_LANDLOCK_AUDIT_H
-#define _SECURITY_LANDLOCK_AUDIT_H
+#ifndef _SECURITY_LANDLOCK_LOG_H
+#define _SECURITY_LANDLOCK_LOG_H
 
 #include <linux/audit.h>
 #include <linux/lsm_audit.h>
@@ -54,7 +54,7 @@ struct landlock_request {
 
 #ifdef CONFIG_AUDIT
 
-void landlock_log_drop_domain(const struct landlock_hierarchy *const hierarchy);
+void landlock_log_free_domain(const struct landlock_hierarchy *const hierarchy);
 
 void landlock_log_denial(const struct landlock_cred_security *const subject,
 			 const struct landlock_request *const request);
@@ -62,7 +62,7 @@ void landlock_log_denial(const struct landlock_cred_security *const subject,
 #else /* CONFIG_AUDIT */
 
 static inline void
-landlock_log_drop_domain(const struct landlock_hierarchy *const hierarchy)
+landlock_log_free_domain(const struct landlock_hierarchy *const hierarchy)
 {
 }
 
@@ -74,4 +74,4 @@ landlock_log_denial(const struct landlock_cred_security *const subject,
 
 #endif /* CONFIG_AUDIT */
 
-#endif /* _SECURITY_LANDLOCK_AUDIT_H */
+#endif /* _SECURITY_LANDLOCK_LOG_H */
diff --git a/security/landlock/net.c b/security/landlock/net.c
index de108b3277bc..63f1fe0ec876 100644
--- a/security/landlock/net.c
+++ b/security/landlock/net.c
@@ -12,11 +12,11 @@
 #include <linux/socket.h>
 #include <net/ipv6.h>
 
-#include "audit.h"
 #include "common.h"
 #include "cred.h"
 #include "domain.h"
 #include "limits.h"
+#include "log.h"
 #include "net.h"
 #include "ruleset.h"
 
diff --git a/security/landlock/task.c b/security/landlock/task.c
index 2e7ee62958b2..5bfbbe6107ce 100644
--- a/security/landlock/task.c
+++ b/security/landlock/task.c
@@ -20,11 +20,11 @@
 #include <net/af_unix.h>
 #include <net/sock.h>
 
-#include "audit.h"
 #include "common.h"
 #include "cred.h"
 #include "domain.h"
 #include "fs.h"
+#include "log.h"
 #include "ruleset.h"
 #include "setup.h"
 #include "task.h"
-- 
2.53.0


^ permalink raw reply related

* [PATCH v2 09/17] landlock: Add tracepoints for rule checking
From: Mickaël Salaün @ 2026-04-06 14:37 UTC (permalink / raw)
  To: Christian Brauner, Günther Noack, Steven Rostedt
  Cc: Mickaël Salaün, Jann Horn, Jeff Xu, Justin Suess,
	Kees Cook, Masami Hiramatsu, Mathieu Desnoyers, Matthieu Buffet,
	Mikhail Ivanov, Tingmao Wang, kernel-team, linux-fsdevel,
	linux-security-module, linux-trace-kernel
In-Reply-To: <20260406143717.1815792-1-mic@digikod.net>

Merge landlock_find_rule() into landlock_unmask_layers() to consolidate
rule finding into unmask checking.  landlock_unmask_layers() now takes a
landlock_id and the domain instead of a rule pointer.

This enables us to not deal with Landlock rule pointers outside of the
domain implementation, to avoid two calls, and to get all required
information available to landlock_unmask_layers().

Use the per-type tracepoint wrappers unmask_layers_fs() and
unmask_layers_net() to emit tracepoints recording which rules matched
and what access masks were fulfilled.

Setting allowed_parent2 to true for non-dom-check requests when
get_inode_id() returns false preserves the pre-refactoring behavior: a
negative dentry (no backing inode) has no matching rule, so the access
is allowed at this path component.  Before the refactoring,
landlock_unmask_layers() with a NULL rule produced this result as a side
effect; now the caller must set it explicitly.

The check_rule tracepoints add up to 80 bytes of stack in the access
check path (dynamic layers array in TP_STRUCT__entry).  This cost is
only paid when a tracer is attached; the static branch is not taken
otherwise.

Cc: Günther Noack <gnoack@google.com>
Cc: Justin Suess <utilityemal77@gmail.com>
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Tingmao Wang <m@maowtm.org>
Signed-off-by: Mickaël Salaün <mic@digikod.net>
---

Changes since v1:
https://lore.kernel.org/r/20250523165741.693976-6-mic@digikod.net
- Merged find-rule consolidation (v1 2/5) into this patch.
- Added check_rule_net tracepoint for network rules.
- Added get_inode_id() helper with rcu_access_pointer().
- Added allowed_parent2 behavioral fix.
---
 include/trace/events/landlock.h |  99 ++++++++++++++++++++++
 security/landlock/domain.c      |  32 ++++---
 security/landlock/domain.h      |  10 +--
 security/landlock/fs.c          | 145 +++++++++++++++++++++++---------
 security/landlock/net.c         |  21 ++++-
 5 files changed, 246 insertions(+), 61 deletions(-)

diff --git a/include/trace/events/landlock.h b/include/trace/events/landlock.h
index 533aea6152e1..e7bb8fa802bf 100644
--- a/include/trace/events/landlock.h
+++ b/include/trace/events/landlock.h
@@ -12,8 +12,10 @@
 
 #include <linux/tracepoint.h>
 
+struct dentry;
 struct landlock_domain;
 struct landlock_hierarchy;
+struct landlock_rule;
 struct landlock_ruleset;
 struct path;
 
@@ -234,6 +236,103 @@ TRACE_EVENT(landlock_free_domain,
 	    TP_printk("domain=%llx denials=%llu", __entry->domain_id,
 		      __entry->denials));
 
+/**
+ * landlock_check_rule_fs - filesystem rule evaluated during access check
+ * @domain: Enforcing domain (never NULL)
+ * @dentry: Filesystem dentry being checked (never NULL)
+ * @access_request: Access mask being requested
+ * @rule: Matching rule with per-layer access masks (never NULL)
+ *
+ * Emitted for each rule that matches during a filesystem access check.
+ * The layers array shows the allowed access mask at each domain layer.
+ */
+TRACE_EVENT(landlock_check_rule_fs,
+
+	    TP_PROTO(const struct landlock_domain *domain,
+		     const struct dentry *dentry, access_mask_t access_request,
+		     const struct landlock_rule *rule),
+
+	    TP_ARGS(domain, dentry, access_request, rule),
+
+	    TP_STRUCT__entry(__field(__u64, domain_id) __field(
+		    access_mask_t,
+		    access_request) __field(dev_t, dev) __field(ino_t, ino)
+				     __dynamic_array(access_mask_t, layers,
+						     domain->num_layers)),
+
+	    TP_fast_assign(__entry->domain_id = domain->hierarchy->id;
+			   __entry->access_request = access_request;
+			   __entry->dev = dentry->d_sb->s_dev;
+			   __entry->ino = d_backing_inode(dentry)->i_ino;
+
+			   for (size_t level = 1, i = 0;
+				level <= __get_dynamic_array_len(layers) /
+						 sizeof(access_mask_t);
+				level++) {
+				   access_mask_t allowed;
+
+				   if (i < rule->num_layers &&
+				       level == rule->layers[i].level) {
+					   allowed = rule->layers[i].access;
+					   i++;
+				   } else {
+					   allowed = 0;
+				   }
+				   ((access_mask_t *)__get_dynamic_array(
+					   layers))[level - 1] = allowed;
+			   }),
+
+	    TP_printk("domain=%llx request=0x%x dev=%u:%u ino=%lu allowed=%s",
+		      __entry->domain_id, __entry->access_request,
+		      MAJOR(__entry->dev), MINOR(__entry->dev), __entry->ino,
+		      __print_dynamic_array(layers, sizeof(access_mask_t))));
+
+/**
+ * landlock_check_rule_net - network port rule evaluated during access check
+ * @domain: Enforcing domain (never NULL)
+ * @port: Network port being checked (host endianness)
+ * @access_request: Access mask being requested
+ * @rule: Matching rule with per-layer access masks (never NULL)
+ */
+TRACE_EVENT(landlock_check_rule_net,
+
+	    TP_PROTO(const struct landlock_domain *domain, __u64 port,
+		     access_mask_t access_request,
+		     const struct landlock_rule *rule),
+
+	    TP_ARGS(domain, port, access_request, rule),
+
+	    TP_STRUCT__entry(__field(__u64, domain_id) __field(
+		    access_mask_t, access_request) __field(__u64, port)
+				     __dynamic_array(access_mask_t, layers,
+						     domain->num_layers)),
+
+	    TP_fast_assign(__entry->domain_id = domain->hierarchy->id;
+			   __entry->access_request = access_request;
+			   __entry->port = port;
+
+			   for (size_t level = 1, i = 0;
+				level <= __get_dynamic_array_len(layers) /
+						 sizeof(access_mask_t);
+				level++) {
+				   access_mask_t allowed;
+
+				   if (i < rule->num_layers &&
+				       level == rule->layers[i].level) {
+					   allowed = rule->layers[i].access;
+					   i++;
+				   } else {
+					   allowed = 0;
+				   }
+				   ((access_mask_t *)__get_dynamic_array(
+					   layers))[level - 1] = allowed;
+			   }),
+
+	    TP_printk("domain=%llx request=0x%x port=%llu allowed=%s",
+		      __entry->domain_id, __entry->access_request,
+		      __entry->port,
+		      __print_dynamic_array(layers, sizeof(access_mask_t))));
+
 #endif /* _TRACE_LANDLOCK_H */
 
 /* This part must be outside protection */
diff --git a/security/landlock/domain.c b/security/landlock/domain.c
index 45ee7ec87957..e8d82b8a14a3 100644
--- a/security/landlock/domain.c
+++ b/security/landlock/domain.c
@@ -98,9 +98,9 @@ void landlock_put_domain_deferred(struct landlock_domain *const domain)
 }
 
 /* The returned access has the same lifetime as @domain. */
-const struct landlock_rule *
-landlock_find_rule(const struct landlock_domain *const domain,
-		   const struct landlock_id id)
+static const struct landlock_rule *
+find_rule(const struct landlock_domain *const domain,
+	  const struct landlock_id id)
 {
 	const struct rb_root *root;
 	const struct rb_node *node;
@@ -127,26 +127,38 @@ landlock_find_rule(const struct landlock_domain *const domain,
 
 /**
  * landlock_unmask_layers - Remove the access rights in @masks which are
- *                          granted in @rule
+ *                          granted by a matching rule
  *
- * Updates the set of (per-layer) unfulfilled access rights @masks so that all
- * the access rights granted in @rule are removed from it (because they are now
- * fulfilled).
+ * Looks up the rule matching @id in @domain, then updates the set of
+ * (per-layer) unfulfilled access rights @masks so that all the access rights
+ * granted by that rule are removed (because they are now fulfilled).
  *
- * @rule: A rule that grants a set of access rights for each layer.
+ * @domain: The Landlock domain to search for a matching rule.
+ * @id: Identifier for the rule target (e.g. inode, port).
  * @masks: A matrix of unfulfilled access rights for each layer.
+ * @matched_rule: Optional output for tracing (may be NULL); set to the
+ *                matching rule if one is found, unchanged otherwise.
  *
  * Return: True if the request is allowed (i.e. the access rights granted all
  * remaining unfulfilled access rights and masks has no leftover set bits).
  */
-bool landlock_unmask_layers(const struct landlock_rule *const rule,
-			    struct layer_access_masks *masks)
+bool landlock_unmask_layers(const struct landlock_domain *const domain,
+			    const struct landlock_id id,
+			    struct layer_access_masks *masks,
+			    const struct landlock_rule **matched_rule)
 {
+	const struct landlock_rule *rule;
+
 	if (!masks)
 		return true;
+
+	rule = find_rule(domain, id);
 	if (!rule)
 		return false;
 
+	if (matched_rule)
+		*matched_rule = rule;
+
 	/*
 	 * An access is granted if, for each policy layer, at least one rule
 	 * encountered on the pathwalk grants the requested access, regardless
diff --git a/security/landlock/domain.h b/security/landlock/domain.h
index 56f54efb65d1..35abae29677c 100644
--- a/security/landlock/domain.h
+++ b/security/landlock/domain.h
@@ -289,12 +289,10 @@ struct landlock_domain *
 landlock_merge_ruleset(struct landlock_domain *const parent,
 		       struct landlock_ruleset *const ruleset);
 
-const struct landlock_rule *
-landlock_find_rule(const struct landlock_domain *const domain,
-		   const struct landlock_id id);
-
-bool landlock_unmask_layers(const struct landlock_rule *const rule,
-			    struct layer_access_masks *masks);
+bool landlock_unmask_layers(const struct landlock_domain *const domain,
+			    const struct landlock_id id,
+			    struct layer_access_masks *masks,
+			    const struct landlock_rule **matched_rule);
 
 access_mask_t
 landlock_init_layer_masks(const struct landlock_domain *const domain,
diff --git a/security/landlock/fs.c b/security/landlock/fs.c
index f627ecc537a5..fe211656f6d9 100644
--- a/security/landlock/fs.c
+++ b/security/landlock/fs.c
@@ -375,31 +375,55 @@ int landlock_append_fs_rule(struct landlock_ruleset *const ruleset,
 
 /* Access-control management */
 
-/*
- * The lifetime of the returned rule is tied to @domain.
+/**
+ * get_inode_id - Look up the Landlock object for a dentry
+ * @dentry: The dentry to look up.
+ * @id: Filled with the inode's Landlock object pointer on success.
+ *
+ * Extracts the Landlock object pointer from @dentry's inode security blob and
+ * stores it in @id for use as a rule-tree lookup key.
+ *
+ * When this returns false (negative dentry or no Landlock object), no rule can
+ * match this inode, so landlock_unmask_layers() need not be called.  Callers
+ * that gate landlock_unmask_layers() on this function must handle the NULL
+ * @masks case independently, since the !masks-returns-true early-return in
+ * landlock_unmask_layers() will not be reached.  See the allowed_parent2
+ * initialization in is_access_to_paths_allowed().
  *
- * Returns NULL if no rule is found or if @dentry is negative.
+ * Return: True if a Landlock object exists for @dentry, false otherwise.
  */
-static const struct landlock_rule *
-find_rule(const struct landlock_domain *const domain,
-	  const struct dentry *const dentry)
+static bool get_inode_id(const struct dentry *const dentry,
+			 struct landlock_id *id)
 {
-	const struct landlock_rule *rule;
-	const struct inode *inode;
-	struct landlock_id id = {
-		.type = LANDLOCK_KEY_INODE,
-	};
-
 	/* Ignores nonexistent leafs. */
 	if (d_is_negative(dentry))
-		return NULL;
+		return false;
 
-	inode = d_backing_inode(dentry);
-	rcu_read_lock();
-	id.key.object = rcu_dereference(landlock_inode(inode)->object);
-	rule = landlock_find_rule(domain, id);
-	rcu_read_unlock();
-	return rule;
+	/*
+	 * rcu_access_pointer() is sufficient: the pointer is used only
+	 * as a numeric comparison key for rule lookup, not dereferenced.
+	 * The object cannot be freed while the domain exists because the
+	 * domain's rule tree holds its own reference to it.
+	 */
+	id->key.object = rcu_access_pointer(
+		landlock_inode(d_backing_inode(dentry))->object);
+	return !!id->key.object;
+}
+
+static bool unmask_layers_fs(const struct landlock_domain *const domain,
+			     const struct landlock_id id,
+			     const access_mask_t access_request,
+			     struct layer_access_masks *masks,
+			     const struct dentry *const dentry)
+{
+	const struct landlock_rule *rule = NULL;
+	bool ret;
+
+	ret = landlock_unmask_layers(domain, id, masks, &rule);
+	if (rule)
+		trace_landlock_check_rule_fs(domain, dentry, access_request,
+					     rule);
+	return ret;
 }
 
 /*
@@ -771,6 +795,9 @@ is_access_to_paths_allowed(const struct landlock_domain *const domain,
 	bool allowed_parent1 = false, allowed_parent2 = false, is_dom_check,
 	     child1_is_directory = true, child2_is_directory = true;
 	struct path walker_path;
+	struct landlock_id id = {
+		.type = LANDLOCK_KEY_INODE,
+	};
 	access_mask_t access_masked_parent1, access_masked_parent2;
 	struct layer_access_masks _layer_masks_child1, _layer_masks_child2;
 	struct layer_access_masks *layer_masks_child1 = NULL,
@@ -810,24 +837,46 @@ is_access_to_paths_allowed(const struct landlock_domain *const domain,
 		/* For a simple request, only check for requested accesses. */
 		access_masked_parent1 = access_request_parent1;
 		access_masked_parent2 = access_request_parent2;
+		/*
+		 * Simple requests have no parent2 to check, so parent2 is
+		 * trivially allowed.  This must be set explicitly because the
+		 * get_inode_id() gate in the pathwalk loop may prevent
+		 * landlock_unmask_layers() from being called (which would
+		 * otherwise return true for NULL masks as a side effect).
+		 */
+		allowed_parent2 = true;
 		is_dom_check = false;
 	}
 
 	if (unlikely(dentry_child1)) {
-		if (landlock_init_layer_masks(domain, LANDLOCK_MASK_ACCESS_FS,
-					      &_layer_masks_child1,
-					      LANDLOCK_KEY_INODE))
-			landlock_unmask_layers(find_rule(domain, dentry_child1),
-					       &_layer_masks_child1);
+		struct landlock_id id = {
+			.type = LANDLOCK_KEY_INODE,
+		};
+		access_mask_t handled;
+
+		handled = landlock_init_layer_masks(domain,
+						    LANDLOCK_MASK_ACCESS_FS,
+						    &_layer_masks_child1,
+						    LANDLOCK_KEY_INODE);
+		if (handled && get_inode_id(dentry_child1, &id))
+			unmask_layers_fs(domain, id, handled,
+					 &_layer_masks_child1, dentry_child1);
 		layer_masks_child1 = &_layer_masks_child1;
 		child1_is_directory = d_is_dir(dentry_child1);
 	}
 	if (unlikely(dentry_child2)) {
-		if (landlock_init_layer_masks(domain, LANDLOCK_MASK_ACCESS_FS,
-					      &_layer_masks_child2,
-					      LANDLOCK_KEY_INODE))
-			landlock_unmask_layers(find_rule(domain, dentry_child2),
-					       &_layer_masks_child2);
+		struct landlock_id id = {
+			.type = LANDLOCK_KEY_INODE,
+		};
+		access_mask_t handled;
+
+		handled = landlock_init_layer_masks(domain,
+						    LANDLOCK_MASK_ACCESS_FS,
+						    &_layer_masks_child2,
+						    LANDLOCK_KEY_INODE);
+		if (handled && get_inode_id(dentry_child2, &id))
+			unmask_layers_fs(domain, id, handled,
+					 &_layer_masks_child2, dentry_child2);
 		layer_masks_child2 = &_layer_masks_child2;
 		child2_is_directory = d_is_dir(dentry_child2);
 	}
@@ -839,8 +888,6 @@ is_access_to_paths_allowed(const struct landlock_domain *const domain,
 	 * restriction.
 	 */
 	while (true) {
-		const struct landlock_rule *rule;
-
 		/*
 		 * If at least all accesses allowed on the destination are
 		 * already allowed on the source, respectively if there is at
@@ -881,13 +928,20 @@ is_access_to_paths_allowed(const struct landlock_domain *const domain,
 				break;
 		}
 
-		rule = find_rule(domain, walker_path.dentry);
-		allowed_parent1 =
-			allowed_parent1 ||
-			landlock_unmask_layers(rule, layer_masks_parent1);
-		allowed_parent2 =
-			allowed_parent2 ||
-			landlock_unmask_layers(rule, layer_masks_parent2);
+		if (get_inode_id(walker_path.dentry, &id)) {
+			allowed_parent1 =
+				allowed_parent1 ||
+				unmask_layers_fs(domain, id,
+						 access_masked_parent1,
+						 layer_masks_parent1,
+						 walker_path.dentry);
+			allowed_parent2 =
+				allowed_parent2 ||
+				unmask_layers_fs(domain, id,
+						 access_masked_parent2,
+						 layer_masks_parent2,
+						 walker_path.dentry);
+		}
 
 		/* Stops when a rule from each layer grants access. */
 		if (allowed_parent1 && allowed_parent2)
@@ -1050,23 +1104,30 @@ static bool collect_domain_accesses(const struct landlock_domain *const domain,
 				    struct layer_access_masks *layer_masks_dom)
 {
 	bool ret = false;
+	access_mask_t access_masked_dom;
 
 	if (WARN_ON_ONCE(!domain || !mnt_root || !dir || !layer_masks_dom))
 		return true;
 	if (is_nouser_or_private(dir))
 		return true;
 
-	if (!landlock_init_layer_masks(domain, LANDLOCK_MASK_ACCESS_FS,
-				       layer_masks_dom, LANDLOCK_KEY_INODE))
+	access_masked_dom =
+		landlock_init_layer_masks(domain, LANDLOCK_MASK_ACCESS_FS,
+					  layer_masks_dom, LANDLOCK_KEY_INODE);
+	if (!access_masked_dom)
 		return true;
 
 	dget(dir);
 	while (true) {
 		struct dentry *parent_dentry;
+		struct landlock_id id = {
+			.type = LANDLOCK_KEY_INODE,
+		};
 
 		/* Gets all layers allowing all domain accesses. */
-		if (landlock_unmask_layers(find_rule(domain, dir),
-					   layer_masks_dom)) {
+		if (get_inode_id(dir, &id) &&
+		    unmask_layers_fs(domain, id, access_masked_dom,
+				     layer_masks_dom, dir)) {
 			/*
 			 * Stops when all handled accesses are allowed by at
 			 * least one rule in each layer.
diff --git a/security/landlock/net.c b/security/landlock/net.c
index 1e893123e787..a2aefc7967a1 100644
--- a/security/landlock/net.c
+++ b/security/landlock/net.c
@@ -53,6 +53,22 @@ int landlock_append_net_rule(struct landlock_ruleset *const ruleset,
 	return err;
 }
 
+static bool unmask_layers_net(const struct landlock_domain *const domain,
+			      const struct landlock_id id,
+			      struct layer_access_masks *masks,
+			      access_mask_t access_request)
+{
+	const struct landlock_rule *rule = NULL;
+	bool ret;
+
+	ret = landlock_unmask_layers(domain, id, masks, &rule);
+	if (rule)
+		trace_landlock_check_rule_net(
+			domain, ntohs((__force __be16)id.key.data),
+			access_request, rule);
+	return ret;
+}
+
 static int current_check_access_socket(struct socket *const sock,
 				       struct sockaddr *const address,
 				       const int addrlen,
@@ -60,7 +76,6 @@ static int current_check_access_socket(struct socket *const sock,
 {
 	__be16 port;
 	struct layer_access_masks layer_masks = {};
-	const struct landlock_rule *rule;
 	struct landlock_id id = {
 		.type = LANDLOCK_KEY_NET_PORT,
 	};
@@ -199,14 +214,14 @@ static int current_check_access_socket(struct socket *const sock,
 	id.key.data = (__force uintptr_t)port;
 	BUILD_BUG_ON(sizeof(port) > sizeof(id.key.data));
 
-	rule = landlock_find_rule(subject->domain, id);
 	access_request = landlock_init_layer_masks(subject->domain,
 						   access_request, &layer_masks,
 						   LANDLOCK_KEY_NET_PORT);
 	if (!access_request)
 		return 0;
 
-	if (landlock_unmask_layers(rule, &layer_masks))
+	if (unmask_layers_net(subject->domain, id, &layer_masks,
+			      access_request))
 		return 0;
 
 	audit_net.family = address->sa_family;
-- 
2.53.0


^ permalink raw reply related

* [PATCH v2 03/17] landlock: Split struct landlock_domain from struct landlock_ruleset
From: Mickaël Salaün @ 2026-04-06 14:37 UTC (permalink / raw)
  To: Christian Brauner, Günther Noack, Steven Rostedt
  Cc: Mickaël Salaün, Jann Horn, Jeff Xu, Justin Suess,
	Kees Cook, Masami Hiramatsu, Mathieu Desnoyers, Matthieu Buffet,
	Mikhail Ivanov, Tingmao Wang, kernel-team, linux-fsdevel,
	linux-security-module, linux-trace-kernel
In-Reply-To: <20260406143717.1815792-1-mic@digikod.net>

Switch all domain users to the new struct landlock_domain type
introduced by a previous commit.  This eliminates the conflation between
mutable rulesets and immutable domains.

Change the credential domain field to struct landlock_domain *, and
update all consumer functions.  Move the merge and inherit chain from
ruleset.c to domain.c; landlock_merge_ruleset() now returns struct
landlock_domain * and uses create_domain().  Lock assertions on the
destination are removed because domains have no lock.

Rename the per-layer FAM from access_masks to layers, and the single
ruleset field from access_masks to layer, to prepare for future
per-layer extensions beyond handled-access bitfields.

Clean up struct landlock_ruleset by removing domain-only fields
(hierarchy, work_free, num_layers) and replacing the layers[] FAM with a
single struct access_masks layer field.

Break the circular include between audit.h and cred.h by replacing the
cred.h include in audit.h with forward declarations.

Cc: Günther Noack <gnoack@google.com>
Cc: Tingmao Wang <m@maowtm.org>
Signed-off-by: Mickaël Salaün <mic@digikod.net>
---

Changes since v1:
- New patch.
---
 security/landlock/access.h   |   4 +-
 security/landlock/audit.c    |  12 +-
 security/landlock/audit.h    |   4 +-
 security/landlock/cred.c     |   6 +-
 security/landlock/cred.h     |  21 ++-
 security/landlock/domain.c   | 252 ++++++++++++++++++++++++++-
 security/landlock/domain.h   |  43 ++++-
 security/landlock/fs.c       |  28 ++-
 security/landlock/net.c      |   3 +-
 security/landlock/ruleset.c  | 329 ++++-------------------------------
 security/landlock/ruleset.h  | 129 ++------------
 security/landlock/syscalls.c |  10 +-
 security/landlock/task.c     |  20 +--
 13 files changed, 386 insertions(+), 475 deletions(-)

diff --git a/security/landlock/access.h b/security/landlock/access.h
index c19d5bc13944..76ab447dfcf7 100644
--- a/security/landlock/access.h
+++ b/security/landlock/access.h
@@ -19,8 +19,8 @@
 
 /*
  * All access rights that are denied by default whether they are handled or not
- * by a ruleset/layer.  This must be ORed with all ruleset->access_masks[]
- * entries when we need to get the absolute handled access masks, see
+ * by a ruleset/layer.  This must be ORed with all domain->layers[] entries when
+ * we need to get the absolute handled access masks, see
  * landlock_upgrade_handled_access_masks().
  */
 /* clang-format off */
diff --git a/security/landlock/audit.c b/security/landlock/audit.c
index 8d0edf94037d..75438b3cc887 100644
--- a/security/landlock/audit.c
+++ b/security/landlock/audit.c
@@ -135,7 +135,7 @@ static void log_domain(struct landlock_hierarchy *const hierarchy)
 }
 
 static struct landlock_hierarchy *
-get_hierarchy(const struct landlock_ruleset *const domain, const size_t layer)
+get_hierarchy(const struct landlock_domain *const domain, const size_t layer)
 {
 	struct landlock_hierarchy *hierarchy = domain->hierarchy;
 	ssize_t i;
@@ -168,7 +168,7 @@ static void test_get_hierarchy(struct kunit *const test)
 		.parent = &dom1_hierarchy,
 		.id = 30,
 	};
-	struct landlock_ruleset dom2 = {
+	struct landlock_domain dom2 = {
 		.hierarchy = &dom2_hierarchy,
 		.num_layers = 3,
 	};
@@ -182,7 +182,7 @@ static void test_get_hierarchy(struct kunit *const test)
 #endif /* CONFIG_SECURITY_LANDLOCK_KUNIT_TEST */
 
 /* Get the youngest layer that denied the access_request. */
-static size_t get_denied_layer(const struct landlock_ruleset *const domain,
+static size_t get_denied_layer(const struct landlock_domain *const domain,
 			       access_mask_t *const access_request,
 			       const struct layer_access_masks *masks)
 {
@@ -202,7 +202,7 @@ static size_t get_denied_layer(const struct landlock_ruleset *const domain,
 
 static void test_get_denied_layer(struct kunit *const test)
 {
-	const struct landlock_ruleset dom = {
+	const struct landlock_domain dom = {
 		.num_layers = 5,
 	};
 	const struct layer_access_masks masks = {
@@ -440,8 +440,8 @@ void landlock_log_denial(const struct landlock_cred_security *const subject,
  * Only domains which previously appeared in the audit logs are logged again.
  * This is useful to know when a domain will never show again in the audit log.
  *
- * Called in a work queue scheduled by landlock_put_ruleset_deferred() called
- * by hook_cred_free().
+ * Called in a work queue scheduled by landlock_put_domain_deferred() called by
+ * hook_cred_free().
  */
 void landlock_log_drop_domain(const struct landlock_hierarchy *const hierarchy)
 {
diff --git a/security/landlock/audit.h b/security/landlock/audit.h
index 56778331b58c..50452a791656 100644
--- a/security/landlock/audit.h
+++ b/security/landlock/audit.h
@@ -12,7 +12,9 @@
 #include <linux/lsm_audit.h>
 
 #include "access.h"
-#include "cred.h"
+
+struct landlock_cred_security;
+struct landlock_hierarchy;
 
 enum landlock_request_type {
 	LANDLOCK_REQUEST_PTRACE = 1,
diff --git a/security/landlock/cred.c b/security/landlock/cred.c
index cc419de75cd6..58b544993db4 100644
--- a/security/landlock/cred.c
+++ b/security/landlock/cred.c
@@ -22,7 +22,7 @@ static void hook_cred_transfer(struct cred *const new,
 	const struct landlock_cred_security *const old_llcred =
 		landlock_cred(old);
 
-	landlock_get_ruleset(old_llcred->domain);
+	landlock_get_domain(old_llcred->domain);
 	*landlock_cred(new) = *old_llcred;
 }
 
@@ -35,10 +35,10 @@ static int hook_cred_prepare(struct cred *const new,
 
 static void hook_cred_free(struct cred *const cred)
 {
-	struct landlock_ruleset *const dom = landlock_cred(cred)->domain;
+	struct landlock_domain *const dom = landlock_cred(cred)->domain;
 
 	if (dom)
-		landlock_put_ruleset_deferred(dom);
+		landlock_put_domain_deferred(dom);
 }
 
 #ifdef CONFIG_AUDIT
diff --git a/security/landlock/cred.h b/security/landlock/cred.h
index f287c56b5fd4..c42b0d3ecec8 100644
--- a/security/landlock/cred.h
+++ b/security/landlock/cred.h
@@ -16,6 +16,7 @@
 #include <linux/rcupdate.h>
 
 #include "access.h"
+#include "domain.h"
 #include "limits.h"
 #include "ruleset.h"
 #include "setup.h"
@@ -31,9 +32,9 @@
  */
 struct landlock_cred_security {
 	/**
-	 * @domain: Immutable ruleset enforced on a task.
+	 * @domain: Immutable domain enforced on a task.
 	 */
-	struct landlock_ruleset *domain;
+	struct landlock_domain *domain;
 
 #ifdef CONFIG_AUDIT
 	/**
@@ -70,22 +71,20 @@ landlock_cred(const struct cred *cred)
 static inline void landlock_cred_copy(struct landlock_cred_security *dst,
 				      const struct landlock_cred_security *src)
 {
-	landlock_put_ruleset(dst->domain);
+	landlock_put_domain(dst->domain);
 
 	*dst = *src;
 
-	landlock_get_ruleset(src->domain);
+	landlock_get_domain(src->domain);
 }
 
-static inline struct landlock_ruleset *landlock_get_current_domain(void)
+static inline struct landlock_domain *landlock_get_current_domain(void)
 {
 	return landlock_cred(current_cred())->domain;
 }
 
-/*
- * The call needs to come from an RCU read-side critical section.
- */
-static inline const struct landlock_ruleset *
+/* The call needs to come from an RCU read-side critical section. */
+static inline const struct landlock_domain *
 landlock_get_task_domain(const struct task_struct *const task)
 {
 	return landlock_cred(__task_cred(task))->domain;
@@ -126,7 +125,7 @@ landlock_get_applicable_subject(const struct cred *const cred,
 	const union access_masks_all masks_all = {
 		.masks = masks,
 	};
-	const struct landlock_ruleset *domain;
+	const struct landlock_domain *domain;
 	ssize_t layer_level;
 
 	if (!cred)
@@ -139,7 +138,7 @@ landlock_get_applicable_subject(const struct cred *const cred,
 	for (layer_level = domain->num_layers - 1; layer_level >= 0;
 	     layer_level--) {
 		union access_masks_all layer = {
-			.masks = domain->access_masks[layer_level],
+			.masks = domain->layers[layer_level],
 		};
 
 		if (layer.all & masks_all.all) {
diff --git a/security/landlock/domain.c b/security/landlock/domain.c
index cb79edf5df02..317fd94d3ccd 100644
--- a/security/landlock/domain.c
+++ b/security/landlock/domain.c
@@ -36,6 +36,36 @@
 #include "object.h"
 #include "ruleset.h"
 
+static void build_check_domain(void)
+{
+	const struct landlock_domain domain = {
+		.num_layers = ~0,
+	};
+
+	BUILD_BUG_ON(domain.num_layers < LANDLOCK_MAX_NUM_LAYERS);
+}
+
+static struct landlock_domain *create_domain(const u32 num_layers)
+{
+	struct landlock_domain *new_domain;
+
+	build_check_domain();
+	new_domain = kzalloc_flex(*new_domain, layers, num_layers,
+				  GFP_KERNEL_ACCOUNT);
+	if (!new_domain)
+		return ERR_PTR(-ENOMEM);
+
+	refcount_set(&new_domain->usage, 1);
+	new_domain->rules.root_inode = RB_ROOT;
+
+#if IS_ENABLED(CONFIG_INET)
+	new_domain->rules.root_net_port = RB_ROOT;
+#endif /* IS_ENABLED(CONFIG_INET) */
+
+	new_domain->num_layers = num_layers;
+	return new_domain;
+}
+
 static void free_domain(struct landlock_domain *const domain)
 {
 	might_sleep();
@@ -67,15 +97,15 @@ void landlock_put_domain_deferred(struct landlock_domain *const domain)
 	}
 }
 
-/* The returned access has the same lifetime as @ruleset. */
+/* The returned access has the same lifetime as @domain. */
 const struct landlock_rule *
-landlock_find_rule(const struct landlock_ruleset *const ruleset,
+landlock_find_rule(const struct landlock_domain *const domain,
 		   const struct landlock_id id)
 {
 	const struct rb_root *root;
 	const struct rb_node *node;
 
-	root = landlock_get_rule_root((struct landlock_rules *)&ruleset->rules,
+	root = landlock_get_rule_root((struct landlock_rules *)&domain->rules,
 				      id.type);
 	if (IS_ERR(root))
 		return NULL;
@@ -151,7 +181,7 @@ bool landlock_unmask_layers(const struct landlock_rule *const rule,
 }
 
 typedef access_mask_t
-get_access_mask_t(const struct landlock_ruleset *const ruleset,
+get_access_mask_t(const struct landlock_domain *const domain,
 		  const u16 layer_level);
 
 /**
@@ -169,7 +199,7 @@ get_access_mask_t(const struct landlock_ruleset *const ruleset,
  * any of the active layers in @domain.
  */
 access_mask_t
-landlock_init_layer_masks(const struct landlock_ruleset *const domain,
+landlock_init_layer_masks(const struct landlock_domain *const domain,
 			  const access_mask_t access_request,
 			  struct layer_access_masks *const masks,
 			  const enum landlock_key_type key_type)
@@ -209,6 +239,218 @@ landlock_init_layer_masks(const struct landlock_ruleset *const domain,
 	return handled_accesses;
 }
 
+static int merge_tree(struct landlock_domain *const dst,
+		      struct landlock_ruleset *const src,
+		      const enum landlock_key_type key_type)
+{
+	struct landlock_rule *walker_rule, *next_rule;
+	struct rb_root *src_root;
+	int err = 0;
+
+	might_sleep();
+	lockdep_assert_held(&src->lock);
+
+	src_root = landlock_get_rule_root(&src->rules, key_type);
+	if (IS_ERR(src_root))
+		return PTR_ERR(src_root);
+
+	/* Merges the @src tree. */
+	rbtree_postorder_for_each_entry_safe(walker_rule, next_rule, src_root,
+					     node) {
+		struct landlock_layer layers[] = { {
+			.level = dst->num_layers,
+		} };
+		const struct landlock_id id = {
+			.key = walker_rule->key,
+			.type = key_type,
+		};
+
+		if (WARN_ON_ONCE(walker_rule->num_layers != 1))
+			return -EINVAL;
+
+		if (WARN_ON_ONCE(walker_rule->layers[0].level != 0))
+			return -EINVAL;
+
+		layers[0].access = walker_rule->layers[0].access;
+
+		err = landlock_rule_insert(&dst->rules, id, &layers,
+					   ARRAY_SIZE(layers));
+		if (err)
+			return err;
+	}
+	return err;
+}
+
+static int merge_ruleset(struct landlock_domain *const dst,
+			 struct landlock_ruleset *const src)
+{
+	int err = 0;
+
+	might_sleep();
+	/* Should already be checked by landlock_merge_ruleset() */
+	if (WARN_ON_ONCE(!src))
+		return 0;
+	/* Only merge into a domain. */
+	if (WARN_ON_ONCE(!dst || !dst->hierarchy))
+		return -EINVAL;
+
+	mutex_lock(&src->lock);
+
+	/* Stacks the new layer. */
+	if (WARN_ON_ONCE(dst->num_layers < 1)) {
+		err = -EINVAL;
+		goto out_unlock;
+	}
+	dst->layers[dst->num_layers - 1] =
+		landlock_upgrade_handled_access_masks(src->layer);
+
+	/* Merges the @src inode tree. */
+	err = merge_tree(dst, src, LANDLOCK_KEY_INODE);
+	if (err)
+		goto out_unlock;
+
+#if IS_ENABLED(CONFIG_INET)
+	/* Merges the @src network port tree. */
+	err = merge_tree(dst, src, LANDLOCK_KEY_NET_PORT);
+	if (err)
+		goto out_unlock;
+#endif /* IS_ENABLED(CONFIG_INET) */
+
+out_unlock:
+	mutex_unlock(&src->lock);
+	return err;
+}
+
+static int inherit_tree(struct landlock_domain *const parent,
+			struct landlock_domain *const child,
+			const enum landlock_key_type key_type)
+{
+	struct landlock_rule *walker_rule, *next_rule;
+	struct rb_root *parent_root;
+	int err = 0;
+
+	might_sleep();
+
+	parent_root = landlock_get_rule_root(
+		(struct landlock_rules *)&parent->rules, key_type);
+	if (IS_ERR(parent_root))
+		return PTR_ERR(parent_root);
+
+	/* Copies the @parent inode or network tree. */
+	rbtree_postorder_for_each_entry_safe(walker_rule, next_rule,
+					     parent_root, node) {
+		const struct landlock_id id = {
+			.key = walker_rule->key,
+			.type = key_type,
+		};
+
+		err = landlock_rule_insert(&child->rules, id,
+					   &walker_rule->layers,
+					   walker_rule->num_layers);
+		if (err)
+			return err;
+	}
+	return err;
+}
+
+static int inherit_ruleset(struct landlock_domain *const parent,
+			   struct landlock_domain *const child)
+{
+	int err = 0;
+
+	might_sleep();
+	if (!parent)
+		return 0;
+
+	/* Copies the @parent inode tree. */
+	err = inherit_tree(parent, child, LANDLOCK_KEY_INODE);
+	if (err)
+		return err;
+
+#if IS_ENABLED(CONFIG_INET)
+	/* Copies the @parent network port tree. */
+	err = inherit_tree(parent, child, LANDLOCK_KEY_NET_PORT);
+	if (err)
+		return err;
+#endif /* IS_ENABLED(CONFIG_INET) */
+
+	if (WARN_ON_ONCE(child->num_layers <= parent->num_layers))
+		return -EINVAL;
+
+	/* Copies the parent layer stack and leaves a space for the new layer. */
+	memcpy(child->layers, parent->layers,
+	       flex_array_size(parent, layers, parent->num_layers));
+
+	if (WARN_ON_ONCE(!parent->hierarchy))
+		return -EINVAL;
+
+	landlock_get_hierarchy(parent->hierarchy);
+	child->hierarchy->parent = parent->hierarchy;
+
+	return 0;
+}
+
+/**
+ * landlock_merge_ruleset - Merge a ruleset with a domain
+ *
+ * @parent: Parent domain.
+ * @ruleset: New ruleset to be merged.
+ *
+ * The current task is requesting to be restricted.  The subjective credentials
+ * must not be in an overridden state. cf. landlock_init_hierarchy_log().
+ *
+ * Return: A new domain merging @parent and @ruleset on success, or ERR_PTR() on
+ * failure.  If @parent is NULL, the new domain duplicates @ruleset.
+ */
+struct landlock_domain *
+landlock_merge_ruleset(struct landlock_domain *const parent,
+		       struct landlock_ruleset *const ruleset)
+{
+	struct landlock_domain *new_dom __free(landlock_put_domain) = NULL;
+	u32 num_layers;
+	int err;
+
+	might_sleep();
+	if (WARN_ON_ONCE(!ruleset))
+		return ERR_PTR(-EINVAL);
+
+	if (parent) {
+		if (parent->num_layers >= LANDLOCK_MAX_NUM_LAYERS)
+			return ERR_PTR(-E2BIG);
+		num_layers = parent->num_layers + 1;
+	} else {
+		num_layers = 1;
+	}
+
+	/* Creates a new domain... */
+	new_dom = create_domain(num_layers);
+	if (IS_ERR(new_dom))
+		return new_dom;
+
+	new_dom->hierarchy =
+		kzalloc_obj(*new_dom->hierarchy, GFP_KERNEL_ACCOUNT);
+	if (!new_dom->hierarchy)
+		return ERR_PTR(-ENOMEM);
+
+	refcount_set(&new_dom->hierarchy->usage, 1);
+
+	/* ...as a child of @parent... */
+	err = inherit_ruleset(parent, new_dom);
+	if (err)
+		return ERR_PTR(err);
+
+	/* ...and including @ruleset. */
+	err = merge_ruleset(new_dom, ruleset);
+	if (err)
+		return ERR_PTR(err);
+
+	err = landlock_init_hierarchy_log(new_dom->hierarchy);
+	if (err)
+		return ERR_PTR(err);
+
+	return no_free_ptr(new_dom);
+}
+
 #ifdef CONFIG_AUDIT
 
 /**
diff --git a/security/landlock/domain.h b/security/landlock/domain.h
index afa97011ecd2..df11cb7d4f2b 100644
--- a/security/landlock/domain.h
+++ b/security/landlock/domain.h
@@ -196,7 +196,7 @@ struct landlock_domain {
 		 * @work_free: Enables to free a domain within a lockless
 		 * section.  This is only used by landlock_put_domain_deferred()
 		 * when @usage reaches zero.  The fields @usage, @num_layers and
-		 * @access_masks are then unused.
+		 * @layers are then unused.
 		 */
 		struct work_struct work_free;
 		struct {
@@ -212,7 +212,7 @@ struct landlock_domain {
 			 */
 			u32 num_layers;
 			/**
-			 * @access_masks: Contains the subset of filesystem and
+			 * @layers: Contains the subset of filesystem and
 			 * network actions that are restricted by a domain.  A
 			 * domain saves all layers of merged rulesets in a stack
 			 * (FAM), starting from the first layer to the last one.
@@ -222,28 +222,51 @@ struct landlock_domain {
 			 * overlapping access rights.  These layers are set once
 			 * and never changed for the lifetime of the domain.
 			 */
-			struct access_masks access_masks[];
+			struct access_masks layers[];
 		};
 	};
 };
 
+static inline access_mask_t
+landlock_get_fs_access_mask(const struct landlock_domain *const domain,
+			    const u16 layer_level)
+{
+	/* Handles all access rights that are initially denied by default. */
+	return domain->layers[layer_level].fs |
+	       _LANDLOCK_ACCESS_FS_INITIALLY_DENIED;
+}
+
+static inline access_mask_t
+landlock_get_net_access_mask(const struct landlock_domain *const domain,
+			     const u16 layer_level)
+{
+	return domain->layers[layer_level].net;
+}
+
+static inline access_mask_t
+landlock_get_scope_mask(const struct landlock_domain *const domain,
+			const u16 layer_level)
+{
+	return domain->layers[layer_level].scope;
+}
+
 /**
  * landlock_union_access_masks - Return all access rights handled in the
  *				 domain
  *
- * @domain: Landlock ruleset (used as a domain)
+ * @domain: Landlock domain
  *
  * Return: An access_masks result of the OR of all the domain's access masks.
  */
 static inline struct access_masks
-landlock_union_access_masks(const struct landlock_ruleset *const domain)
+landlock_union_access_masks(const struct landlock_domain *const domain)
 {
 	union access_masks_all matches = {};
 	size_t layer_level;
 
 	for (layer_level = 0; layer_level < domain->num_layers; layer_level++) {
 		union access_masks_all layer = {
-			.masks = domain->access_masks[layer_level],
+			.masks = domain->layers[layer_level],
 		};
 
 		matches.all |= layer.all;
@@ -258,15 +281,19 @@ void landlock_put_domain_deferred(struct landlock_domain *const domain);
 DEFINE_FREE(landlock_put_domain, struct landlock_domain *,
 	    if (!IS_ERR_OR_NULL(_T)) landlock_put_domain(_T))
 
+struct landlock_domain *
+landlock_merge_ruleset(struct landlock_domain *const parent,
+		       struct landlock_ruleset *const ruleset);
+
 const struct landlock_rule *
-landlock_find_rule(const struct landlock_ruleset *const ruleset,
+landlock_find_rule(const struct landlock_domain *const domain,
 		   const struct landlock_id id);
 
 bool landlock_unmask_layers(const struct landlock_rule *const rule,
 			    struct layer_access_masks *masks);
 
 access_mask_t
-landlock_init_layer_masks(const struct landlock_ruleset *const domain,
+landlock_init_layer_masks(const struct landlock_domain *const domain,
 			  const access_mask_t access_request,
 			  struct layer_access_masks *masks,
 			  const enum landlock_key_type key_type);
diff --git a/security/landlock/fs.c b/security/landlock/fs.c
index c1ecfe239032..3ef453fc14a6 100644
--- a/security/landlock/fs.c
+++ b/security/landlock/fs.c
@@ -336,12 +336,10 @@ int landlock_append_fs_rule(struct landlock_ruleset *const ruleset,
 	if (!d_is_dir(path->dentry) &&
 	    !access_mask_subset(access_rights, ACCESS_FILE))
 		return -EINVAL;
-	if (WARN_ON_ONCE(ruleset->num_layers != 1))
-		return -EINVAL;
-
 	/* Transforms relative access rights to absolute ones. */
-	access_rights |= LANDLOCK_MASK_ACCESS_FS &
-			 ~landlock_get_fs_access_mask(ruleset, 0);
+	access_rights |=
+		LANDLOCK_MASK_ACCESS_FS &
+		~(ruleset->layer.fs | _LANDLOCK_ACCESS_FS_INITIALLY_DENIED);
 	id.key.object = get_inode_object(d_backing_inode(path->dentry));
 	if (IS_ERR(id.key.object))
 		return PTR_ERR(id.key.object);
@@ -364,7 +362,7 @@ int landlock_append_fs_rule(struct landlock_ruleset *const ruleset,
  * Returns NULL if no rule is found or if @dentry is negative.
  */
 static const struct landlock_rule *
-find_rule(const struct landlock_ruleset *const domain,
+find_rule(const struct landlock_domain *const domain,
 	  const struct dentry *const dentry)
 {
 	const struct landlock_rule *rule;
@@ -740,7 +738,7 @@ static void test_is_eacces_with_write(struct kunit *const test)
  * Return: True if the access request is granted, false otherwise.
  */
 static bool
-is_access_to_paths_allowed(const struct landlock_ruleset *const domain,
+is_access_to_paths_allowed(const struct landlock_domain *const domain,
 			   const struct path *const path,
 			   const access_mask_t access_request_parent1,
 			   struct layer_access_masks *layer_masks_parent1,
@@ -1026,7 +1024,7 @@ static access_mask_t maybe_remove(const struct dentry *const dentry)
  * Return: True if all the domain access rights are allowed for @dir, false if
  * the walk reached @mnt_root.
  */
-static bool collect_domain_accesses(const struct landlock_ruleset *const domain,
+static bool collect_domain_accesses(const struct landlock_domain *const domain,
 				    const struct dentry *const mnt_root,
 				    struct dentry *dir,
 				    struct layer_access_masks *layer_masks_dom)
@@ -1578,8 +1576,8 @@ static int hook_path_truncate(const struct path *const path)
  * @masks: Layer access masks to unmask
  * @access: Access bits that control scoping
  */
-static void unmask_scoped_access(const struct landlock_ruleset *const client,
-				 const struct landlock_ruleset *const server,
+static void unmask_scoped_access(const struct landlock_domain *const client,
+				 const struct landlock_domain *const server,
 				 struct layer_access_masks *const masks,
 				 const access_mask_t access)
 {
@@ -1633,7 +1631,7 @@ static void unmask_scoped_access(const struct landlock_ruleset *const client,
 static int hook_unix_find(const struct path *const path, struct sock *other,
 			  int flags)
 {
-	const struct landlock_ruleset *dom_other;
+	const struct landlock_domain *dom_other;
 	const struct landlock_cred_security *subject;
 	struct layer_access_masks layer_masks;
 	struct landlock_request request = {};
@@ -1914,7 +1912,7 @@ static bool control_current_fowner(struct fown_struct *const fown)
 
 static void hook_file_set_fowner(struct file *file)
 {
-	struct landlock_ruleset *prev_dom;
+	struct landlock_domain *prev_dom;
 	struct landlock_cred_security fown_subject = {};
 	size_t fown_layer = 0;
 
@@ -1926,7 +1924,7 @@ static void hook_file_set_fowner(struct file *file)
 			landlock_get_applicable_subject(
 				current_cred(), signal_scope, &fown_layer);
 		if (new_subject) {
-			landlock_get_ruleset(new_subject->domain);
+			landlock_get_domain(new_subject->domain);
 			fown_subject = *new_subject;
 		}
 	}
@@ -1938,12 +1936,12 @@ static void hook_file_set_fowner(struct file *file)
 #endif /* CONFIG_AUDIT*/
 
 	/* May be called in an RCU read-side critical section. */
-	landlock_put_ruleset_deferred(prev_dom);
+	landlock_put_domain_deferred(prev_dom);
 }
 
 static void hook_file_free_security(struct file *file)
 {
-	landlock_put_ruleset_deferred(landlock_file(file)->fown_subject.domain);
+	landlock_put_domain_deferred(landlock_file(file)->fown_subject.domain);
 }
 
 static struct security_hook_list landlock_hooks[] __ro_after_init = {
diff --git a/security/landlock/net.c b/security/landlock/net.c
index 34a72a4f833d..de108b3277bc 100644
--- a/security/landlock/net.c
+++ b/security/landlock/net.c
@@ -32,8 +32,7 @@ int landlock_append_net_rule(struct landlock_ruleset *const ruleset,
 	BUILD_BUG_ON(sizeof(port) > sizeof(id.key.data));
 
 	/* Transforms relative access rights to absolute ones. */
-	access_rights |= LANDLOCK_MASK_ACCESS_NET &
-			 ~landlock_get_net_access_mask(ruleset, 0);
+	access_rights |= LANDLOCK_MASK_ACCESS_NET & ~ruleset->layer.net;
 
 	mutex_lock(&ruleset->lock);
 	err = landlock_insert_rule(ruleset, id, access_rights);
diff --git a/security/landlock/ruleset.c b/security/landlock/ruleset.c
index 0cf31a7e4c7b..c220e0f9cf5f 100644
--- a/security/landlock/ruleset.c
+++ b/security/landlock/ruleset.c
@@ -20,22 +20,27 @@
 #include <linux/refcount.h>
 #include <linux/slab.h>
 #include <linux/spinlock.h>
-#include <linux/workqueue.h>
 
 #include "access.h"
-#include "domain.h"
 #include "limits.h"
 #include "object.h"
 #include "ruleset.h"
 
-static struct landlock_ruleset *create_ruleset(const u32 num_layers)
+struct landlock_ruleset *
+landlock_create_ruleset(const access_mask_t fs_access_mask,
+			const access_mask_t net_access_mask,
+			const access_mask_t scope_mask)
 {
 	struct landlock_ruleset *new_ruleset;
 
-	new_ruleset = kzalloc_flex(*new_ruleset, access_masks, num_layers,
-				   GFP_KERNEL_ACCOUNT);
+	/* Informs about useless ruleset. */
+	if (!fs_access_mask && !net_access_mask && !scope_mask)
+		return ERR_PTR(-ENOMSG);
+
+	new_ruleset = kzalloc(sizeof(*new_ruleset), GFP_KERNEL_ACCOUNT);
 	if (!new_ruleset)
 		return ERR_PTR(-ENOMEM);
+
 	refcount_set(&new_ruleset->usage, 1);
 	mutex_init(&new_ruleset->lock);
 	new_ruleset->rules.root_inode = RB_ROOT;
@@ -44,34 +49,21 @@ static struct landlock_ruleset *create_ruleset(const u32 num_layers)
 	new_ruleset->rules.root_net_port = RB_ROOT;
 #endif /* IS_ENABLED(CONFIG_INET) */
 
-	new_ruleset->num_layers = num_layers;
-	/*
-	 * hierarchy = NULL
-	 * rules.num_rules = 0
-	 * access_masks[] = 0
-	 */
-	return new_ruleset;
-}
-
-struct landlock_ruleset *
-landlock_create_ruleset(const access_mask_t fs_access_mask,
-			const access_mask_t net_access_mask,
-			const access_mask_t scope_mask)
-{
-	struct landlock_ruleset *new_ruleset;
-
-	/* Informs about useless ruleset. */
-	if (!fs_access_mask && !net_access_mask && !scope_mask)
-		return ERR_PTR(-ENOMSG);
-	new_ruleset = create_ruleset(1);
-	if (IS_ERR(new_ruleset))
-		return new_ruleset;
-	if (fs_access_mask)
-		landlock_add_fs_access_mask(new_ruleset, fs_access_mask, 0);
-	if (net_access_mask)
-		landlock_add_net_access_mask(new_ruleset, net_access_mask, 0);
-	if (scope_mask)
-		landlock_add_scope_mask(new_ruleset, scope_mask, 0);
+	/* Should already be checked in sys_landlock_create_ruleset(). */
+	if (fs_access_mask) {
+		WARN_ON_ONCE(fs_access_mask !=
+			     (fs_access_mask & LANDLOCK_MASK_ACCESS_FS));
+		new_ruleset->layer.fs |= fs_access_mask;
+	}
+	if (net_access_mask) {
+		WARN_ON_ONCE(net_access_mask !=
+			     (net_access_mask & LANDLOCK_MASK_ACCESS_NET));
+		new_ruleset->layer.net |= net_access_mask;
+	}
+	if (scope_mask) {
+		WARN_ON_ONCE(scope_mask != (scope_mask & LANDLOCK_MASK_SCOPE));
+		new_ruleset->layer.scope |= scope_mask;
+	}
 	return new_ruleset;
 }
 
@@ -128,7 +120,7 @@ create_rule(const struct landlock_id id,
 		return ERR_PTR(-ENOMEM);
 	RB_CLEAR_NODE(&new_rule->node);
 	if (is_object_pointer(id.type)) {
-		/* This should have been caught by insert_rule(). */
+		/* This should have been caught by landlock_rule_insert(). */
 		WARN_ON_ONCE(!id.key.object);
 		landlock_get_object(id.key.object);
 	}
@@ -144,12 +136,6 @@ create_rule(const struct landlock_id id,
 	return new_rule;
 }
 
-static struct rb_root *get_root(struct landlock_ruleset *const ruleset,
-				const enum landlock_key_type key_type)
-{
-	return landlock_get_rule_root(&ruleset->rules, key_type);
-}
-
 static void free_rule(struct landlock_rule *const rule,
 		      const enum landlock_key_type key_type)
 {
@@ -166,16 +152,12 @@ static void build_check_ruleset(void)
 	const struct landlock_rules rules = {
 		.num_rules = ~0,
 	};
-	const struct landlock_ruleset ruleset = {
-		.num_layers = ~0,
-	};
 
 	BUILD_BUG_ON(rules.num_rules < LANDLOCK_MAX_NUM_RULES);
-	BUILD_BUG_ON(ruleset.num_layers < LANDLOCK_MAX_NUM_LAYERS);
 }
 
 /**
- * insert_rule - Create and insert a rule in a rule set
+ * landlock_rule_insert - Create and insert a rule in a rule set
  *
  * @rules: The rule storage to be updated.  The caller is responsible for
  *         any required locking.  For rulesets, this means holding
@@ -197,10 +179,10 @@ static void build_check_ruleset(void)
  *
  * Return: 0 on success, -errno on failure.
  */
-static int insert_rule(struct landlock_rules *const rules,
-		       const struct landlock_id id,
-		       const struct landlock_layer (*layers)[],
-		       const size_t num_layers)
+int landlock_rule_insert(struct landlock_rules *const rules,
+			 const struct landlock_id id,
+			 const struct landlock_layer (*layers)[],
+			 const size_t num_layers)
 {
 	struct rb_node **walker_node;
 	struct rb_node *parent_node = NULL;
@@ -240,7 +222,7 @@ static int insert_rule(struct landlock_rules *const rules,
 		if ((*layers)[0].level == 0) {
 			/*
 			 * Extends access rights when the request comes from
-			 * landlock_add_rule(2), i.e. contained by a ruleset.
+			 * landlock_add_rule(2), i.e. @rules is not a domain.
 			 */
 			if (WARN_ON_ONCE(this->num_layers != 1))
 				return -EINVAL;
@@ -301,176 +283,14 @@ int landlock_insert_rule(struct landlock_ruleset *const ruleset,
 {
 	struct landlock_layer layers[] = { {
 		.access = access,
-		/* When @level is zero, insert_rule() extends @ruleset. */
+		/* When @level is zero, landlock_rule_insert() extends @ruleset. */
 		.level = 0,
 	} };
 
 	build_check_layer();
 	lockdep_assert_held(&ruleset->lock);
-	return insert_rule(&ruleset->rules, id, &layers, ARRAY_SIZE(layers));
-}
-
-static int merge_tree(struct landlock_ruleset *const dst,
-		      struct landlock_ruleset *const src,
-		      const enum landlock_key_type key_type)
-{
-	struct landlock_rule *walker_rule, *next_rule;
-	struct rb_root *src_root;
-	int err = 0;
-
-	might_sleep();
-	lockdep_assert_held(&dst->lock);
-	lockdep_assert_held(&src->lock);
-
-	src_root = get_root(src, key_type);
-	if (IS_ERR(src_root))
-		return PTR_ERR(src_root);
-
-	/* Merges the @src tree. */
-	rbtree_postorder_for_each_entry_safe(walker_rule, next_rule, src_root,
-					     node) {
-		struct landlock_layer layers[] = { {
-			.level = dst->num_layers,
-		} };
-		const struct landlock_id id = {
-			.key = walker_rule->key,
-			.type = key_type,
-		};
-
-		if (WARN_ON_ONCE(walker_rule->num_layers != 1))
-			return -EINVAL;
-
-		if (WARN_ON_ONCE(walker_rule->layers[0].level != 0))
-			return -EINVAL;
-
-		layers[0].access = walker_rule->layers[0].access;
-
-		err = insert_rule(&dst->rules, id, &layers, ARRAY_SIZE(layers));
-		if (err)
-			return err;
-	}
-	return err;
-}
-
-static int merge_ruleset(struct landlock_ruleset *const dst,
-			 struct landlock_ruleset *const src)
-{
-	int err = 0;
-
-	might_sleep();
-	/* Should already be checked by landlock_merge_ruleset() */
-	if (WARN_ON_ONCE(!src))
-		return 0;
-	/* Only merge into a domain. */
-	if (WARN_ON_ONCE(!dst || !dst->hierarchy))
-		return -EINVAL;
-
-	/* Locks @dst first because we are its only owner. */
-	mutex_lock(&dst->lock);
-	mutex_lock_nested(&src->lock, SINGLE_DEPTH_NESTING);
-
-	/* Stacks the new layer. */
-	if (WARN_ON_ONCE(src->num_layers != 1 || dst->num_layers < 1)) {
-		err = -EINVAL;
-		goto out_unlock;
-	}
-	dst->access_masks[dst->num_layers - 1] =
-		landlock_upgrade_handled_access_masks(src->access_masks[0]);
-
-	/* Merges the @src inode tree. */
-	err = merge_tree(dst, src, LANDLOCK_KEY_INODE);
-	if (err)
-		goto out_unlock;
-
-#if IS_ENABLED(CONFIG_INET)
-	/* Merges the @src network port tree. */
-	err = merge_tree(dst, src, LANDLOCK_KEY_NET_PORT);
-	if (err)
-		goto out_unlock;
-#endif /* IS_ENABLED(CONFIG_INET) */
-
-out_unlock:
-	mutex_unlock(&src->lock);
-	mutex_unlock(&dst->lock);
-	return err;
-}
-
-static int inherit_tree(struct landlock_ruleset *const parent,
-			struct landlock_ruleset *const child,
-			const enum landlock_key_type key_type)
-{
-	struct landlock_rule *walker_rule, *next_rule;
-	struct rb_root *parent_root;
-	int err = 0;
-
-	might_sleep();
-	lockdep_assert_held(&parent->lock);
-	lockdep_assert_held(&child->lock);
-
-	parent_root = get_root(parent, key_type);
-	if (IS_ERR(parent_root))
-		return PTR_ERR(parent_root);
-
-	/* Copies the @parent inode or network tree. */
-	rbtree_postorder_for_each_entry_safe(walker_rule, next_rule,
-					     parent_root, node) {
-		const struct landlock_id id = {
-			.key = walker_rule->key,
-			.type = key_type,
-		};
-
-		err = insert_rule(&child->rules, id, &walker_rule->layers,
-				  walker_rule->num_layers);
-		if (err)
-			return err;
-	}
-	return err;
-}
-
-static int inherit_ruleset(struct landlock_ruleset *const parent,
-			   struct landlock_ruleset *const child)
-{
-	int err = 0;
-
-	might_sleep();
-	if (!parent)
-		return 0;
-
-	/* Locks @child first because we are its only owner. */
-	mutex_lock(&child->lock);
-	mutex_lock_nested(&parent->lock, SINGLE_DEPTH_NESTING);
-
-	/* Copies the @parent inode tree. */
-	err = inherit_tree(parent, child, LANDLOCK_KEY_INODE);
-	if (err)
-		goto out_unlock;
-
-#if IS_ENABLED(CONFIG_INET)
-	/* Copies the @parent network port tree. */
-	err = inherit_tree(parent, child, LANDLOCK_KEY_NET_PORT);
-	if (err)
-		goto out_unlock;
-#endif /* IS_ENABLED(CONFIG_INET) */
-
-	if (WARN_ON_ONCE(child->num_layers <= parent->num_layers)) {
-		err = -EINVAL;
-		goto out_unlock;
-	}
-	/* Copies the parent layer stack and leaves a space for the new layer. */
-	memcpy(child->access_masks, parent->access_masks,
-	       flex_array_size(parent, access_masks, parent->num_layers));
-
-	if (WARN_ON_ONCE(!parent->hierarchy)) {
-		err = -EINVAL;
-		goto out_unlock;
-	}
-	landlock_get_hierarchy(parent->hierarchy);
-	child->hierarchy->parent = parent->hierarchy;
-
-out_unlock:
-	mutex_unlock(&parent->lock);
-	mutex_unlock(&child->lock);
-	return err;
+	return landlock_rule_insert(&ruleset->rules, id, &layers,
+				    ARRAY_SIZE(layers));
 }
 
 void landlock_free_rules(struct landlock_rules *const rules)
@@ -493,7 +313,6 @@ static void free_ruleset(struct landlock_ruleset *const ruleset)
 {
 	might_sleep();
 	landlock_free_rules(&ruleset->rules);
-	landlock_put_hierarchy(ruleset->hierarchy);
 	kfree(ruleset);
 }
 
@@ -503,81 +322,3 @@ void landlock_put_ruleset(struct landlock_ruleset *const ruleset)
 	if (ruleset && refcount_dec_and_test(&ruleset->usage))
 		free_ruleset(ruleset);
 }
-
-static void free_ruleset_work(struct work_struct *const work)
-{
-	struct landlock_ruleset *ruleset;
-
-	ruleset = container_of(work, struct landlock_ruleset, work_free);
-	free_ruleset(ruleset);
-}
-
-/* Only called by hook_cred_free(). */
-void landlock_put_ruleset_deferred(struct landlock_ruleset *const ruleset)
-{
-	if (ruleset && refcount_dec_and_test(&ruleset->usage)) {
-		INIT_WORK(&ruleset->work_free, free_ruleset_work);
-		schedule_work(&ruleset->work_free);
-	}
-}
-
-/**
- * landlock_merge_ruleset - Merge a ruleset with a domain
- *
- * @parent: Parent domain.
- * @ruleset: New ruleset to be merged.
- *
- * The current task is requesting to be restricted.  The subjective credentials
- * must not be in an overridden state. cf. landlock_init_hierarchy_log().
- *
- * Return: A new domain merging @parent and @ruleset on success, or ERR_PTR()
- * on failure.  If @parent is NULL, the new domain duplicates @ruleset.
- */
-struct landlock_ruleset *
-landlock_merge_ruleset(struct landlock_ruleset *const parent,
-		       struct landlock_ruleset *const ruleset)
-{
-	struct landlock_ruleset *new_dom __free(landlock_put_ruleset) = NULL;
-	u32 num_layers;
-	int err;
-
-	might_sleep();
-	if (WARN_ON_ONCE(!ruleset || parent == ruleset))
-		return ERR_PTR(-EINVAL);
-
-	if (parent) {
-		if (parent->num_layers >= LANDLOCK_MAX_NUM_LAYERS)
-			return ERR_PTR(-E2BIG);
-		num_layers = parent->num_layers + 1;
-	} else {
-		num_layers = 1;
-	}
-
-	/* Creates a new domain... */
-	new_dom = create_ruleset(num_layers);
-	if (IS_ERR(new_dom))
-		return new_dom;
-
-	new_dom->hierarchy =
-		kzalloc_obj(*new_dom->hierarchy, GFP_KERNEL_ACCOUNT);
-	if (!new_dom->hierarchy)
-		return ERR_PTR(-ENOMEM);
-
-	refcount_set(&new_dom->hierarchy->usage, 1);
-
-	/* ...as a child of @parent... */
-	err = inherit_ruleset(parent, new_dom);
-	if (err)
-		return ERR_PTR(err);
-
-	/* ...and including @ruleset. */
-	err = merge_ruleset(new_dom, ruleset);
-	if (err)
-		return ERR_PTR(err);
-
-	err = landlock_init_hierarchy_log(new_dom->hierarchy);
-	if (err)
-		return ERR_PTR(err);
-
-	return no_free_ptr(new_dom);
-}
diff --git a/security/landlock/ruleset.h b/security/landlock/ruleset.h
index 1d3a9c36eb74..bf127ff7496e 100644
--- a/security/landlock/ruleset.h
+++ b/security/landlock/ruleset.h
@@ -14,14 +14,11 @@
 #include <linux/mutex.h>
 #include <linux/rbtree.h>
 #include <linux/refcount.h>
-#include <linux/workqueue.h>
 
 #include "access.h"
 #include "limits.h"
 #include "object.h"
 
-struct landlock_hierarchy;
-
 /**
  * struct landlock_layer - Access rights for a given layer
  */
@@ -147,54 +144,20 @@ struct landlock_ruleset {
 	 * @rules: Red-black tree storage for rules.
 	 */
 	struct landlock_rules rules;
-
 	/**
-	 * @hierarchy: Enables hierarchy identification even when a parent
-	 * domain vanishes.  This is needed for the ptrace protection.
+	 * @lock: Protects against concurrent modifications of @rules, if @usage
+	 * is greater than zero.
+	 */
+	struct mutex lock;
+	/**
+	 * @usage: Number of file descriptors referencing this ruleset.
 	 */
-	struct landlock_hierarchy *hierarchy;
-	union {
-		/**
-		 * @work_free: Enables to free a ruleset within a lockless
-		 * section.  This is only used by
-		 * landlock_put_ruleset_deferred() when @usage reaches zero. The
-		 * fields @lock, @usage, @num_layers and @access_masks are then
-		 * unused.
-		 */
-		struct work_struct work_free;
-		struct {
-			/**
-			 * @lock: Protects against concurrent modifications of
-			 * @root, if @usage is greater than zero.
-			 */
-			struct mutex lock;
-			/**
-			 * @usage: Number of processes (i.e. domains) or file
-			 * descriptors referencing this ruleset.
-			 */
-			refcount_t usage;
-			/**
-			 * @num_layers: Number of layers that are used in this
-			 * ruleset.  This enables to check that all the layers
-			 * allow an access request.  A value of 0 identifies a
-			 * non-merged ruleset (i.e. not a domain).
-			 */
-			u32 num_layers;
-			/**
-			 * @access_masks: Contains the subset of filesystem and
-			 * network actions that are restricted by a ruleset.
-			 * A domain saves all layers of merged rulesets in a
-			 * stack (FAM), starting from the first layer to the
-			 * last one.  These layers are used when merging
-			 * rulesets, for user space backward compatibility
-			 * (i.e. future-proof), and to properly handle merged
-			 * rulesets without overlapping access rights.  These
-			 * layers are set once and never changed for the
-			 * lifetime of the ruleset.
-			 */
-			struct access_masks access_masks[];
-		};
-	};
+	refcount_t usage;
+	/**
+	 * @layer: Contains the subset of filesystem and network actions, and
+	 * of scopes, that are handled by this ruleset.
+	 */
+	struct access_masks layer;
 };
 
 struct landlock_ruleset *
@@ -203,7 +166,6 @@ landlock_create_ruleset(const access_mask_t access_mask_fs,
 			const access_mask_t scope_mask);
 
 void landlock_put_ruleset(struct landlock_ruleset *const ruleset);
-void landlock_put_ruleset_deferred(struct landlock_ruleset *const ruleset);
 
 DEFINE_FREE(landlock_put_ruleset, struct landlock_ruleset *,
 	    if (!IS_ERR_OR_NULL(_T)) landlock_put_ruleset(_T))
@@ -212,11 +174,12 @@ int landlock_insert_rule(struct landlock_ruleset *const ruleset,
 			 const struct landlock_id id,
 			 const access_mask_t access);
 
-void landlock_free_rules(struct landlock_rules *const rules);
+int landlock_rule_insert(struct landlock_rules *const rules,
+			 const struct landlock_id id,
+			 const struct landlock_layer (*layers)[],
+			 const size_t num_layers);
 
-struct landlock_ruleset *
-landlock_merge_ruleset(struct landlock_ruleset *const parent,
-		       struct landlock_ruleset *const ruleset);
+void landlock_free_rules(struct landlock_rules *const rules);
 
 /**
  * landlock_get_rule_root - Get the root of a rule tree by key type
@@ -251,62 +214,4 @@ static inline void landlock_get_ruleset(struct landlock_ruleset *const ruleset)
 		refcount_inc(&ruleset->usage);
 }
 
-static inline void
-landlock_add_fs_access_mask(struct landlock_ruleset *const ruleset,
-			    const access_mask_t fs_access_mask,
-			    const u16 layer_level)
-{
-	access_mask_t fs_mask = fs_access_mask & LANDLOCK_MASK_ACCESS_FS;
-
-	/* Should already be checked in sys_landlock_create_ruleset(). */
-	WARN_ON_ONCE(fs_access_mask != fs_mask);
-	ruleset->access_masks[layer_level].fs |= fs_mask;
-}
-
-static inline void
-landlock_add_net_access_mask(struct landlock_ruleset *const ruleset,
-			     const access_mask_t net_access_mask,
-			     const u16 layer_level)
-{
-	access_mask_t net_mask = net_access_mask & LANDLOCK_MASK_ACCESS_NET;
-
-	/* Should already be checked in sys_landlock_create_ruleset(). */
-	WARN_ON_ONCE(net_access_mask != net_mask);
-	ruleset->access_masks[layer_level].net |= net_mask;
-}
-
-static inline void
-landlock_add_scope_mask(struct landlock_ruleset *const ruleset,
-			const access_mask_t scope_mask, const u16 layer_level)
-{
-	access_mask_t mask = scope_mask & LANDLOCK_MASK_SCOPE;
-
-	/* Should already be checked in sys_landlock_create_ruleset(). */
-	WARN_ON_ONCE(scope_mask != mask);
-	ruleset->access_masks[layer_level].scope |= mask;
-}
-
-static inline access_mask_t
-landlock_get_fs_access_mask(const struct landlock_ruleset *const ruleset,
-			    const u16 layer_level)
-{
-	/* Handles all initially denied by default access rights. */
-	return ruleset->access_masks[layer_level].fs |
-	       _LANDLOCK_ACCESS_FS_INITIALLY_DENIED;
-}
-
-static inline access_mask_t
-landlock_get_net_access_mask(const struct landlock_ruleset *const ruleset,
-			     const u16 layer_level)
-{
-	return ruleset->access_masks[layer_level].net;
-}
-
-static inline access_mask_t
-landlock_get_scope_mask(const struct landlock_ruleset *const ruleset,
-			const u16 layer_level)
-{
-	return ruleset->access_masks[layer_level].scope;
-}
-
 #endif /* _SECURITY_LANDLOCK_RULESET_H */
diff --git a/security/landlock/syscalls.c b/security/landlock/syscalls.c
index accfd2e5a0cd..73ccc32d0afd 100644
--- a/security/landlock/syscalls.c
+++ b/security/landlock/syscalls.c
@@ -283,8 +283,6 @@ static struct landlock_ruleset *get_ruleset_from_fd(const int fd,
 	if (!(fd_file(ruleset_f)->f_mode & mode))
 		return ERR_PTR(-EPERM);
 	ruleset = fd_file(ruleset_f)->private_data;
-	if (WARN_ON_ONCE(ruleset->num_layers != 1))
-		return ERR_PTR(-EINVAL);
 	landlock_get_ruleset(ruleset);
 	return ruleset;
 }
@@ -341,7 +339,7 @@ static int add_rule_path_beneath(struct landlock_ruleset *const ruleset,
 		return -ENOMSG;
 
 	/* Checks that allowed_access matches the @ruleset constraints. */
-	mask = ruleset->access_masks[0].fs;
+	mask = ruleset->layer.fs;
 	if ((path_beneath_attr.allowed_access | mask) != mask)
 		return -EINVAL;
 
@@ -377,7 +375,7 @@ static int add_rule_net_port(struct landlock_ruleset *ruleset,
 		return -ENOMSG;
 
 	/* Checks that allowed_access matches the @ruleset constraints. */
-	mask = landlock_get_net_access_mask(ruleset, 0);
+	mask = ruleset->layer.net;
 	if ((net_port_attr.allowed_access | mask) != mask)
 		return -EINVAL;
 
@@ -556,7 +554,7 @@ SYSCALL_DEFINE2(landlock_restrict_self, const int, ruleset_fd, const __u32,
 		 * manipulating the current credentials because they are
 		 * dedicated per thread.
 		 */
-		struct landlock_ruleset *const new_dom =
+		struct landlock_domain *const new_dom =
 			landlock_merge_ruleset(new_llcred->domain, ruleset);
 		if (IS_ERR(new_dom)) {
 			abort_creds(new_cred);
@@ -571,7 +569,7 @@ SYSCALL_DEFINE2(landlock_restrict_self, const int, ruleset_fd, const __u32,
 #endif /* CONFIG_AUDIT */
 
 		/* Replaces the old (prepared) domain. */
-		landlock_put_ruleset(new_llcred->domain);
+		landlock_put_domain(new_llcred->domain);
 		new_llcred->domain = new_dom;
 
 #ifdef CONFIG_AUDIT
diff --git a/security/landlock/task.c b/security/landlock/task.c
index 6d46042132ce..2e7ee62958b2 100644
--- a/security/landlock/task.c
+++ b/security/landlock/task.c
@@ -41,8 +41,8 @@
  * Return: True if @parent is an ancestor of or equal to @child, false
  * otherwise.
  */
-static bool domain_scope_le(const struct landlock_ruleset *const parent,
-			    const struct landlock_ruleset *const child)
+static bool domain_scope_le(const struct landlock_domain *const parent,
+			    const struct landlock_domain *const child)
 {
 	const struct landlock_hierarchy *walker;
 
@@ -63,8 +63,8 @@ static bool domain_scope_le(const struct landlock_ruleset *const parent,
 	return false;
 }
 
-static int domain_ptrace(const struct landlock_ruleset *const parent,
-			 const struct landlock_ruleset *const child)
+static int domain_ptrace(const struct landlock_domain *const parent,
+			 const struct landlock_domain *const child)
 {
 	if (domain_scope_le(parent, child))
 		return 0;
@@ -97,7 +97,7 @@ static int hook_ptrace_access_check(struct task_struct *const child,
 
 	scoped_guard(rcu)
 	{
-		const struct landlock_ruleset *const child_dom =
+		const struct landlock_domain *const child_dom =
 			landlock_get_task_domain(child);
 		err = domain_ptrace(parent_subject->domain, child_dom);
 	}
@@ -136,7 +136,7 @@ static int hook_ptrace_access_check(struct task_struct *const child,
 static int hook_ptrace_traceme(struct task_struct *const parent)
 {
 	const struct landlock_cred_security *parent_subject;
-	const struct landlock_ruleset *child_dom;
+	const struct landlock_domain *child_dom;
 	int err;
 
 	child_dom = landlock_get_current_domain();
@@ -177,8 +177,8 @@ static int hook_ptrace_traceme(struct task_struct *const parent)
  * Return: True if @server is in a different domain from @client and @client
  * is scoped to access @server (i.e. access should be denied), false otherwise.
  */
-static bool domain_is_scoped(const struct landlock_ruleset *const client,
-			     const struct landlock_ruleset *const server,
+static bool domain_is_scoped(const struct landlock_domain *const client,
+			     const struct landlock_domain *const server,
 			     access_mask_t scope)
 {
 	int client_layer, server_layer;
@@ -237,9 +237,9 @@ static bool domain_is_scoped(const struct landlock_ruleset *const client,
 }
 
 static bool sock_is_scoped(struct sock *const other,
-			   const struct landlock_ruleset *const domain)
+			   const struct landlock_domain *const domain)
 {
-	const struct landlock_ruleset *dom_other;
+	const struct landlock_domain *dom_other;
 
 	/* The credentials will not change. */
 	lockdep_assert_held(&unix_sk(other)->lock);
-- 
2.53.0


^ permalink raw reply related

* [PATCH v2 07/17] landlock: Add landlock_add_rule_fs and landlock_add_rule_net tracepoints
From: Mickaël Salaün @ 2026-04-06 14:37 UTC (permalink / raw)
  To: Christian Brauner, Günther Noack, Steven Rostedt
  Cc: Mickaël Salaün, Jann Horn, Jeff Xu, Justin Suess,
	Kees Cook, Masami Hiramatsu, Mathieu Desnoyers, Matthieu Buffet,
	Mikhail Ivanov, Tingmao Wang, kernel-team, linux-fsdevel,
	linux-security-module, linux-trace-kernel
In-Reply-To: <20260406143717.1815792-1-mic@digikod.net>

Add tracepoints for Landlock rule addition: landlock_add_rule_fs for
filesystem rules and landlock_add_rule_net for network rules.  These
enable eBPF programs and ftrace consumers to correlate filesystem
objects and network ports with their rulesets.

Both tracepoints include lockdep_assert_held(&ruleset->lock) in
TP_fast_assign to enforce that the ruleset lock is held during emission.
This guarantees that eBPF programs reading the ruleset via BTF see a
consistent version and the rule just inserted.

Add a version field to struct landlock_ruleset, incremented under the
ruleset lock on each rule insertion.  The version fills the existing
4-byte hole between usage and id (no struct size increase).  Add a
static assertion to ensure the version type can hold
LANDLOCK_MAX_NUM_RULES.

For filesystem rules, resolve the absolute path via
resolve_path_for_trace() which uses d_absolute_path().  Unlike d_path()
(used by audit), d_absolute_path() produces namespace-independent paths
that do not depend on the tracer's chroot state.  This makes trace
output deterministic regardless of mount namespace configuration.
Differentiate error cases: "<too_long>" for -ENAMETOOLONG,
"<unreachable>" for anonymous files or detached mounts, and "<no_mem>"
when the path buffer cannot be allocated.

Add DEFINE_FREE(__putname) to include/linux/fs.h alongside the
__getname()/__putname() definitions.

Cc: Christian Brauner <brauner@kernel.org>
Cc: Günther Noack <gnoack@google.com>
Cc: Justin Suess <utilityemal77@gmail.com>
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Tingmao Wang <m@maowtm.org>
Signed-off-by: Mickaël Salaün <mic@digikod.net>
---

Changes since v1:
https://lore.kernel.org/r/20250523165741.693976-5-mic@digikod.net
- Added landlock_add_rule_net tracepoint for network rules.
- Dropped key=inode:0x%lx from add_rule_fs printk, using dev/ino
  instead.
- Used ruleset Landlock ID instead of kernel pointer in printk.
- Differentiated d_absolute_path() error cases (suggested by
  Tingmao Wang).
- Moved DEFINE_FREE(__putname) to include/linux/fs.h (noticed by
  Tingmao Wang).
- Added version field to struct landlock_ruleset.
- Added version to add_rule trace events (format:
  ruleset=<id>.<version>).
- Added d_absolute_path() vs d_path() rationale to commit message.
---
 include/linux/fs.h              |  1 +
 include/trace/events/landlock.h | 93 ++++++++++++++++++++++++++++++---
 security/landlock/fs.c          | 19 +++++++
 security/landlock/fs.h          | 30 +++++++++++
 security/landlock/net.c         | 12 +++++
 security/landlock/ruleset.c     | 21 +++++++-
 security/landlock/ruleset.h     |  6 +++
 7 files changed, 172 insertions(+), 10 deletions(-)

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 8b3dd145b25e..3849382fad4a 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2562,6 +2562,7 @@ extern void __init vfs_caches_init(void);
 
 #define __getname()		kmalloc(PATH_MAX, GFP_KERNEL)
 #define __putname(name)		kfree(name)
+DEFINE_FREE(__putname, char *, if (_T) __putname(_T))
 
 void emergency_thaw_all(void);
 extern int sync_filesystem(struct super_block *);
diff --git a/include/trace/events/landlock.h b/include/trace/events/landlock.h
index 5e847844fbf7..f1e96c447b97 100644
--- a/include/trace/events/landlock.h
+++ b/include/trace/events/landlock.h
@@ -13,6 +13,7 @@
 #include <linux/tracepoint.h>
 
 struct landlock_ruleset;
+struct path;
 
 /**
  * DOC: Landlock trace events
@@ -41,6 +42,10 @@ struct landlock_ruleset;
  * information about all sandboxed processes on the system.  See
  * Documentation/admin-guide/LSM/landlock.rst for security considerations
  * and privilege requirements.
+ *
+ * Network port fields use __u64 in host endianness, matching the
+ * landlock_net_port_attr.port UAPI convention.  Callers convert from
+ * network byte order before emitting the event.
  */
 
 /**
@@ -56,19 +61,20 @@ TRACE_EVENT(
 
 	TP_ARGS(ruleset),
 
-	TP_STRUCT__entry(__field(__u64, ruleset_id) __field(access_mask_t,
-							    handled_fs)
+	TP_STRUCT__entry(__field(__u64, ruleset_id) __field(
+		__u32, ruleset_version) __field(access_mask_t, handled_fs)
 				 __field(access_mask_t, handled_net)
 					 __field(access_mask_t, scoped)),
 
 	TP_fast_assign(__entry->ruleset_id = ruleset->id;
+		       __entry->ruleset_version = ruleset->version;
 		       __entry->handled_fs = ruleset->layer.fs;
 		       __entry->handled_net = ruleset->layer.net;
 		       __entry->scoped = ruleset->layer.scope;),
 
-	TP_printk("ruleset=%llx handled_fs=0x%x handled_net=0x%x scoped=0x%x",
-		  __entry->ruleset_id, __entry->handled_fs,
-		  __entry->handled_net, __entry->scoped));
+	TP_printk("ruleset=%llx.%u handled_fs=0x%x handled_net=0x%x scoped=0x%x",
+		  __entry->ruleset_id, __entry->ruleset_version,
+		  __entry->handled_fs, __entry->handled_net, __entry->scoped));
 
 /**
  * landlock_free_ruleset - Ruleset freed
@@ -82,12 +88,83 @@ TRACE_EVENT(landlock_free_ruleset,
 
 	    TP_ARGS(ruleset),
 
-	    TP_STRUCT__entry(__field(__u64, ruleset_id)),
+	    TP_STRUCT__entry(__field(__u64, ruleset_id)
+				     __field(__u32, ruleset_version)),
+
+	    TP_fast_assign(__entry->ruleset_id = ruleset->id;
+			   __entry->ruleset_version = ruleset->version;),
+
+	    TP_printk("ruleset=%llx.%u", __entry->ruleset_id,
+		      __entry->ruleset_version));
+
+/**
+ * landlock_add_rule_fs - filesystem rule added to a ruleset
+ * @ruleset: Source ruleset (never NULL)
+ * @access_rights: Allowed access mask for this rule
+ * @path: Filesystem path for the rule (never NULL)
+ * @pathname: Resolved absolute path string (never NULL; error placeholder
+ *            on resolution failure)
+ */
+TRACE_EVENT(
+	landlock_add_rule_fs,
+
+	TP_PROTO(const struct landlock_ruleset *ruleset,
+		 access_mask_t access_rights, const struct path *path,
+		 const char *pathname),
+
+	TP_ARGS(ruleset, access_rights, path, pathname),
+
+	TP_STRUCT__entry(__field(__u64, ruleset_id) __field(__u32,
+							    ruleset_version)
+				 __field(access_mask_t, access_rights)
+					 __field(dev_t, dev) __field(ino_t, ino)
+						 __string(pathname, pathname)),
+
+	TP_fast_assign(lockdep_assert_held(&ruleset->lock);
+		       __entry->ruleset_id = ruleset->id;
+		       __entry->ruleset_version = ruleset->version;
+		       __entry->access_rights = access_rights;
+		       __entry->dev = path->dentry->d_sb->s_dev;
+		       /*
+			     * The inode number may not be the user-visible one,
+			     * but it will be the same used by audit.
+			     */
+		       __entry->ino = d_backing_inode(path->dentry)->i_ino;
+		       __assign_str(pathname);),
+
+	TP_printk("ruleset=%llx.%u access_rights=0x%x dev=%u:%u ino=%lu path=%s",
+		  __entry->ruleset_id, __entry->ruleset_version,
+		  __entry->access_rights, MAJOR(__entry->dev),
+		  MINOR(__entry->dev), __entry->ino,
+		  __print_untrusted_str(pathname)));
+
+/**
+ * landlock_add_rule_net - network port rule added to a ruleset
+ * @ruleset: Source ruleset (never NULL)
+ * @port: Network port number in host endianness
+ * @access_rights: Allowed access mask for this rule
+ */
+TRACE_EVENT(landlock_add_rule_net,
+
+	    TP_PROTO(const struct landlock_ruleset *ruleset, __u64 port,
+		     access_mask_t access_rights),
+
+	    TP_ARGS(ruleset, port, access_rights),
 
-	    TP_fast_assign(__entry->ruleset_id = ruleset->id;),
+	    TP_STRUCT__entry(__field(__u64, ruleset_id) __field(__u32,
+								ruleset_version)
+				     __field(access_mask_t, access_rights)
+					     __field(__u64, port)),
 
-	    TP_printk("ruleset=%llx", __entry->ruleset_id));
+	    TP_fast_assign(lockdep_assert_held(&ruleset->lock);
+			   __entry->ruleset_id = ruleset->id;
+			   __entry->ruleset_version = ruleset->version;
+			   __entry->access_rights = access_rights;
+			   __entry->port = port;),
 
+	    TP_printk("ruleset=%llx.%u access_rights=0x%x port=%llu",
+		      __entry->ruleset_id, __entry->ruleset_version,
+		      __entry->access_rights, __entry->port));
 #endif /* _TRACE_LANDLOCK_H */
 
 /* This part must be outside protection */
diff --git a/security/landlock/fs.c b/security/landlock/fs.c
index a0b4d0dd261f..f627ecc537a5 100644
--- a/security/landlock/fs.c
+++ b/security/landlock/fs.c
@@ -52,6 +52,8 @@
 #include "ruleset.h"
 #include "setup.h"
 
+#include <trace/events/landlock.h>
+
 /* Underlying object management */
 
 static void release_inode(struct landlock_object *const object)
@@ -345,7 +347,24 @@ int landlock_append_fs_rule(struct landlock_ruleset *const ruleset,
 		return PTR_ERR(id.key.object);
 	mutex_lock(&ruleset->lock);
 	err = landlock_insert_rule(ruleset, id, access_rights);
+
+	/*
+	 * Emit after the rule insertion succeeds, so every event corresponds
+	 * to a rule that is actually in the ruleset.  The ruleset lock is
+	 * still held for BTF consistency (enforced by lockdep_assert_held
+	 * in TP_fast_assign).
+	 */
+	if (!err && trace_landlock_add_rule_fs_enabled()) {
+		char *buffer __free(__putname) = __getname();
+		const char *pathname =
+			buffer ? resolve_path_for_trace(path, buffer) :
+				 "<no_mem>";
+
+		trace_landlock_add_rule_fs(ruleset, access_rights, path,
+					   pathname);
+	}
 	mutex_unlock(&ruleset->lock);
+
 	/*
 	 * No need to check for an error because landlock_insert_rule()
 	 * increments the refcount for the new object if needed.
diff --git a/security/landlock/fs.h b/security/landlock/fs.h
index bf9948941f2f..cc54133ae33d 100644
--- a/security/landlock/fs.h
+++ b/security/landlock/fs.h
@@ -11,6 +11,7 @@
 #define _SECURITY_LANDLOCK_FS_H
 
 #include <linux/build_bug.h>
+#include <linux/cleanup.h>
 #include <linux/fs.h>
 #include <linux/init.h>
 #include <linux/rcupdate.h>
@@ -128,4 +129,33 @@ int landlock_append_fs_rule(struct landlock_ruleset *const ruleset,
 			    const struct path *const path,
 			    access_mask_t access_hierarchy);
 
+/**
+ * resolve_path_for_trace - Resolve a path for tracepoint display
+ *
+ * @path: The path to resolve.
+ * @buf: A buffer of at least PATH_MAX bytes for the resolved path.
+ *
+ * Uses d_absolute_path() to produce a namespace-independent absolute path,
+ * unlike d_path() which resolves relative to the process's chroot.  This
+ * ensures trace output is deterministic regardless of the tracer's mount
+ * namespace.
+ *
+ * Return: A pointer into @buf with the resolved path, or an error string
+ * ("<too_long>", "<unreachable>").
+ */
+static inline const char *resolve_path_for_trace(const struct path *path,
+						  char *buf)
+{
+	const char *p;
+
+	p = d_absolute_path(path, buf, PATH_MAX);
+	if (!IS_ERR_OR_NULL(p))
+		return p;
+
+	if (PTR_ERR(p) == -ENAMETOOLONG)
+		return "<too_long>";
+
+	return "<unreachable>";
+}
+
 #endif /* _SECURITY_LANDLOCK_FS_H */
diff --git a/security/landlock/net.c b/security/landlock/net.c
index 63f1fe0ec876..1e893123e787 100644
--- a/security/landlock/net.c
+++ b/security/landlock/net.c
@@ -20,6 +20,8 @@
 #include "net.h"
 #include "ruleset.h"
 
+#include <trace/events/landlock.h>
+
 int landlock_append_net_rule(struct landlock_ruleset *const ruleset,
 			     const u16 port, access_mask_t access_rights)
 {
@@ -36,6 +38,16 @@ int landlock_append_net_rule(struct landlock_ruleset *const ruleset,
 
 	mutex_lock(&ruleset->lock);
 	err = landlock_insert_rule(ruleset, id, access_rights);
+
+	/*
+	 * Emit after the rule insertion succeeds, so every event corresponds
+	 * to a rule that is actually in the ruleset.  The ruleset lock is
+	 * still held for BTF consistency (enforced by lockdep_assert_held
+	 * in TP_fast_assign).
+	 */
+	if (!err)
+		trace_landlock_add_rule_net(ruleset, port, access_rights);
+
 	mutex_unlock(&ruleset->lock);
 
 	return err;
diff --git a/security/landlock/ruleset.c b/security/landlock/ruleset.c
index 0d1e3dadb318..4bd997b58058 100644
--- a/security/landlock/ruleset.c
+++ b/security/landlock/ruleset.c
@@ -4,6 +4,7 @@
  *
  * Copyright © 2016-2020 Mickaël Salaün <mic@digikod.net>
  * Copyright © 2018-2020 ANSSI
+ * Copyright © 2026 Cloudflare
  */
 
 #include <linux/bits.h>
@@ -159,8 +160,16 @@ static void build_check_ruleset(void)
 	const struct landlock_rules rules = {
 		.num_rules = ~0,
 	};
+#ifdef CONFIG_SECURITY_LANDLOCK_LOG
+	const struct landlock_ruleset ruleset = {
+		.version = ~0,
+	};
+#endif /* CONFIG_SECURITY_LANDLOCK_LOG */
 
 	BUILD_BUG_ON(rules.num_rules < LANDLOCK_MAX_NUM_RULES);
+#ifdef CONFIG_SECURITY_LANDLOCK_LOG
+	BUILD_BUG_ON(ruleset.version < LANDLOCK_MAX_NUM_RULES);
+#endif /* CONFIG_SECURITY_LANDLOCK_LOG */
 }
 
 /**
@@ -293,11 +302,19 @@ int landlock_insert_rule(struct landlock_ruleset *const ruleset,
 		/* When @level is zero, landlock_rule_insert() extends @ruleset. */
 		.level = 0,
 	} };
+	int err;
 
 	build_check_layer();
 	lockdep_assert_held(&ruleset->lock);
-	return landlock_rule_insert(&ruleset->rules, id, &layers,
-				    ARRAY_SIZE(layers));
+	err = landlock_rule_insert(&ruleset->rules, id, &layers,
+				   ARRAY_SIZE(layers));
+
+#ifdef CONFIG_SECURITY_LANDLOCK_LOG
+	if (!err)
+		ruleset->version++;
+#endif /* CONFIG_SECURITY_LANDLOCK_LOG */
+
+	return err;
 }
 
 void landlock_free_rules(struct landlock_rules *const rules)
diff --git a/security/landlock/ruleset.h b/security/landlock/ruleset.h
index 0d60e7fb8ff2..aa489ca9d450 100644
--- a/security/landlock/ruleset.h
+++ b/security/landlock/ruleset.h
@@ -156,6 +156,12 @@ struct landlock_ruleset {
 	refcount_t usage;
 
 #ifdef CONFIG_SECURITY_LANDLOCK_LOG
+	/**
+	 * @version: Monotonic counter incremented on each rule insertion.  Used
+	 * by tracepoints to correlate a domain with the exact ruleset state it
+	 * was created from.  Protected by @lock.
+	 */
+	u32 version;
 	/**
 	 * @id: Unique identifier for this ruleset, used for tracing.
 	 */
-- 
2.53.0


^ permalink raw reply related

* [PATCH v2 02/17] landlock: Move domain query functions to domain.c
From: Mickaël Salaün @ 2026-04-06 14:37 UTC (permalink / raw)
  To: Christian Brauner, Günther Noack, Steven Rostedt
  Cc: Mickaël Salaün, Jann Horn, Jeff Xu, Justin Suess,
	Kees Cook, Masami Hiramatsu, Mathieu Desnoyers, Matthieu Buffet,
	Mikhail Ivanov, Tingmao Wang, kernel-team, linux-fsdevel,
	linux-security-module, linux-trace-kernel
In-Reply-To: <20260406143717.1815792-1-mic@digikod.net>

Grouping domain-specific code in one compilation unit reduces coupling
between domain and ruleset implementations.

Move the access-check functions that only operate on domains:
- landlock_find_rule() (from ruleset.c to domain.c)
- landlock_unmask_layers() (from ruleset.c to domain.c)
- landlock_init_layer_masks() (from ruleset.c to domain.c)
- landlock_union_access_masks() (from ruleset.h to domain.h)

These functions are called during the pathwalk and network access checks
to evaluate whether a domain grants the requested access. They do not
modify the domain or its rules.

The merge and inherit chain (merge_tree, merge_ruleset, inherit_tree,
inherit_ruleset, landlock_merge_ruleset) stays in ruleset.c for now
because it calls the static create_ruleset() allocator.  A following
commit moves it when the domain type switch eliminates the dependency on
create_ruleset().

Expand the landlock_unmask_layers() comment to document the per-layer
composition semantics.

No behavioral change.  Function signatures are unchanged; the only
modifications are mechanical adjustments for the struct landlock_rules
embedding introduced by the previous commit.

Cc: Günther Noack <gnoack@google.com>
Cc: Tingmao Wang <m@maowtm.org>
Signed-off-by: Mickaël Salaün <mic@digikod.net>
---

Changes since v1:
- New patch.
---
 security/landlock/domain.c  | 150 ++++++++++++++++++++++++++++++++++++
 security/landlock/domain.h  |  38 +++++++++
 security/landlock/net.c     |   1 +
 security/landlock/ruleset.c | 135 --------------------------------
 security/landlock/ruleset.h |  38 ---------
 5 files changed, 189 insertions(+), 173 deletions(-)

diff --git a/security/landlock/domain.c b/security/landlock/domain.c
index 378d86974ffb..cb79edf5df02 100644
--- a/security/landlock/domain.c
+++ b/security/landlock/domain.c
@@ -10,11 +10,17 @@
 #include <kunit/test.h>
 #include <linux/bitops.h>
 #include <linux/bits.h>
+#include <linux/cleanup.h>
 #include <linux/cred.h>
+#include <linux/err.h>
 #include <linux/file.h>
+#include <linux/lockdep.h>
 #include <linux/mm.h>
+#include <linux/mutex.h>
+#include <linux/overflow.h>
 #include <linux/path.h>
 #include <linux/pid.h>
+#include <linux/rbtree.h>
 #include <linux/refcount.h>
 #include <linux/sched.h>
 #include <linux/signal.h>
@@ -26,6 +32,8 @@
 #include "common.h"
 #include "domain.h"
 #include "id.h"
+#include "limits.h"
+#include "object.h"
 #include "ruleset.h"
 
 static void free_domain(struct landlock_domain *const domain)
@@ -59,6 +67,148 @@ void landlock_put_domain_deferred(struct landlock_domain *const domain)
 	}
 }
 
+/* The returned rule has the same lifetime as @ruleset. */
+const struct landlock_rule *
+landlock_find_rule(const struct landlock_ruleset *const ruleset,
+		   const struct landlock_id id)
+{
+	const struct rb_root *root;
+	const struct rb_node *node;
+
+	root = landlock_get_rule_root((struct landlock_rules *)&ruleset->rules,
+				      id.type);
+	if (IS_ERR(root))
+		return NULL;
+	node = root->rb_node;
+
+	while (node) {
+		struct landlock_rule *this =
+			rb_entry(node, struct landlock_rule, node);
+
+		if (this->key.data == id.key.data)
+			return this;
+		if (this->key.data < id.key.data)
+			node = node->rb_right;
+		else
+			node = node->rb_left;
+	}
+	return NULL;
+}
+
+/**
+ * landlock_unmask_layers - Remove the access rights in @masks which are
+ *                          granted in @rule
+ *
+ * Updates the set of (per-layer) unfulfilled access rights @masks so that all
+ * the access rights granted in @rule are removed from it (because they are now
+ * fulfilled).
+ *
+ * @rule: A rule that grants a set of access rights for each layer.
+ * @masks: A matrix of unfulfilled access rights for each layer.
+ *
+ * Return: True if the request is allowed (i.e. the access rights granted all
+ * remaining unfulfilled access rights and masks has no leftover set bits).
+ */
+bool landlock_unmask_layers(const struct landlock_rule *const rule,
+			    struct layer_access_masks *masks)
+{
+	if (!masks)
+		return true;
+	if (!rule)
+		return false;
+
+	/*
+	 * An access is granted if, for each policy layer, at least one rule
+	 * encountered on the pathwalk grants the requested access, regardless
+	 * of its position in the layer stack.  We must then check the remaining
+	 * layers for each inode, from the first added layer to the last one.
+	 * When there are multiple requested accesses, for each policy layer,
+	 * the full set of requested accesses may not be granted by only one
+	 * rule, but by the union (binary OR) of multiple rules.  For example,
+	 * /a/b <execute> + /a <read> grants /a/b <execute + read>.
+	 *
+	 * This function is called once per matching rule during the pathwalk,
+	 * progressively clearing bits in @masks.  The overall access decision
+	 * is: access is granted iff FOR-ALL layers l, masks->access[l] == 0.
+	 * When two independent mechanisms can each grant access within a layer
+	 * (e.g. a path rule OR a scope exception), the composition must
+	 * evaluate per-layer: FOR-ALL l (A(l) OR B(l)), not (FOR-ALL l A(l)) OR
+	 * (FOR-ALL l B(l)), to prevent bypass when different layers grant via
+	 * different mechanisms.
+	 */
+	for (size_t i = 0; i < rule->num_layers; i++) {
+		const struct landlock_layer *const layer = &rule->layers[i];
+
+		/* Clear the bits where the layer in the rule grants access. */
+		masks->access[layer->level - 1] &= ~layer->access;
+	}
+
+	for (size_t i = 0; i < ARRAY_SIZE(masks->access); i++) {
+		if (masks->access[i])
+			return false;
+	}
+	return true;
+}
+
+typedef access_mask_t
+get_access_mask_t(const struct landlock_ruleset *const ruleset,
+		  const u16 layer_level);
+
+/**
+ * landlock_init_layer_masks - Initialize layer masks from an access request
+ *
+ * Populates @masks such that for each access right in @access_request, the bits
+ * for all the layers are set where this access right is handled.
+ *
+ * @domain: The domain that defines the current restrictions.
+ * @access_request: The requested access rights to check.
+ * @masks: Layer access masks to populate.
+ * @key_type: The key type to switch between access masks of different types.
+ *
+ * Return: An access mask where each access right bit is set which is handled in
+ * any of the active layers in @domain.
+ */
+access_mask_t
+landlock_init_layer_masks(const struct landlock_ruleset *const domain,
+			  const access_mask_t access_request,
+			  struct layer_access_masks *const masks,
+			  const enum landlock_key_type key_type)
+{
+	access_mask_t handled_accesses = 0;
+	get_access_mask_t *get_access_mask;
+
+	switch (key_type) {
+	case LANDLOCK_KEY_INODE:
+		get_access_mask = landlock_get_fs_access_mask;
+		break;
+
+#if IS_ENABLED(CONFIG_INET)
+	case LANDLOCK_KEY_NET_PORT:
+		get_access_mask = landlock_get_net_access_mask;
+		break;
+#endif /* IS_ENABLED(CONFIG_INET) */
+
+	default:
+		WARN_ON_ONCE(1);
+		return 0;
+	}
+
+	/* An empty access request can happen because of O_WRONLY | O_RDWR. */
+	if (!access_request)
+		return 0;
+
+	for (size_t i = 0; i < domain->num_layers; i++) {
+		const access_mask_t handled = get_access_mask(domain, i);
+
+		masks->access[i] = access_request & handled;
+		handled_accesses |= masks->access[i];
+	}
+	for (size_t i = domain->num_layers; i < ARRAY_SIZE(masks->access); i++)
+		masks->access[i] = 0;
+
+	return handled_accesses;
+}
+
 #ifdef CONFIG_AUDIT
 
 /**
diff --git a/security/landlock/domain.h b/security/landlock/domain.h
index 66333b6122a9..afa97011ecd2 100644
--- a/security/landlock/domain.h
+++ b/security/landlock/domain.h
@@ -227,12 +227,50 @@ struct landlock_domain {
 	};
 };
 
+/**
+ * landlock_union_access_masks - Return all access rights handled in the
+ *				 domain
+ *
+ * @domain: Landlock ruleset (used as a domain)
+ *
+ * Return: An access_masks result of the OR of all the domain's access masks.
+ */
+static inline struct access_masks
+landlock_union_access_masks(const struct landlock_ruleset *const domain)
+{
+	union access_masks_all matches = {};
+	size_t layer_level;
+
+	for (layer_level = 0; layer_level < domain->num_layers; layer_level++) {
+		union access_masks_all layer = {
+			.masks = domain->access_masks[layer_level],
+		};
+
+		matches.all |= layer.all;
+	}
+
+	return matches.masks;
+}
+
 void landlock_put_domain(struct landlock_domain *const domain);
 void landlock_put_domain_deferred(struct landlock_domain *const domain);
 
 DEFINE_FREE(landlock_put_domain, struct landlock_domain *,
 	    if (!IS_ERR_OR_NULL(_T)) landlock_put_domain(_T))
 
+const struct landlock_rule *
+landlock_find_rule(const struct landlock_ruleset *const ruleset,
+		   const struct landlock_id id);
+
+bool landlock_unmask_layers(const struct landlock_rule *const rule,
+			    struct layer_access_masks *masks);
+
+access_mask_t
+landlock_init_layer_masks(const struct landlock_ruleset *const domain,
+			  const access_mask_t access_request,
+			  struct layer_access_masks *masks,
+			  const enum landlock_key_type key_type);
+
 static inline void landlock_get_domain(struct landlock_domain *const domain)
 {
 	if (domain)
diff --git a/security/landlock/net.c b/security/landlock/net.c
index c368649985c5..34a72a4f833d 100644
--- a/security/landlock/net.c
+++ b/security/landlock/net.c
@@ -15,6 +15,7 @@
 #include "audit.h"
 #include "common.h"
 #include "cred.h"
+#include "domain.h"
 #include "limits.h"
 #include "net.h"
 #include "ruleset.h"
diff --git a/security/landlock/ruleset.c b/security/landlock/ruleset.c
index a6835011af2b..0cf31a7e4c7b 100644
--- a/security/landlock/ruleset.c
+++ b/security/landlock/ruleset.c
@@ -581,138 +581,3 @@ landlock_merge_ruleset(struct landlock_ruleset *const parent,
 
 	return no_free_ptr(new_dom);
 }
-
-/*
- * The returned access has the same lifetime as @ruleset.
- */
-const struct landlock_rule *
-landlock_find_rule(const struct landlock_ruleset *const ruleset,
-		   const struct landlock_id id)
-{
-	const struct rb_root *root;
-	const struct rb_node *node;
-
-	root = landlock_get_rule_root((struct landlock_rules *)&ruleset->rules,
-				      id.type);
-	if (IS_ERR(root))
-		return NULL;
-	node = root->rb_node;
-
-	while (node) {
-		struct landlock_rule *this =
-			rb_entry(node, struct landlock_rule, node);
-
-		if (this->key.data == id.key.data)
-			return this;
-		if (this->key.data < id.key.data)
-			node = node->rb_right;
-		else
-			node = node->rb_left;
-	}
-	return NULL;
-}
-
-/**
- * landlock_unmask_layers - Remove the access rights in @masks
- *                          which are granted in @rule
- *
- * Updates the set of (per-layer) unfulfilled access rights @masks
- * so that all the access rights granted in @rule are removed from it
- * (because they are now fulfilled).
- *
- * @rule: A rule that grants a set of access rights for each layer
- * @masks: A matrix of unfulfilled access rights for each layer
- *
- * Return: True if the request is allowed (i.e. the access rights granted all
- * remaining unfulfilled access rights and masks has no leftover set bits).
- */
-bool landlock_unmask_layers(const struct landlock_rule *const rule,
-			    struct layer_access_masks *masks)
-{
-	if (!masks)
-		return true;
-	if (!rule)
-		return false;
-
-	/*
-	 * An access is granted if, for each policy layer, at least one rule
-	 * encountered on the pathwalk grants the requested access,
-	 * regardless of its position in the layer stack.  We must then check
-	 * the remaining layers for each inode, from the first added layer to
-	 * the last one.  When there is multiple requested accesses, for each
-	 * policy layer, the full set of requested accesses may not be granted
-	 * by only one rule, but by the union (binary OR) of multiple rules.
-	 * E.g. /a/b <execute> + /a <read> => /a/b <execute + read>
-	 */
-	for (size_t i = 0; i < rule->num_layers; i++) {
-		const struct landlock_layer *const layer = &rule->layers[i];
-
-		/* Clear the bits where the layer in the rule grants access. */
-		masks->access[layer->level - 1] &= ~layer->access;
-	}
-
-	for (size_t i = 0; i < ARRAY_SIZE(masks->access); i++) {
-		if (masks->access[i])
-			return false;
-	}
-	return true;
-}
-
-typedef access_mask_t
-get_access_mask_t(const struct landlock_ruleset *const ruleset,
-		  const u16 layer_level);
-
-/**
- * landlock_init_layer_masks - Initialize layer masks from an access request
- *
- * Populates @masks such that for each access right in @access_request,
- * the bits for all the layers are set where this access right is handled.
- *
- * @domain: The domain that defines the current restrictions.
- * @access_request: The requested access rights to check.
- * @masks: Layer access masks to populate.
- * @key_type: The key type to switch between access masks of different types.
- *
- * Return: An access mask where each access right bit is set which is handled
- * in any of the active layers in @domain.
- */
-access_mask_t
-landlock_init_layer_masks(const struct landlock_ruleset *const domain,
-			  const access_mask_t access_request,
-			  struct layer_access_masks *const masks,
-			  const enum landlock_key_type key_type)
-{
-	access_mask_t handled_accesses = 0;
-	get_access_mask_t *get_access_mask;
-
-	switch (key_type) {
-	case LANDLOCK_KEY_INODE:
-		get_access_mask = landlock_get_fs_access_mask;
-		break;
-
-#if IS_ENABLED(CONFIG_INET)
-	case LANDLOCK_KEY_NET_PORT:
-		get_access_mask = landlock_get_net_access_mask;
-		break;
-#endif /* IS_ENABLED(CONFIG_INET) */
-
-	default:
-		WARN_ON_ONCE(1);
-		return 0;
-	}
-
-	/* An empty access request can happen because of O_WRONLY | O_RDWR. */
-	if (!access_request)
-		return 0;
-
-	for (size_t i = 0; i < domain->num_layers; i++) {
-		const access_mask_t handled = get_access_mask(domain, i);
-
-		masks->access[i] = access_request & handled;
-		handled_accesses |= masks->access[i];
-	}
-	for (size_t i = domain->num_layers; i < ARRAY_SIZE(masks->access); i++)
-		masks->access[i] = 0;
-
-	return handled_accesses;
-}
diff --git a/security/landlock/ruleset.h b/security/landlock/ruleset.h
index e7875a8b15df..1d3a9c36eb74 100644
--- a/security/landlock/ruleset.h
+++ b/security/landlock/ruleset.h
@@ -218,10 +218,6 @@ struct landlock_ruleset *
 landlock_merge_ruleset(struct landlock_ruleset *const parent,
 		       struct landlock_ruleset *const ruleset);
 
-const struct landlock_rule *
-landlock_find_rule(const struct landlock_ruleset *const ruleset,
-		   const struct landlock_id id);
-
 /**
  * landlock_get_rule_root - Get the root of a rule tree by key type
  *
@@ -255,31 +251,6 @@ static inline void landlock_get_ruleset(struct landlock_ruleset *const ruleset)
 		refcount_inc(&ruleset->usage);
 }
 
-/**
- * landlock_union_access_masks - Return all access rights handled in the
- *				 domain
- *
- * @domain: Landlock ruleset (used as a domain)
- *
- * Return: An access_masks result of the OR of all the domain's access masks.
- */
-static inline struct access_masks
-landlock_union_access_masks(const struct landlock_ruleset *const domain)
-{
-	union access_masks_all matches = {};
-	size_t layer_level;
-
-	for (layer_level = 0; layer_level < domain->num_layers; layer_level++) {
-		union access_masks_all layer = {
-			.masks = domain->access_masks[layer_level],
-		};
-
-		matches.all |= layer.all;
-	}
-
-	return matches.masks;
-}
-
 static inline void
 landlock_add_fs_access_mask(struct landlock_ruleset *const ruleset,
 			    const access_mask_t fs_access_mask,
@@ -338,13 +309,4 @@ landlock_get_scope_mask(const struct landlock_ruleset *const ruleset,
 	return ruleset->access_masks[layer_level].scope;
 }
 
-bool landlock_unmask_layers(const struct landlock_rule *const rule,
-			    struct layer_access_masks *masks);
-
-access_mask_t
-landlock_init_layer_masks(const struct landlock_ruleset *const domain,
-			  const access_mask_t access_request,
-			  struct layer_access_masks *masks,
-			  const enum landlock_key_type key_type);
-
 #endif /* _SECURITY_LANDLOCK_RULESET_H */
-- 
2.53.0


^ permalink raw reply related

* [PATCH v2 06/17] landlock: Add create_ruleset and free_ruleset tracepoints
From: Mickaël Salaün @ 2026-04-06 14:37 UTC (permalink / raw)
  To: Christian Brauner, Günther Noack, Steven Rostedt
  Cc: Mickaël Salaün, Jann Horn, Jeff Xu, Justin Suess,
	Kees Cook, Masami Hiramatsu, Mathieu Desnoyers, Matthieu Buffet,
	Mikhail Ivanov, Tingmao Wang, kernel-team, linux-fsdevel,
	linux-security-module, linux-trace-kernel
In-Reply-To: <20260406143717.1815792-1-mic@digikod.net>

Add tracepoints for ruleset lifecycle events: landlock_create_ruleset
fires from the landlock_create_ruleset() syscall handler, logging the
ruleset Landlock ID and handled access masks; landlock_free_ruleset
fires in free_ruleset() before the ruleset is freed, so eBPF programs
can access the full ruleset state via BTF.

The create_ruleset TP_PROTO takes only the ruleset pointer.  The handled
access masks are read from the ruleset in TP_fast_assign rather than
passed as scalar arguments, so eBPF programs can access the full ruleset
state (rules, access masks) via BTF on a single pointer.  No lock is
needed because the ruleset is not yet shared (the file descriptor has
not been installed).

Create the trace header with a DOC comment documenting the consistency
guarantees, locking conventions, TP_PROTO safety, and security
considerations shared by all Landlock tracepoints.  Add
CREATE_TRACE_POINTS in log.c to generate the tracepoint implementations.

Add an id field to struct landlock_ruleset, assigned from
landlock_get_id_range() at creation time.  Extend the CONFIG guard on
landlock_get_id_range() from CONFIG_AUDIT to
CONFIG_SECURITY_LANDLOCK_LOG so that IDs are available for tracing even
without audit support.

The deallocation events use the "free_" prefix (rather than "drop_")
because they fire when the object is actually freed.  There is no need
for allocated/deallocated symmetry because ruleset creation is already
reported by the landlock_create_ruleset tracepoint.

Unlike audit records which share a record type and need a "status="
field to distinguish allocation from deallocation, tracepoints provide
one event type per lifecycle transition, each with a type-safe TP_PROTO
matching the specific transition.  This enables type-safe eBPF BTF
access and precise ftrace filtering by event name.

Cc: Günther Noack <gnoack@google.com>
Cc: Justin Suess <utilityemal77@gmail.com>
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Tingmao Wang <m@maowtm.org>
Signed-off-by: Mickaël Salaün <mic@digikod.net>
---

Changes since v1:
- New patch (split from the v1 add_rule_fs tracepoint patch).
---
 MAINTAINERS                     |  1 +
 include/trace/events/landlock.h | 94 +++++++++++++++++++++++++++++++++
 security/landlock/id.h          |  6 +--
 security/landlock/log.c         |  5 ++
 security/landlock/ruleset.c     |  8 +++
 security/landlock/ruleset.h     |  9 ++++
 security/landlock/syscalls.c    |  5 ++
 7 files changed, 125 insertions(+), 3 deletions(-)
 create mode 100644 include/trace/events/landlock.h

diff --git a/MAINTAINERS b/MAINTAINERS
index c3fe46d7c4bc..51104faa3951 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -14389,6 +14389,7 @@ F:	Documentation/admin-guide/LSM/landlock.rst
 F:	Documentation/security/landlock.rst
 F:	Documentation/userspace-api/landlock.rst
 F:	fs/ioctl.c
+F:	include/trace/events/landlock.h
 F:	include/uapi/linux/landlock.h
 F:	samples/landlock/
 F:	security/landlock/
diff --git a/include/trace/events/landlock.h b/include/trace/events/landlock.h
new file mode 100644
index 000000000000..5e847844fbf7
--- /dev/null
+++ b/include/trace/events/landlock.h
@@ -0,0 +1,94 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright © 2025 Microsoft Corporation
+ * Copyright © 2026 Cloudflare
+ */
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM landlock
+
+#if !defined(_TRACE_LANDLOCK_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_LANDLOCK_H
+
+#include <linux/tracepoint.h>
+
+struct landlock_ruleset;
+
+/**
+ * DOC: Landlock trace events
+ *
+ * Consistency guarantee: every trace event corresponds to an operation
+ * that has irrevocably succeeded.  Lifecycle events fire only after
+ * the point of no return; denial events fire only for denials that
+ * actually happen.  This guarantees that eBPF programs observing the
+ * trace stream can build a faithful model of Landlock state without
+ * reconciliation logic.
+ *
+ * Mutable object pointers in TP_PROTO (e.g., struct landlock_ruleset
+ * for add_rule events) are passed while the caller holds the object's
+ * lock, so that TP_fast_assign and eBPF programs reading via BTF see a
+ * consistent snapshot.  For objects that are immutable at the emission
+ * site (e.g., a domain after creation), no lock is needed.
+ *
+ * All pointer arguments in TP_PROTO are guaranteed non-NULL by the
+ * caller.  eBPF programs can access these pointers via BTF for richer
+ * introspection than the TP_STRUCT__entry fields provide.
+ *
+ * TP_STRUCT__entry fields serve TP_printk display only.  eBPF programs
+ * access the raw TP_PROTO arguments directly.
+ *
+ * Security: as for audit, Landlock trace events may expose sensitive
+ * information about all sandboxed processes on the system.  See
+ * Documentation/admin-guide/LSM/landlock.rst for security considerations
+ * and privilege requirements.
+ */
+
+/**
+ * landlock_create_ruleset - New ruleset created
+ * @ruleset: Newly created ruleset (never NULL); not yet shared via an fd,
+ *           so no lock is needed.  eBPF programs can read the full ruleset
+ *           state via BTF.
+ */
+TRACE_EVENT(
+	landlock_create_ruleset,
+
+	TP_PROTO(const struct landlock_ruleset *ruleset),
+
+	TP_ARGS(ruleset),
+
+	TP_STRUCT__entry(__field(__u64, ruleset_id) __field(access_mask_t,
+							    handled_fs)
+				 __field(access_mask_t, handled_net)
+					 __field(access_mask_t, scoped)),
+
+	TP_fast_assign(__entry->ruleset_id = ruleset->id;
+		       __entry->handled_fs = ruleset->layer.fs;
+		       __entry->handled_net = ruleset->layer.net;
+		       __entry->scoped = ruleset->layer.scope;),
+
+	TP_printk("ruleset=%llx handled_fs=0x%x handled_net=0x%x scoped=0x%x",
+		  __entry->ruleset_id, __entry->handled_fs,
+		  __entry->handled_net, __entry->scoped));
+
+/**
+ * landlock_free_ruleset - Ruleset freed
+ *
+ * Emitted when a ruleset's last reference is dropped (typically when
+ * the creating process closes the ruleset file descriptor).
+ */
+TRACE_EVENT(landlock_free_ruleset,
+
+	    TP_PROTO(const struct landlock_ruleset *ruleset),
+
+	    TP_ARGS(ruleset),
+
+	    TP_STRUCT__entry(__field(__u64, ruleset_id)),
+
+	    TP_fast_assign(__entry->ruleset_id = ruleset->id;),
+
+	    TP_printk("ruleset=%llx", __entry->ruleset_id));
+
+#endif /* _TRACE_LANDLOCK_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/security/landlock/id.h b/security/landlock/id.h
index 45dcfb9e9a8b..2a43c2b523a8 100644
--- a/security/landlock/id.h
+++ b/security/landlock/id.h
@@ -8,18 +8,18 @@
 #ifndef _SECURITY_LANDLOCK_ID_H
 #define _SECURITY_LANDLOCK_ID_H
 
-#ifdef CONFIG_AUDIT
+#ifdef CONFIG_SECURITY_LANDLOCK_LOG
 
 void __init landlock_init_id(void);
 
 u64 landlock_get_id_range(size_t number_of_ids);
 
-#else /* CONFIG_AUDIT */
+#else /* CONFIG_SECURITY_LANDLOCK_LOG */
 
 static inline void __init landlock_init_id(void)
 {
 }
 
-#endif /* CONFIG_AUDIT */
+#endif /* CONFIG_SECURITY_LANDLOCK_LOG */
 
 #endif /* _SECURITY_LANDLOCK_ID_H */
diff --git a/security/landlock/log.c b/security/landlock/log.c
index c9b506707af0..ef79e4ed0037 100644
--- a/security/landlock/log.c
+++ b/security/landlock/log.c
@@ -174,6 +174,11 @@ static void audit_denial(const struct landlock_cred_security *const subject,
 
 #endif /* CONFIG_AUDIT */
 
+#ifdef CONFIG_TRACEPOINTS
+#define CREATE_TRACE_POINTS
+#include <trace/events/landlock.h>
+#endif /* CONFIG_TRACEPOINTS */
+
 static struct landlock_hierarchy *
 get_hierarchy(const struct landlock_domain *const domain, const size_t layer)
 {
diff --git a/security/landlock/ruleset.c b/security/landlock/ruleset.c
index c220e0f9cf5f..0d1e3dadb318 100644
--- a/security/landlock/ruleset.c
+++ b/security/landlock/ruleset.c
@@ -22,10 +22,13 @@
 #include <linux/spinlock.h>
 
 #include "access.h"
+#include "id.h"
 #include "limits.h"
 #include "object.h"
 #include "ruleset.h"
 
+#include <trace/events/landlock.h>
+
 struct landlock_ruleset *
 landlock_create_ruleset(const access_mask_t fs_access_mask,
 			const access_mask_t net_access_mask,
@@ -49,6 +52,10 @@ landlock_create_ruleset(const access_mask_t fs_access_mask,
 	new_ruleset->rules.root_net_port = RB_ROOT;
 #endif /* IS_ENABLED(CONFIG_INET) */
 
+#ifdef CONFIG_SECURITY_LANDLOCK_LOG
+	new_ruleset->id = landlock_get_id_range(1);
+#endif /* CONFIG_SECURITY_LANDLOCK_LOG */
+
 	/* Should already be checked in sys_landlock_create_ruleset(). */
 	if (fs_access_mask) {
 		WARN_ON_ONCE(fs_access_mask !=
@@ -312,6 +319,7 @@ void landlock_free_rules(struct landlock_rules *const rules)
 static void free_ruleset(struct landlock_ruleset *const ruleset)
 {
 	might_sleep();
+	trace_landlock_free_ruleset(ruleset);
 	landlock_free_rules(&ruleset->rules);
 	kfree(ruleset);
 }
diff --git a/security/landlock/ruleset.h b/security/landlock/ruleset.h
index bf127ff7496e..0d60e7fb8ff2 100644
--- a/security/landlock/ruleset.h
+++ b/security/landlock/ruleset.h
@@ -4,6 +4,7 @@
  *
  * Copyright © 2016-2020 Mickaël Salaün <mic@digikod.net>
  * Copyright © 2018-2020 ANSSI
+ * Copyright © 2026 Cloudflare
  */
 
 #ifndef _SECURITY_LANDLOCK_RULESET_H
@@ -153,6 +154,14 @@ struct landlock_ruleset {
 	 * @usage: Number of file descriptors referencing this ruleset.
 	 */
 	refcount_t usage;
+
+#ifdef CONFIG_SECURITY_LANDLOCK_LOG
+	/**
+	 * @id: Unique identifier for this ruleset, used for tracing.
+	 */
+	u64 id;
+#endif /* CONFIG_SECURITY_LANDLOCK_LOG */
+
 	/**
 	 * @layer: Contains the subset of filesystem and network actions that
 	 * are handled by this ruleset.
diff --git a/security/landlock/syscalls.c b/security/landlock/syscalls.c
index 73ccc32d0afd..b18e83e457c2 100644
--- a/security/landlock/syscalls.c
+++ b/security/landlock/syscalls.c
@@ -38,6 +38,8 @@
 #include "setup.h"
 #include "tsync.h"
 
+#include <trace/events/landlock.h>
+
 static bool is_initialized(void)
 {
 	if (likely(landlock_initialized))
@@ -256,6 +258,9 @@ SYSCALL_DEFINE3(landlock_create_ruleset,
 	if (IS_ERR(ruleset))
 		return PTR_ERR(ruleset);
 
+	/* Ruleset is not yet shared (FD not installed), no lock needed. */
+	trace_landlock_create_ruleset(ruleset);
+
 	/* Creates anonymous FD referring to the ruleset. */
 	ruleset_fd = anon_inode_getfd("[landlock-ruleset]", &ruleset_fops,
 				      ruleset, O_RDWR | O_CLOEXEC);
-- 
2.53.0


^ permalink raw reply related

* [PATCH v2 01/17] landlock: Prepare ruleset and domain type split
From: Mickaël Salaün @ 2026-04-06 14:36 UTC (permalink / raw)
  To: Christian Brauner, Günther Noack, Steven Rostedt
  Cc: Mickaël Salaün, Jann Horn, Jeff Xu, Justin Suess,
	Kees Cook, Masami Hiramatsu, Mathieu Desnoyers, Matthieu Buffet,
	Mikhail Ivanov, Tingmao Wang, kernel-team, linux-fsdevel,
	linux-security-module, linux-trace-kernel
In-Reply-To: <20260406143717.1815792-1-mic@digikod.net>

Rulesets and domains serve fundamentally different purposes: a ruleset
is mutable and user-facing, created by landlock_create_ruleset(), while
a domain is immutable after construction and enforced on tasks via
landlock_restrict_self().  Today both are represented by struct
landlock_ruleset, which conflates mutable and immutable state in a
single type: the lock field is unused by domains, the hierarchy field
is unused by rulesets, and lifecycle functions must handle both cases.

Prepare for a clean type split by introducing two new structures and
the helpers needed to construct domains from a separate compilation
unit:

- struct landlock_rules: holds the red-black tree roots and the rule
  count.  This storage type is shared by both rulesets and domains.
  This decouples rule storage from the domain API; the backing data
  structure could be changed independently (e.g. to a hash table,
  cf. [1]).

- struct landlock_domain: the immutable domain enforced on tasks.  It
  has no lock field because its rules and access masks are immutable
  once construction is complete.  The name reflects the role, not the
  internal data structure, to decouple the API from the
  implementation.

Embed struct landlock_rules in struct landlock_ruleset, replacing the
individual root_inode, root_net_port, and num_rules fields.  All field
accesses are updated mechanically.

Add landlock_get_rule_root() as a static inline helper in the header,
enabling constant propagation when the key type is known at compile
time.  Extract landlock_free_rules() so that free_domain() can reuse
the rule-freeing logic without duplicating it.

Add domain lifecycle functions: landlock_get_domain(),
landlock_put_domain(), and landlock_put_domain_deferred().  Move
domain.o from landlock-$(CONFIG_AUDIT) to landlock-y because these
lifecycle functions are needed unconditionally, not just for audit
logging.

No behavioral change.  The new types and lifecycle functions are not
yet used by any caller.

Cc: Günther Noack <gnoack@google.com>
Cc: Tingmao Wang <m@maowtm.org>
Link: https://lore.kernel.org/r/20250523165741.693976-1-mic@digikod.net [1]
Signed-off-by: Mickaël Salaün <mic@digikod.net>
---

Changes since v1:
- New patch.
---
 security/landlock/Makefile  |  6 +--
 security/landlock/domain.c  | 35 ++++++++++++++++
 security/landlock/domain.h  | 69 +++++++++++++++++++++++++++++++
 security/landlock/ruleset.c | 71 ++++++++++++++++----------------
 security/landlock/ruleset.h | 81 +++++++++++++++++++++++++++----------
 5 files changed, 201 insertions(+), 61 deletions(-)

diff --git a/security/landlock/Makefile b/security/landlock/Makefile
index ffa7646d99f3..23e13644916f 100644
--- a/security/landlock/Makefile
+++ b/security/landlock/Makefile
@@ -8,11 +8,11 @@ landlock-y := \
 	cred.o \
 	task.o \
 	fs.o \
-	tsync.o
+	tsync.o \
+	domain.o
 
 landlock-$(CONFIG_INET) += net.o
 
 landlock-$(CONFIG_AUDIT) += \
 	id.o \
-	audit.o \
-	domain.o
+	audit.o
diff --git a/security/landlock/domain.c b/security/landlock/domain.c
index 06b6bd845060..378d86974ffb 100644
--- a/security/landlock/domain.c
+++ b/security/landlock/domain.c
@@ -15,14 +15,49 @@
 #include <linux/mm.h>
 #include <linux/path.h>
 #include <linux/pid.h>
+#include <linux/refcount.h>
 #include <linux/sched.h>
 #include <linux/signal.h>
+#include <linux/slab.h>
 #include <linux/uidgid.h>
+#include <linux/workqueue.h>
 
 #include "access.h"
 #include "common.h"
 #include "domain.h"
 #include "id.h"
+#include "ruleset.h"
+
+static void free_domain(struct landlock_domain *const domain)
+{
+	might_sleep();
+	landlock_free_rules(&domain->rules);
+	landlock_put_hierarchy(domain->hierarchy);
+	kfree(domain);
+}
+
+void landlock_put_domain(struct landlock_domain *const domain)
+{
+	might_sleep();
+	if (domain && refcount_dec_and_test(&domain->usage))
+		free_domain(domain);
+}
+
+static void free_domain_work(struct work_struct *const work)
+{
+	struct landlock_domain *domain;
+
+	domain = container_of(work, struct landlock_domain, work_free);
+	free_domain(domain);
+}
+
+void landlock_put_domain_deferred(struct landlock_domain *const domain)
+{
+	if (domain && refcount_dec_and_test(&domain->usage)) {
+		INIT_WORK(&domain->work_free, free_domain_work);
+		schedule_work(&domain->work_free);
+	}
+}
 
 #ifdef CONFIG_AUDIT
 
diff --git a/security/landlock/domain.h b/security/landlock/domain.h
index a9d57db0120d..66333b6122a9 100644
--- a/security/landlock/domain.h
+++ b/security/landlock/domain.h
@@ -10,6 +10,7 @@
 #ifndef _SECURITY_LANDLOCK_DOMAIN_H
 #define _SECURITY_LANDLOCK_DOMAIN_H
 
+#include <linux/cleanup.h>
 #include <linux/limits.h>
 #include <linux/mm.h>
 #include <linux/path.h>
@@ -17,9 +18,11 @@
 #include <linux/refcount.h>
 #include <linux/sched.h>
 #include <linux/slab.h>
+#include <linux/workqueue.h>
 
 #include "access.h"
 #include "audit.h"
+#include "ruleset.h"
 
 enum landlock_log_status {
 	LANDLOCK_LOG_PENDING = 0,
@@ -170,4 +173,70 @@ static inline void landlock_put_hierarchy(struct landlock_hierarchy *hierarchy)
 	}
 }
 
+/**
+ * struct landlock_domain - Immutable Landlock domain
+ *
+ * A domain is created from a ruleset by landlock_merge_ruleset() and enforced
+ * on a task.  Once created, its rules and access masks are immutable.  Unlike
+ * &struct landlock_ruleset, a domain has no lock field.
+ */
+struct landlock_domain {
+	/**
+	 * @rules: Red-black tree storage for rules.
+	 */
+	struct landlock_rules rules;
+	/**
+	 * @hierarchy: Enables hierarchy identification even when a parent
+	 * domain vanishes.  This is needed for the ptrace and scope
+	 * restrictions.
+	 */
+	struct landlock_hierarchy *hierarchy;
+	union {
+		/**
+		 * @work_free: Enables to free a domain within a lockless
+		 * section.  This is only used by landlock_put_domain_deferred()
+		 * when @usage reaches zero.  The fields @usage, @num_layers and
+		 * @access_masks are then unused.
+		 */
+		struct work_struct work_free;
+		struct {
+			/**
+			 * @usage: Number of credentials referencing this
+			 * domain.
+			 */
+			refcount_t usage;
+			/**
+			 * @num_layers: Number of layers that are used in this
+			 * domain.  This enables to check that all the layers
+			 * allow an access request.
+			 */
+			u32 num_layers;
+			/**
+			 * @access_masks: Contains the subset of filesystem and
+			 * network actions that are restricted by a domain.  A
+			 * domain saves all layers of merged rulesets in a stack
+			 * (FAM), starting from the first layer to the last one.
+			 * These layers are used when merging rulesets, for user
+			 * space backward compatibility (i.e. future-proof), and
+			 * to properly handle merged rulesets without
+			 * overlapping access rights.  These layers are set once
+			 * and never changed for the lifetime of the domain.
+			 */
+			struct access_masks access_masks[];
+		};
+	};
+};
+
+void landlock_put_domain(struct landlock_domain *const domain);
+void landlock_put_domain_deferred(struct landlock_domain *const domain);
+
+DEFINE_FREE(landlock_put_domain, struct landlock_domain *,
+	    if (!IS_ERR_OR_NULL(_T)) landlock_put_domain(_T))
+
+static inline void landlock_get_domain(struct landlock_domain *const domain)
+{
+	if (domain)
+		refcount_inc(&domain->usage);
+}
+
 #endif /* _SECURITY_LANDLOCK_DOMAIN_H */
diff --git a/security/landlock/ruleset.c b/security/landlock/ruleset.c
index 181df7736bb9..a6835011af2b 100644
--- a/security/landlock/ruleset.c
+++ b/security/landlock/ruleset.c
@@ -38,16 +38,16 @@ static struct landlock_ruleset *create_ruleset(const u32 num_layers)
 		return ERR_PTR(-ENOMEM);
 	refcount_set(&new_ruleset->usage, 1);
 	mutex_init(&new_ruleset->lock);
-	new_ruleset->root_inode = RB_ROOT;
+	new_ruleset->rules.root_inode = RB_ROOT;
 
 #if IS_ENABLED(CONFIG_INET)
-	new_ruleset->root_net_port = RB_ROOT;
+	new_ruleset->rules.root_net_port = RB_ROOT;
 #endif /* IS_ENABLED(CONFIG_INET) */
 
 	new_ruleset->num_layers = num_layers;
 	/*
 	 * hierarchy = NULL
-	 * num_rules = 0
+	 * rules.num_rules = 0
 	 * access_masks[] = 0
 	 */
 	return new_ruleset;
@@ -147,19 +147,7 @@ create_rule(const struct landlock_id id,
 static struct rb_root *get_root(struct landlock_ruleset *const ruleset,
 				const enum landlock_key_type key_type)
 {
-	switch (key_type) {
-	case LANDLOCK_KEY_INODE:
-		return &ruleset->root_inode;
-
-#if IS_ENABLED(CONFIG_INET)
-	case LANDLOCK_KEY_NET_PORT:
-		return &ruleset->root_net_port;
-#endif /* IS_ENABLED(CONFIG_INET) */
-
-	default:
-		WARN_ON_ONCE(1);
-		return ERR_PTR(-EINVAL);
-	}
+	return landlock_get_rule_root(&ruleset->rules, key_type);
 }
 
 static void free_rule(struct landlock_rule *const rule,
@@ -175,19 +163,24 @@ static void free_rule(struct landlock_rule *const rule,
 
 static void build_check_ruleset(void)
 {
-	const struct landlock_ruleset ruleset = {
+	const struct landlock_rules rules = {
 		.num_rules = ~0,
+	};
+	const struct landlock_ruleset ruleset = {
 		.num_layers = ~0,
 	};
 
-	BUILD_BUG_ON(ruleset.num_rules < LANDLOCK_MAX_NUM_RULES);
+	BUILD_BUG_ON(rules.num_rules < LANDLOCK_MAX_NUM_RULES);
 	BUILD_BUG_ON(ruleset.num_layers < LANDLOCK_MAX_NUM_LAYERS);
 }
 
 /**
- * insert_rule - Create and insert a rule in a ruleset
+ * insert_rule - Create and insert a rule in a rule set
  *
- * @ruleset: The ruleset to be updated.
+ * @rules: The rule storage to be updated.  The caller is responsible for
+ *         any required locking.  For rulesets, this means holding
+ *         landlock_ruleset.lock.  For domains under construction, no lock is
+ *         needed because the domain is not yet visible to other tasks.
  * @id: The ID to build the new rule with.  The underlying kernel object, if
  *      any, must be held by the caller.
  * @layers: One or multiple layers to be copied into the new rule.
@@ -195,16 +188,16 @@ static void build_check_ruleset(void)
  *
  * When user space requests to add a new rule to a ruleset, @layers only
  * contains one entry and this entry is not assigned to any level.  In this
- * case, the new rule will extend @ruleset, similarly to a boolean OR between
+ * case, the new rule will extend @rules, similarly to a boolean OR between
  * access rights.
  *
  * When merging a ruleset in a domain, or copying a domain, @layers will be
- * added to @ruleset as new constraints, similarly to a boolean AND between
- * access rights.
+ * added to @rules as new constraints, similarly to a boolean AND between access
+ * rights.
  *
  * Return: 0 on success, -errno on failure.
  */
-static int insert_rule(struct landlock_ruleset *const ruleset,
+static int insert_rule(struct landlock_rules *const rules,
 		       const struct landlock_id id,
 		       const struct landlock_layer (*layers)[],
 		       const size_t num_layers)
@@ -215,14 +208,13 @@ static int insert_rule(struct landlock_ruleset *const ruleset,
 	struct rb_root *root;
 
 	might_sleep();
-	lockdep_assert_held(&ruleset->lock);
 	if (WARN_ON_ONCE(!layers))
 		return -ENOENT;
 
 	if (is_object_pointer(id.type) && WARN_ON_ONCE(!id.key.object))
 		return -ENOENT;
 
-	root = get_root(ruleset, id.type);
+	root = landlock_get_rule_root(rules, id.type);
 	if (IS_ERR(root))
 		return PTR_ERR(root);
 
@@ -248,7 +240,7 @@ static int insert_rule(struct landlock_ruleset *const ruleset,
 		if ((*layers)[0].level == 0) {
 			/*
 			 * Extends access rights when the request comes from
-			 * landlock_add_rule(2), i.e. @ruleset is not a domain.
+			 * landlock_add_rule(2), i.e. contained by a ruleset.
 			 */
 			if (WARN_ON_ONCE(this->num_layers != 1))
 				return -EINVAL;
@@ -276,14 +268,14 @@ static int insert_rule(struct landlock_ruleset *const ruleset,
 
 	/* There is no match for @id. */
 	build_check_ruleset();
-	if (ruleset->num_rules >= LANDLOCK_MAX_NUM_RULES)
+	if (rules->num_rules >= LANDLOCK_MAX_NUM_RULES)
 		return -E2BIG;
 	new_rule = create_rule(id, layers, num_layers, NULL);
 	if (IS_ERR(new_rule))
 		return PTR_ERR(new_rule);
 	rb_link_node(&new_rule->node, parent_node, walker_node);
 	rb_insert_color(&new_rule->node, root);
-	ruleset->num_rules++;
+	rules->num_rules++;
 	return 0;
 }
 
@@ -314,7 +306,8 @@ int landlock_insert_rule(struct landlock_ruleset *const ruleset,
 	} };
 
 	build_check_layer();
-	return insert_rule(ruleset, id, &layers, ARRAY_SIZE(layers));
+	lockdep_assert_held(&ruleset->lock);
+	return insert_rule(&ruleset->rules, id, &layers, ARRAY_SIZE(layers));
 }
 
 static int merge_tree(struct landlock_ruleset *const dst,
@@ -352,7 +345,7 @@ static int merge_tree(struct landlock_ruleset *const dst,
 
 		layers[0].access = walker_rule->layers[0].access;
 
-		err = insert_rule(dst, id, &layers, ARRAY_SIZE(layers));
+		err = insert_rule(&dst->rules, id, &layers, ARRAY_SIZE(layers));
 		if (err)
 			return err;
 	}
@@ -426,7 +419,7 @@ static int inherit_tree(struct landlock_ruleset *const parent,
 			.type = key_type,
 		};
 
-		err = insert_rule(child, id, &walker_rule->layers,
+		err = insert_rule(&child->rules, id, &walker_rule->layers,
 				  walker_rule->num_layers);
 		if (err)
 			return err;
@@ -480,21 +473,26 @@ static int inherit_ruleset(struct landlock_ruleset *const parent,
 	return err;
 }
 
-static void free_ruleset(struct landlock_ruleset *const ruleset)
+void landlock_free_rules(struct landlock_rules *const rules)
 {
 	struct landlock_rule *freeme, *next;
 
 	might_sleep();
-	rbtree_postorder_for_each_entry_safe(freeme, next, &ruleset->root_inode,
+	rbtree_postorder_for_each_entry_safe(freeme, next, &rules->root_inode,
 					     node)
 		free_rule(freeme, LANDLOCK_KEY_INODE);
 
 #if IS_ENABLED(CONFIG_INET)
 	rbtree_postorder_for_each_entry_safe(freeme, next,
-					     &ruleset->root_net_port, node)
+					     &rules->root_net_port, node)
 		free_rule(freeme, LANDLOCK_KEY_NET_PORT);
 #endif /* IS_ENABLED(CONFIG_INET) */
+}
 
+static void free_ruleset(struct landlock_ruleset *const ruleset)
+{
+	might_sleep();
+	landlock_free_rules(&ruleset->rules);
 	landlock_put_hierarchy(ruleset->hierarchy);
 	kfree(ruleset);
 }
@@ -594,7 +592,8 @@ landlock_find_rule(const struct landlock_ruleset *const ruleset,
 	const struct rb_root *root;
 	const struct rb_node *node;
 
-	root = get_root((struct landlock_ruleset *)ruleset, id.type);
+	root = landlock_get_rule_root((struct landlock_rules *)&ruleset->rules,
+				      id.type);
 	if (IS_ERR(root))
 		return NULL;
 	node = root->rb_node;
diff --git a/security/landlock/ruleset.h b/security/landlock/ruleset.h
index 889f4b30301a..e7875a8b15df 100644
--- a/security/landlock/ruleset.h
+++ b/security/landlock/ruleset.h
@@ -57,13 +57,12 @@ union landlock_key {
  */
 enum landlock_key_type {
 	/**
-	 * @LANDLOCK_KEY_INODE: Type of &landlock_ruleset.root_inode's node
-	 * keys.
+	 * @LANDLOCK_KEY_INODE: Type of &landlock_rules.root_inode's node keys.
 	 */
 	LANDLOCK_KEY_INODE = 1,
 	/**
-	 * @LANDLOCK_KEY_NET_PORT: Type of &landlock_ruleset.root_net_port's
-	 * node keys.
+	 * @LANDLOCK_KEY_NET_PORT: Type of &landlock_rules.root_net_port's node
+	 * keys.
 	 */
 	LANDLOCK_KEY_NET_PORT,
 };
@@ -111,30 +110,44 @@ struct landlock_rule {
 };
 
 /**
- * struct landlock_ruleset - Landlock ruleset
+ * struct landlock_rules - Red-black tree storage for Landlock rules
  *
- * This data structure must contain unique entries, be updatable, and quick to
- * match an object.
+ * This structure holds the rule trees shared by both rulesets and domains.
  */
-struct landlock_ruleset {
+struct landlock_rules {
 	/**
 	 * @root_inode: Root of a red-black tree containing &struct
-	 * landlock_rule nodes with inode object.  Once a ruleset is tied to a
-	 * process (i.e. as a domain), this tree is immutable until @usage
-	 * reaches zero.
+	 * landlock_rule nodes with inode object.  Immutable for domains.
 	 */
 	struct rb_root root_inode;
 
 #if IS_ENABLED(CONFIG_INET)
 	/**
 	 * @root_net_port: Root of a red-black tree containing &struct
-	 * landlock_rule nodes with network port. Once a ruleset is tied to a
-	 * process (i.e. as a domain), this tree is immutable until @usage
-	 * reaches zero.
+	 * landlock_rule nodes with network port.  Immutable for domains.
 	 */
 	struct rb_root root_net_port;
 #endif /* IS_ENABLED(CONFIG_INET) */
 
+	/**
+	 * @num_rules: Number of non-overlapping (i.e. not for the same object)
+	 * rules in this tree storage.
+	 */
+	u32 num_rules;
+};
+
+/**
+ * struct landlock_ruleset - Landlock ruleset
+ *
+ * This data structure must contain unique entries, be updatable, and quick to
+ * match an object.
+ */
+struct landlock_ruleset {
+	/**
+	 * @rules: Red-black tree storage for rules.
+	 */
+	struct landlock_rules rules;
+
 	/**
 	 * @hierarchy: Enables hierarchy identification even when a parent
 	 * domain vanishes.  This is needed for the ptrace protection.
@@ -144,9 +157,9 @@ struct landlock_ruleset {
 		/**
 		 * @work_free: Enables to free a ruleset within a lockless
 		 * section.  This is only used by
-		 * landlock_put_ruleset_deferred() when @usage reaches zero.
-		 * The fields @lock, @usage, @num_rules, @num_layers and
-		 * @access_masks are then unused.
+		 * landlock_put_ruleset_deferred() when @usage reaches zero. The
+		 * fields @lock, @usage, @num_layers and @access_masks are then
+		 * unused.
 		 */
 		struct work_struct work_free;
 		struct {
@@ -160,11 +173,6 @@ struct landlock_ruleset {
 			 * descriptors referencing this ruleset.
 			 */
 			refcount_t usage;
-			/**
-			 * @num_rules: Number of non-overlapping (i.e. not for
-			 * the same object) rules in this ruleset.
-			 */
-			u32 num_rules;
 			/**
 			 * @num_layers: Number of layers that are used in this
 			 * ruleset.  This enables to check that all the layers
@@ -204,6 +212,8 @@ int landlock_insert_rule(struct landlock_ruleset *const ruleset,
 			 const struct landlock_id id,
 			 const access_mask_t access);
 
+void landlock_free_rules(struct landlock_rules *const rules);
+
 struct landlock_ruleset *
 landlock_merge_ruleset(struct landlock_ruleset *const parent,
 		       struct landlock_ruleset *const ruleset);
@@ -212,6 +222,33 @@ const struct landlock_rule *
 landlock_find_rule(const struct landlock_ruleset *const ruleset,
 		   const struct landlock_id id);
 
+/**
+ * landlock_get_rule_root - Get the root of a rule tree by key type
+ *
+ * @rules: The rules storage to look up.
+ * @key_type: The type of key to select the tree for.
+ *
+ * Return: A pointer to the rb_root, or ERR_PTR(-EINVAL) on unknown type.
+ */
+static inline struct rb_root *
+landlock_get_rule_root(struct landlock_rules *const rules,
+		       const enum landlock_key_type key_type)
+{
+	switch (key_type) {
+	case LANDLOCK_KEY_INODE:
+		return &rules->root_inode;
+
+#if IS_ENABLED(CONFIG_INET)
+	case LANDLOCK_KEY_NET_PORT:
+		return &rules->root_net_port;
+#endif /* IS_ENABLED(CONFIG_INET) */
+
+	default:
+		WARN_ON_ONCE(1);
+		return ERR_PTR(-EINVAL);
+	}
+}
+
 static inline void landlock_get_ruleset(struct landlock_ruleset *const ruleset)
 {
 	if (ruleset)
-- 
2.53.0


^ permalink raw reply related

* [PATCH v2 00/17] Landlock tracepoints
From: Mickaël Salaün @ 2026-04-06 14:36 UTC (permalink / raw)
  To: Christian Brauner, Günther Noack, Steven Rostedt
  Cc: Mickaël Salaün, Jann Horn, Jeff Xu, Justin Suess,
	Kees Cook, Masami Hiramatsu, Mathieu Desnoyers, Matthieu Buffet,
	Mikhail Ivanov, Tingmao Wang, kernel-team, linux-fsdevel,
	linux-security-module, linux-trace-kernel

Hi,

This series adds 13 tracepoints that cover the full Landlock lifecycle,
from ruleset creation to domain destruction.  They can be used directly
via /sys/kernel/tracing/events/landlock/* or attached by eBPF programs
for richer introspection.

Patches 1-4 refactor Landlock internals: they split struct
landlock_domain from struct landlock_ruleset and move denial logging
into a common framework shared by audit and tracing.  Patch 5 adds
__print_untrusted_str() to the tracing core.  Patches 6-9 add
lifecycle tracepoints: ruleset creation and destruction, rule addition
for filesystem and network, domain enforcement and destruction, and
per-rule access checks.  Patch 10 sets audit_net.sk for socket access
checks.  Patches 11-12 add denial tracepoints for filesystem, network,
and scope operations.  Patches 13-16 add selftests and patch 17 adds
documentation.

Each rule type has a dedicated tracepoint with strongly-typed fields
(dev/ino for filesystem, port for network), following the same approach
as the audit logs.

This feature is useful to troubleshoot policy issues and should limit
the need for custom debugging kernel code when developing new Landlock
features.

Landlock already has audit support for logging denied access requests,
which is useful to identify security issues or sandbox misconfiguration.
However, audit might not be enough to debug Landlock policies.  The
main difference with audit events is that traces are disabled by
default, can be very verbose, and can be filtered according to process
and Landlock properties (e.g. domain ID).

As for audit, tracing may expose sensitive information about all
sandboxed processes on the system, and must only be accessible to the
system administrator.  For unprivileged monitoring scoped to a single
sandbox (e.g., interactive permission prompts), Tingmao Wang's
"Landlock supervise" RFC [1] proposes a dedicated userspace API.  The
infrastructure changes in this series (the domain type split, the
denial framework, and the tracepoint consistency guarantees) benefit
that approach.

I will release a companion tool that leverages these tracepoints to
monitor Landlock events in real time.

This series applies on top of my next branch [2].

Changes since RFC v1:
https://lore.kernel.org/r/20250523165741.693976-1-mic@digikod.net
- New patches 1-4: split struct landlock_domain from struct
  landlock_ruleset; split denial logging from audit into common
  framework with CONFIG_SECURITY_LANDLOCK_LOG.
- Patch 5 (was v1 3/5): removed WARN_ON() (pointed out by Steven
  Rostedt).
- New patch 6: added create_ruleset and free_ruleset tracepoints
  (split from the v1 add_rule_fs tracepoint patch).
- Patch 7 (was v1 4/5): added add_rule_net tracepoint, used
  ruleset Landlock ID instead of kernel pointer, added version
  field to struct landlock_ruleset, differentiated d_absolute_path()
  error cases (suggested by Tingmao Wang), moved
  DEFINE_FREE(__putname) to include/linux/fs.h (noticed by Tingmao
  Wang).
- New patch 8: added restrict_self and free_domain tracepoints.
- Patch 9 (was v1 5/5): merged find-rule consolidation, added
  check_rule_net tracepoint.
- New patch 10: split audit_net.sk fix with Fixes: tag.
- New patches 11-12: added denial tracepoints for filesystem,
  network, ptrace, and scope operations.
- New patches 13-17: split selftests into per-feature commits with
  documentation.

Regards,

Mickaël Salaün (17):
  landlock: Prepare ruleset and domain type split
  landlock: Move domain query functions to domain.c
  landlock: Split struct landlock_domain from struct landlock_ruleset
  landlock: Split denial logging from audit into common framework
  tracing: Add __print_untrusted_str()
  landlock: Add create_ruleset and free_ruleset tracepoints
  landlock: Add landlock_add_rule_fs and landlock_add_rule_net
    tracepoints
  landlock: Add restrict_self and free_domain tracepoints
  landlock: Add tracepoints for rule checking
  landlock: Set audit_net.sk for socket access checks
  landlock: Add landlock_deny_access_fs and landlock_deny_access_net
  landlock: Add tracepoints for ptrace and scope denials
  selftests/landlock: Add trace event test infrastructure and tests
  selftests/landlock: Add filesystem tracepoint tests
  selftests/landlock: Add network tracepoint tests
  selftests/landlock: Add scope and ptrace tracepoint tests
  landlock: Document tracepoints

 Documentation/admin-guide/LSM/landlock.rst    |  210 ++-
 Documentation/security/landlock.rst           |   35 +-
 Documentation/trace/events-landlock.rst       |  160 +++
 Documentation/trace/index.rst                 |    1 +
 Documentation/userspace-api/landlock.rst      |   11 +-
 MAINTAINERS                                   |    1 +
 include/linux/fs.h                            |    1 +
 include/linux/trace_events.h                  |    2 +
 include/trace/events/landlock.h               |  574 ++++++++
 include/trace/stages/stage3_trace_output.h    |    4 +
 include/trace/stages/stage7_class_define.h    |    1 +
 kernel/trace/trace_output.c                   |   41 +
 security/landlock/Kconfig                     |    5 +
 security/landlock/Makefile                    |   10 +-
 security/landlock/access.h                    |    4 +-
 security/landlock/cred.c                      |    6 +-
 security/landlock/cred.h                      |   29 +-
 security/landlock/domain.c                    |  445 ++++++-
 security/landlock/domain.h                    |  148 ++-
 security/landlock/fs.c                        |  201 ++-
 security/landlock/fs.h                        |   30 +
 security/landlock/id.h                        |    6 +-
 security/landlock/{audit.c => log.c}          |  261 +++-
 security/landlock/{audit.h => log.h}          |   25 +-
 security/landlock/net.c                       |   40 +-
 security/landlock/ruleset.c                   |  528 ++------
 security/landlock/ruleset.h                   |  237 ++--
 security/landlock/syscalls.c                  |   36 +-
 security/landlock/task.c                      |   22 +-
 tools/testing/selftests/landlock/audit.h      |   35 +-
 tools/testing/selftests/landlock/audit_test.c |  187 +++
 tools/testing/selftests/landlock/common.h     |   47 +
 tools/testing/selftests/landlock/config       |    2 +
 tools/testing/selftests/landlock/fs_test.c    |  218 +++
 tools/testing/selftests/landlock/net_test.c   |  547 +++++++-
 .../testing/selftests/landlock/ptrace_test.c  |  164 +++
 .../landlock/scoped_abstract_unix_test.c      |  195 +++
 .../selftests/landlock/scoped_signal_test.c   |  150 +++
 tools/testing/selftests/landlock/trace.h      |  640 +++++++++
 .../selftests/landlock/trace_fs_test.c        |  390 ++++++
 tools/testing/selftests/landlock/trace_test.c | 1168 +++++++++++++++++
 tools/testing/selftests/landlock/true.c       |   10 +
 42 files changed, 5991 insertions(+), 836 deletions(-)
 create mode 100644 Documentation/trace/events-landlock.rst
 create mode 100644 include/trace/events/landlock.h
 rename security/landlock/{audit.c => log.c} (73%)
 rename security/landlock/{audit.h => log.h} (74%)
 create mode 100644 tools/testing/selftests/landlock/trace.h
 create mode 100644 tools/testing/selftests/landlock/trace_fs_test.c
 create mode 100644 tools/testing/selftests/landlock/trace_test.c


base-commit: 8c6a27e02bc55ab110d1828610048b19f903aaec
-- 
2.53.0


^ permalink raw reply

* Re: [RFC PATCH 3/4] livepatch: Add "replaceable" attribute to klp_patch
From: Yafang Shao @ 2026-04-06 11:08 UTC (permalink / raw)
  To: Song Liu
  Cc: Dylan Hatch, jpoimboe, jikos, mbenes, pmladek, joe.lawrence,
	rostedt, mhiramat, mathieu.desnoyers, kpsingh, mattbobrowski,
	jolsa, ast, daniel, andrii, martin.lau, eddyz87, memxor,
	yonghong.song, live-patching, linux-kernel, linux-trace-kernel,
	bpf
In-Reply-To: <CAPhsuW6p3YOv3_M_c0ThMcrNqNjT=7i46ekJBrWO_oGzQkxrxA@mail.gmail.com>

On Sat, Apr 4, 2026 at 5:36 AM Song Liu <song@kernel.org> wrote:
>
> On Fri, Apr 3, 2026 at 1:55 PM Dylan Hatch <dylanbhatch@google.com> wrote:
> [...]
> > > IIRC, the use case for this change is when multiple users load various
> > > livepatch modules on the same system. I still don't believe this is the
> > > right way to manage livepatches. That said, I won't really NACK this
> > > if other folks think this is a useful option.
> >
> > In our production fleet, we apply exactly one cumulative livepatch
> > module, and we use per-kernel build "livepatch release" branches to
> > track the contents of these cumulative livepatches. This model has
> > worked relatively well for us, but there are some painpoints.
> >
> > We are often under pressure to selectively deploy a livepatch fix to
> > certain subpopulations of production. If the subpopulation is running
> > the same build of everything else, this would require us to introduce
> > another branching factor to the "livepatch release" branches --
> > something we do not support due to the added toil and complexity.
> >
> > However, if we had the ability to build "off-band" livepatch modules
> > that were marked as non-replaceable, we could support these selective
> > patches without the additional branching factor. I will have to
> > circulate the idea internally, but to me this seems like a very useful
> > option to have in certain cases.
>
>  IIUC, the plan is:
>
> - The regular livepatches are cumulative, have the replace flag; and
>   are replaceable.
> - The occasional "off-band" livepatches do not have the replace flag,
>   and are not replaceable.
>
> With this setup, for systems with off-band livepatches loaded, we can
> still release a cumulative livepatch to replace the previous cumulative
> livepatch. Is this the expected use case?

That matches our expected use case.

>
> I think there are a few issues with this:
> 1. The "off-band" livepatches cannot be replaced atomically. To upgrade
>    "off-band" livepatches, we will have to unload the old version and load
>    the new version later.

Right. That is how the non-atomic-replace patch works.

> 2. Any conflict with the off-band livepatches and regular livepatches will
>    be difficult to manage.

We need to manage this conflict with a complex user script. That said,
everything can be controlled from userspace.

> IOW, we kind of removed the benefit of cumulative
>    livepatches. For example, what shall we do if we really need two fixes
>    to the same kernel functions: one from the original branch, the other
>    from the off-band branch?

We run tens of livepatches on our production servers and have never
run into this issue. It's an extremely rare case — and if it does
happen, a user script should be able to handle it just fine.

-- 
Regards
Yafang

^ permalink raw reply

* Re: [RFC PATCH 0/4] trace, livepatch: Allow kprobe return overriding for livepatched functions
From: Yafang Shao @ 2026-04-06 10:57 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: jpoimboe, jikos, mbenes, pmladek, joe.lawrence, rostedt, mhiramat,
	mathieu.desnoyers, kpsingh, mattbobrowski, song, jolsa, ast,
	daniel, andrii, martin.lau, eddyz87, memxor, yonghong.song,
	live-patching, linux-kernel, linux-trace-kernel, bpf
In-Reply-To: <adNGXfRI84mZrUSs@infradead.org>

On Mon, Apr 6, 2026 at 1:36 PM Christoph Hellwig <hch@infradead.org> wrote:
>
> On Thu, Apr 02, 2026 at 05:26:03PM +0800, Yafang Shao wrote:
> > Livepatching allows for rapid experimentation with new kernel features
> > without interrupting production workloads.
>
> Maybe it allows, or based on the rest of the mail not quite.  But that
> is certainly not the intent at all, the intent is to fix critical
> bugs without downtime.
>
> > However, static livepatches lack
> > the flexibility required to tune features based on task-specific attributes,
> > such as cgroup membership, which is critical in multi-tenant k8s
> > environments. Furthermore, hardcoding logic into a livepatch prevents
> > dynamic adjustments based on the runtime environment.
> >
> > To address this, we propose a hybrid approach using BPF. Our production use
> > case involves:
> >
> > 1. Deploying a Livepatch function to serve as a stable BPF hook.
> >
> > 2. Utilizing bpf_override_return() to dynamically modify the return value
> >    of that hook based on the current task's context.
>
> Whol f**. now.  Is this a delayed April 1st post?

You're already in my spam list. Don't expect any further replies. Feel
free to keep your verbose rubbish to yourself.

-- 
Regards
Yafang

^ permalink raw reply

* Re: [RFC PATCH 0/4] trace, livepatch: Allow kprobe return overriding for livepatched functions
From: Yafang Shao @ 2026-04-06 10:55 UTC (permalink / raw)
  To: Song Liu
  Cc: jpoimboe, jikos, mbenes, pmladek, joe.lawrence, rostedt, mhiramat,
	mathieu.desnoyers, kpsingh, mattbobrowski, jolsa, ast, daniel,
	andrii, martin.lau, eddyz87, memxor, yonghong.song, live-patching,
	linux-kernel, linux-trace-kernel, bpf
In-Reply-To: <CAPhsuW7Y5KksWM49TrGH_Hohaq02XO8qs7G99Y6D8=0usLFSrQ@mail.gmail.com>

On Sat, Apr 4, 2026 at 12:07 AM Song Liu <song@kernel.org> wrote:
>
> Hi Yafang,
>
> On Thu, Apr 2, 2026 at 2:26 AM Yafang Shao <laoar.shao@gmail.com> wrote:
> >
> > Livepatching allows for rapid experimentation with new kernel features
> > without interrupting production workloads. However, static livepatches lack
> > the flexibility required to tune features based on task-specific attributes,
> > such as cgroup membership, which is critical in multi-tenant k8s
> > environments. Furthermore, hardcoding logic into a livepatch prevents
> > dynamic adjustments based on the runtime environment.
> >
> > To address this, we propose a hybrid approach using BPF. Our production use
> > case involves:
> >
> > 1. Deploying a Livepatch function to serve as a stable BPF hook.
> >
> > 2. Utilizing bpf_override_return() to dynamically modify the return value
> >    of that hook based on the current task's context.
>
> Could you please provide a specific use case that can benefit from this?
> AFAICT, livepatch is more flexible but risky (may cause crash); while
> BPF is safe, but less flexible. The combination you are proposing seems
> to get the worst of the two sides. Maybe it can indeed get the benefit of
> both sides in some cases, but I cannot think of such examples.
>

Here is an example we recently deployed on our production servers:

  https://lore.kernel.org/bpf/CALOAHbDnNba_w_nWH3-S9GAXw0+VKuLTh1gy5hy9Yqgeo4C0iA@mail.gmail.com/

In one of our specific clusters, we needed to send BGP traffic out
through specific NICs based on the destination IP. To achieve this
without interrupting service, we live-patched
bond_xmit_3ad_xor_slave_get(), added a new hook called
bond_get_slave_hook(), and then ran a BPF program attached to that
hook to select the outgoing NIC from the SKB. This allowed us to
rapidly deploy the feature with zero downtime.

-- 
Regards
Yafang

^ permalink raw reply

* [PATCH v16 5/5] ring-buffer: Show commit numbers in buffer_meta file
From: Masami Hiramatsu (Google) @ 2026-04-06 10:24 UTC (permalink / raw)
  To: Steven Rostedt, Catalin Marinas, Will Deacon
  Cc: Masami Hiramatsu, Mathieu Desnoyers, linux-kernel,
	linux-trace-kernel, Ian Rogers, linux-arm-kernel
In-Reply-To: <177547105523.259641.14385891517704197263.stgit@mhiramat.tok.corp.google.com>

From: Masami Hiramatsu (Google) <mhiramat@kernel.org>

In addition to the index number, show the commit numbers of
each data page in the per_cpu buffer_meta file.
This is useful for understanding the current status of the
persistent ring buffer. (Note that this file is shown
only for persistent ring buffer and its backup instance)

Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
---
 Changes in v16:
  - update description.
---
 kernel/trace/ring_buffer.c |    5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index e56fe9dcc7d7..4bf83b7805da 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -2209,6 +2209,7 @@ static int rbm_show(struct seq_file *m, void *v)
 	struct ring_buffer_per_cpu *cpu_buffer = m->private;
 	struct ring_buffer_cpu_meta *meta = cpu_buffer->ring_meta;
 	unsigned long val = (unsigned long)v;
+	struct buffer_data_page *dpage;
 
 	if (val == 1) {
 		seq_printf(m, "head_buffer:   %d\n",
@@ -2221,7 +2222,9 @@ static int rbm_show(struct seq_file *m, void *v)
 	}
 
 	val -= 2;
-	seq_printf(m, "buffer[%ld]:    %d\n", val, meta->buffers[val]);
+	dpage = rb_range_buffer(cpu_buffer, val);
+	seq_printf(m, "buffer[%ld]:    %d (commit: %ld)\n",
+		   val, meta->buffers[val], local_read(&dpage->commit));
 
 	return 0;
 }


^ permalink raw reply related

* [PATCH v16 4/5] ring-buffer: Add persistent ring buffer invalid-page inject test
From: Masami Hiramatsu (Google) @ 2026-04-06 10:24 UTC (permalink / raw)
  To: Steven Rostedt, Catalin Marinas, Will Deacon
  Cc: Masami Hiramatsu, Mathieu Desnoyers, linux-kernel,
	linux-trace-kernel, Ian Rogers, linux-arm-kernel
In-Reply-To: <177547105523.259641.14385891517704197263.stgit@mhiramat.tok.corp.google.com>

From: Masami Hiramatsu (Google) <mhiramat@kernel.org>

Add a self-corrupting test for the persistent ring buffer.

This will inject an erroneous value into some sub-buffer pages (where
the index is even or a multiple of 5) in the persistent ring buffer
when the kernel panics, and checks whether the number of detected
invalid pages and the total entry_bytes are the same as the recorded
values after reboot.

This ensures that the kernel can correctly recover a partially
corrupted persistent ring buffer after a reboot or panic.

The test only runs on the persistent ring buffer whose name is
"ptracingtest". The user has to fill it with events before a
kernel panic.

To run the test, enable CONFIG_RING_BUFFER_PERSISTENT_INJECT
and add the following kernel cmdline:

 reserve_mem=20M:2M:trace trace_instance=ptracingtest^traceoff@trace
 panic=1

Run the following commands after the 1st boot:

 cd /sys/kernel/tracing/instances/ptracingtest
 echo 1 > tracing_on
 echo 1 > events/enable
 sleep 3
 echo c > /proc/sysrq-trigger

After the panic message, the kernel will reboot and run the verification
on the persistent ring buffer, e.g.

 Ring buffer meta [2] invalid buffer page detected
 Ring buffer meta [2] is from previous boot! (318 pages discarded)
 Ring buffer testing [2] invalid pages: PASSED (318/318)
 Ring buffer testing [2] entry_bytes: PASSED (1300476/1300476)

Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
---
 Changes in v16:
  - Update description and comments according to review comments.
 Changes in v15:
  - Use pr_warn() for test result.
  - Inject errors on pages whose index is a multiple of 5 so that
    this can reproduce contiguous empty pages.
 Changes in v14:
  - Rename config to CONFIG_RING_BUFFER_PERSISTENT_INJECT.
  - Clear meta->nr_invalid/entry_bytes after testing.
  - Add test commands in config comment.
 Changes in v10:
  - Add entry_bytes test.
  - Do not compile test code if CONFIG_RING_BUFFER_PERSISTENT_SELFTEST=n.
 Changes in v9:
  - Test also reader pages.
---
 include/linux/ring_buffer.h |    1 +
 kernel/trace/Kconfig        |   34 ++++++++++++++++++++
 kernel/trace/ring_buffer.c  |   74 +++++++++++++++++++++++++++++++++++++++++++
 kernel/trace/trace.c        |    4 ++
 4 files changed, 113 insertions(+)

diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h
index 994f52b34344..0670742b2d60 100644
--- a/include/linux/ring_buffer.h
+++ b/include/linux/ring_buffer.h
@@ -238,6 +238,7 @@ int ring_buffer_subbuf_size_get(struct trace_buffer *buffer);
 
 enum ring_buffer_flags {
 	RB_FL_OVERWRITE		= 1 << 0,
+	RB_FL_TESTING		= 1 << 1,
 };
 
 #ifdef CONFIG_RING_BUFFER
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index e130da35808f..084f34dc6c9f 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -1202,6 +1202,40 @@ config RING_BUFFER_VALIDATE_TIME_DELTAS
 	  Only say Y if you understand what this does, and you
 	  still want it enabled. Otherwise say N
 
+config RING_BUFFER_PERSISTENT_INJECT
+	bool "Enable persistent ring buffer error injection test"
+	depends on RING_BUFFER
+	help
+	  This option will have the kernel check if the persistent ring
+	  buffer is named "ptracingtest", and if so, it will corrupt some
+	  of its pages on a kernel panic. This is used to test if the
+	  persistent ring buffer can recover from some of its sub-buffers
+	  being corrupted.
+	  To use this, boot a kernel with a "ptracingtest" persistent
+	  ring buffer, e.g.
+
+	   reserve_mem=20M:2M:trace trace_instance=ptracingtest@trace panic=1
+
+	  And after the 1st boot, run the following commands:
+
+	   cd /sys/kernel/tracing/instances/ptracingtest
+	   echo 1 > events/enable
+	   echo 1 > tracing_on
+	   sleep 3
+	   echo c > /proc/sysrq-trigger
+
+	  After the panic message, the kernel will reboot and will show
+	  the test results in the console output.
+
+	  Note that events for the test ring buffer need to be enabled
+	  prior to crashing the kernel so that the ring buffer has content
+	  that the test will corrupt.
+	  As the test will corrupt events in the "ptracingtest" persistent
+	  ring buffer, it should not be used for any other purpose other
+	  than this test.
+
+	  If unsure, say N
+
 config MMIOTRACE_TEST
 	tristate "Test module for mmiotrace"
 	depends on MMIOTRACE && m
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 518a05df6ef7..e56fe9dcc7d7 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -64,6 +64,10 @@ struct ring_buffer_cpu_meta {
 	unsigned long	commit_buffer;
 	__u32		subbuf_size;
 	__u32		nr_subbufs;
+#ifdef CONFIG_RING_BUFFER_PERSISTENT_INJECT
+	__u32		nr_invalid;
+	__u32		entry_bytes;
+#endif
 	int		buffers[];
 };
 
@@ -2079,6 +2083,21 @@ static void rb_meta_validate_events(struct ring_buffer_per_cpu *cpu_buffer)
 	if (discarded)
 		pr_cont(" (%d pages discarded)", discarded);
 	pr_cont("\n");
+
+#ifdef CONFIG_RING_BUFFER_PERSISTENT_INJECT
+	if (meta->nr_invalid)
+		pr_warn("Ring buffer testing [%d] invalid pages: %s (%d/%d)\n",
+			cpu_buffer->cpu,
+			(discarded == meta->nr_invalid) ? "PASSED" : "FAILED",
+			discarded, meta->nr_invalid);
+	if (meta->entry_bytes)
+		pr_warn("Ring buffer testing [%d] entry_bytes: %s (%ld/%ld)\n",
+			cpu_buffer->cpu,
+			(entry_bytes == meta->entry_bytes) ? "PASSED" : "FAILED",
+			(long)entry_bytes, (long)meta->entry_bytes);
+	meta->nr_invalid = 0;
+	meta->entry_bytes = 0;
+#endif
 	return;
 
  invalid:
@@ -2559,12 +2578,67 @@ static void rb_free_cpu_buffer(struct ring_buffer_per_cpu *cpu_buffer)
 	kfree(cpu_buffer);
 }
 
+#ifdef CONFIG_RING_BUFFER_PERSISTENT_INJECT
+static void rb_test_inject_invalid_pages(struct trace_buffer *buffer)
+{
+	struct ring_buffer_per_cpu *cpu_buffer;
+	struct ring_buffer_cpu_meta *meta;
+	struct buffer_data_page *dpage;
+	u32 entry_bytes = 0;
+	unsigned long ptr;
+	int subbuf_size;
+	int invalid = 0;
+	int cpu;
+	int i;
+
+	if (!(buffer->flags & RB_FL_TESTING))
+		return;
+
+	guard(preempt)();
+	cpu = smp_processor_id();
+
+	cpu_buffer = buffer->buffers[cpu];
+	meta = cpu_buffer->ring_meta;
+	ptr = (unsigned long)rb_subbufs_from_meta(meta);
+	subbuf_size = meta->subbuf_size;
+
+	for (i = 0; i < meta->nr_subbufs; i++) {
+		int idx = meta->buffers[i];
+
+		dpage = (void *)(ptr + idx * subbuf_size);
+		/* Skip unused pages */
+		if (!local_read(&dpage->commit))
+			continue;
+
+		/*
+		 * Invalidate even pages or multiples of 5. This will cause 3
+		 * contiguous invalidated(empty) pages.
+		 */
+		if (!(i & 0x1) || !(i % 5)) {
+			local_add(subbuf_size + 1, &dpage->commit);
+			invalid++;
+		} else {
+			/* Count total commit bytes. */
+			entry_bytes += local_read(&dpage->commit);
+		}
+	}
+
+	pr_info("Inject invalidated %d pages on CPU%d, total size: %ld\n",
+		invalid, cpu, (long)entry_bytes);
+	meta->nr_invalid = invalid;
+	meta->entry_bytes = entry_bytes;
+}
+#else /* !CONFIG_RING_BUFFER_PERSISTENT_INJECT */
+#define rb_test_inject_invalid_pages(buffer)	do { } while (0)
+#endif
+
 /* Stop recording on a persistent buffer and flush cache if needed. */
 static int rb_flush_buffer_cb(struct notifier_block *nb, unsigned long event, void *data)
 {
 	struct trace_buffer *buffer = container_of(nb, struct trace_buffer, flush_nb);
 
 	ring_buffer_record_off(buffer);
+	rb_test_inject_invalid_pages(buffer);
 	arch_ring_buffer_flush_range(buffer->range_addr_start, buffer->range_addr_end);
 	return NOTIFY_DONE;
 }
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index e9455d46ec16..96101d276d13 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -9436,6 +9436,8 @@ static void setup_trace_scratch(struct trace_array *tr,
 	memset(tscratch, 0, size);
 }
 
+#define TRACE_TEST_PTRACING_NAME	"ptracingtest"
+
 static int
 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, unsigned long size)
 {
@@ -9448,6 +9450,8 @@ allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, unsigned
 	buf->tr = tr;
 
 	if (tr->range_addr_start && tr->range_addr_size) {
+		if (!strcmp(tr->name, TRACE_TEST_PTRACING_NAME))
+			rb_flags |= RB_FL_TESTING;
 		/* Add scratch buffer to handle 128 modules */
 		buf->buffer = ring_buffer_alloc_range(size, rb_flags, 0,
 						      tr->range_addr_start,


^ permalink raw reply related

* [PATCH v16 3/5] ring-buffer: Skip invalid sub-buffers when rewinding persistent ring buffer
From: Masami Hiramatsu (Google) @ 2026-04-06 10:24 UTC (permalink / raw)
  To: Steven Rostedt, Catalin Marinas, Will Deacon
  Cc: Masami Hiramatsu, Mathieu Desnoyers, linux-kernel,
	linux-trace-kernel, Ian Rogers, linux-arm-kernel
In-Reply-To: <177547105523.259641.14385891517704197263.stgit@mhiramat.tok.corp.google.com>

From: Masami Hiramatsu (Google) <mhiramat@kernel.org>

Skip invalid sub-buffers when rewinding the persistent ring buffer
instead of stopping the rewind of the ring buffer. The skipped
buffers are cleared.

To ensure the rewinding stops at the unused page, this also clears
buffer_data_page::time_stamp when tracing resets the buffer. This
allows us to identify unused pages and empty pages.

Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
---
 Changes in v12:
   - Fix build error.
 Changes in v11:
   - Reset timestamp when the buffer is invalid.
   - When rewinding, skip subbuf page if timestamp is wrong and
     check timestamp after validating buffer data page.
 Changes in v10:
   - Newly added.
---
 kernel/trace/ring_buffer.c |   76 +++++++++++++++++++++++++-------------------
 1 file changed, 43 insertions(+), 33 deletions(-)

diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 0c284094f7d0..518a05df6ef7 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -363,6 +363,7 @@ struct buffer_page {
 static void rb_init_page(struct buffer_data_page *bpage)
 {
 	local_set(&bpage->commit, 0);
+	bpage->time_stamp = 0;
 }
 
 static __always_inline unsigned int rb_page_commit(struct buffer_page *bpage)
@@ -1878,12 +1879,14 @@ static int rb_read_data_buffer(struct buffer_data_page *dpage, int tail, int cpu
 	return events;
 }
 
-static int rb_validate_buffer(struct buffer_data_page *dpage, int cpu,
+static int rb_validate_buffer(struct buffer_page *bpage, int cpu,
 			      struct ring_buffer_cpu_meta *meta)
 {
+	struct buffer_data_page *dpage = bpage->page;
 	unsigned long long ts;
 	unsigned long tail;
 	u64 delta;
+	int ret = -1;
 
 	/*
 	 * When a sub-buffer is recovered from a read, the commit value may
@@ -1892,9 +1895,17 @@ static int rb_validate_buffer(struct buffer_data_page *dpage, int cpu,
 	 * subbuf_size is considered invalid.
 	 */
 	tail = local_read(&dpage->commit) & ~RB_MISSED_MASK;
-	if (tail > meta->subbuf_size)
-		return -1;
-	return rb_read_data_buffer(dpage, tail, cpu, &ts, &delta);
+	if (tail <= meta->subbuf_size)
+		ret = rb_read_data_buffer(dpage, tail, cpu, &ts, &delta);
+
+	if (ret < 0) {
+		local_set(&bpage->entries, 0);
+		local_set(&bpage->page->commit, 0);
+	} else {
+		local_set(&bpage->entries, ret);
+	}
+
+	return ret;
 }
 
 /* If the meta data has been validated, now validate the events */
@@ -1915,18 +1926,14 @@ static void rb_meta_validate_events(struct ring_buffer_per_cpu *cpu_buffer)
 	orig_head = head_page = cpu_buffer->head_page;
 
 	/* Do the reader page first */
-	ret = rb_validate_buffer(cpu_buffer->reader_page->page, cpu_buffer->cpu, meta);
+	ret = rb_validate_buffer(cpu_buffer->reader_page, cpu_buffer->cpu, meta);
 	if (ret < 0) {
 		pr_info("Ring buffer meta [%d] invalid reader page detected\n",
 			cpu_buffer->cpu);
 		discarded++;
-		/* Instead of discard whole ring buffer, discard only this sub-buffer. */
-		local_set(&cpu_buffer->reader_page->entries, 0);
-		local_set(&cpu_buffer->reader_page->page->commit, 0);
 	} else {
 		entries += ret;
 		entry_bytes += rb_page_size(cpu_buffer->reader_page);
-		local_set(&cpu_buffer->reader_page->entries, ret);
 	}
 
 	ts = head_page->page->time_stamp;
@@ -1945,26 +1952,33 @@ static void rb_meta_validate_events(struct ring_buffer_per_cpu *cpu_buffer)
 		if (head_page == cpu_buffer->tail_page)
 			break;
 
-		/* Ensure the page has older data than head. */
-		if (ts < head_page->page->time_stamp)
-			break;
-
-		ts = head_page->page->time_stamp;
-		/* Ensure the page has correct timestamp and some data. */
-		if (!ts || rb_page_commit(head_page) == 0)
-			break;
-
-		/* Stop rewind if the page is invalid. */
-		ret = rb_validate_buffer(head_page->page, cpu_buffer->cpu, meta);
-		if (ret < 0)
+		/* Rewind until unused page (no timestamp, no commit). */
+		if (!head_page->page->time_stamp && rb_page_commit(head_page) == 0)
 			break;
 
-		/* Recover the number of entries and update stats. */
-		local_set(&head_page->entries, ret);
-		if (ret)
-			local_inc(&cpu_buffer->pages_touched);
-		entries += ret;
-		entry_bytes += rb_page_commit(head_page);
+		/*
+		 * Skip if the page is invalid, or its timestamp is newer than the
+		 * previous valid page.
+		 */
+		ret = rb_validate_buffer(head_page, cpu_buffer->cpu, meta);
+		if (ret >= 0 && ts < head_page->page->time_stamp) {
+			local_set(&head_page->entries, 0);
+			local_set(&head_page->page->commit, 0);
+			head_page->page->time_stamp = ts;
+			ret = -1;
+		}
+		if (ret < 0) {
+			if (!discarded)
+				pr_info("Ring buffer meta [%d] invalid buffer page detected\n",
+					cpu_buffer->cpu);
+			discarded++;
+		} else {
+			entries += ret;
+			entry_bytes += rb_page_size(head_page);
+			if (ret > 0)
+				local_inc(&cpu_buffer->pages_touched);
+			ts = head_page->page->time_stamp;
+		}
 	}
 	if (i)
 		pr_info("Ring buffer [%d] rewound %d pages\n", cpu_buffer->cpu, i);
@@ -2034,15 +2048,12 @@ static void rb_meta_validate_events(struct ring_buffer_per_cpu *cpu_buffer)
 		if (head_page == cpu_buffer->reader_page)
 			continue;
 
-		ret = rb_validate_buffer(head_page->page, cpu_buffer->cpu, meta);
+		ret = rb_validate_buffer(head_page, cpu_buffer->cpu, meta);
 		if (ret < 0) {
 			if (!discarded)
 				pr_info("Ring buffer meta [%d] invalid buffer page detected\n",
 					cpu_buffer->cpu);
 			discarded++;
-			/* Instead of discard whole ring buffer, discard only this sub-buffer. */
-			local_set(&head_page->entries, 0);
-			local_set(&head_page->page->commit, 0);
 		} else {
 			/* If the buffer has content, update pages_touched */
 			if (ret)
@@ -2050,7 +2061,6 @@ static void rb_meta_validate_events(struct ring_buffer_per_cpu *cpu_buffer)
 
 			entries += ret;
 			entry_bytes += rb_page_size(head_page);
-			local_set(&head_page->entries, ret);
 		}
 		if (head_page == cpu_buffer->commit_page)
 			break;
@@ -2083,7 +2093,7 @@ static void rb_meta_validate_events(struct ring_buffer_per_cpu *cpu_buffer)
 	/* Reset all the subbuffers */
 	for (i = 0; i < meta->nr_subbufs - 1; i++, rb_inc_page(&head_page)) {
 		local_set(&head_page->entries, 0);
-		local_set(&head_page->page->commit, 0);
+		rb_init_page(head_page->page);
 	}
 }
 


^ permalink raw reply related

* [PATCH v16 2/5] ring-buffer: Skip invalid sub-buffers when validating persistent ring buffer
From: Masami Hiramatsu (Google) @ 2026-04-06 10:24 UTC (permalink / raw)
  To: Steven Rostedt, Catalin Marinas, Will Deacon
  Cc: Masami Hiramatsu, Mathieu Desnoyers, linux-kernel,
	linux-trace-kernel, Ian Rogers, linux-arm-kernel
In-Reply-To: <177547105523.259641.14385891517704197263.stgit@mhiramat.tok.corp.google.com>

From: Masami Hiramatsu (Google) <mhiramat@kernel.org>

Skip invalid sub-buffers when validating the persistent ring buffer
instead of discarding the entire ring buffer. Only skipped buffers
are invalidated (cleared).

If the cache data in memory fails to be synchronized during a reboot,
the persistent ring buffer may become partially corrupted, but other
sub-buffers may still contain readable event data. Only discard the
subbuffers that are found to be corrupted.

Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
---
  Changes in v15:
  - Skip reader_page loop check on persistent ring buffer because
    there can be contiguous empty (invalidated) pages.
  - Do not show discarded page number information if it is 0.
  Changes in v11:
  - Fix a typo.
  Changes in v9:
  - Add meta->subbuf_size check.
  - Fix a typo.
  - Handle invalid reader_page case.
  Changes in v8:
  - Add comment in rb_validate_buffer()
  - Clear the RB_MISSED_* flags in rb_validate_buffer() instead of
    skipping subbuf.
  - Remove unused subbuf local variable from rb_cpu_meta_valid().
  Changes in v7:
  - Combined with Handling RB_MISSED_* flags patch, focus on validation at boot.
  - Remove checking subbuffer data when validating metadata, because it should be done
    later.
  - Do not mark the discarded sub buffer page but just reset it.
  Changes in v6:
  - Show invalid page detection message once per CPU.
  Changes in v5:
  - Instead of showing errors for each page, just show the number
    of discarded pages at last.
  Changes in v3:
  - Record missed data event on commit.
---
 kernel/trace/ring_buffer.c |  109 ++++++++++++++++++++++++++------------------
 1 file changed, 65 insertions(+), 44 deletions(-)

diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 4d5817286791..0c284094f7d0 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -370,6 +370,12 @@ static __always_inline unsigned int rb_page_commit(struct buffer_page *bpage)
 	return local_read(&bpage->page->commit);
 }
 
+/* Size is determined by what has been committed */
+static __always_inline unsigned int rb_page_size(struct buffer_page *bpage)
+{
+	return rb_page_commit(bpage) & ~RB_MISSED_MASK;
+}
+
 static void free_buffer_page(struct buffer_page *bpage)
 {
 	/* Range pages are not to be freed */
@@ -1762,7 +1768,6 @@ static bool rb_cpu_meta_valid(struct ring_buffer_cpu_meta *meta, int cpu,
 			      unsigned long *subbuf_mask)
 {
 	int subbuf_size = PAGE_SIZE;
-	struct buffer_data_page *subbuf;
 	unsigned long buffers_start;
 	unsigned long buffers_end;
 	int i;
@@ -1770,6 +1775,11 @@ static bool rb_cpu_meta_valid(struct ring_buffer_cpu_meta *meta, int cpu,
 	if (!subbuf_mask)
 		return false;
 
+	if (meta->subbuf_size != PAGE_SIZE) {
+		pr_info("Ring buffer boot meta [%d] invalid subbuf_size\n", cpu);
+		return false;
+	}
+
 	buffers_start = meta->first_buffer;
 	buffers_end = meta->first_buffer + (subbuf_size * meta->nr_subbufs);
 
@@ -1786,11 +1796,12 @@ static bool rb_cpu_meta_valid(struct ring_buffer_cpu_meta *meta, int cpu,
 		return false;
 	}
 
-	subbuf = rb_subbufs_from_meta(meta);
-
 	bitmap_clear(subbuf_mask, 0, meta->nr_subbufs);
 
-	/* Is the meta buffers and the subbufs themselves have correct data? */
+	/*
+	 * Ensure the meta::buffers array has correct data. The data in each subbufs
+	 * are checked later in rb_meta_validate_events().
+	 */
 	for (i = 0; i < meta->nr_subbufs; i++) {
 		if (meta->buffers[i] < 0 ||
 		    meta->buffers[i] >= meta->nr_subbufs) {
@@ -1798,18 +1809,12 @@ static bool rb_cpu_meta_valid(struct ring_buffer_cpu_meta *meta, int cpu,
 			return false;
 		}
 
-		if ((unsigned)local_read(&subbuf->commit) > subbuf_size) {
-			pr_info("Ring buffer boot meta [%d] buffer invalid commit\n", cpu);
-			return false;
-		}
-
 		if (test_bit(meta->buffers[i], subbuf_mask)) {
 			pr_info("Ring buffer boot meta [%d] array has duplicates\n", cpu);
 			return false;
 		}
 
 		set_bit(meta->buffers[i], subbuf_mask);
-		subbuf = (void *)subbuf + subbuf_size;
 	}
 
 	return true;
@@ -1873,13 +1878,22 @@ static int rb_read_data_buffer(struct buffer_data_page *dpage, int tail, int cpu
 	return events;
 }
 
-static int rb_validate_buffer(struct buffer_data_page *dpage, int cpu)
+static int rb_validate_buffer(struct buffer_data_page *dpage, int cpu,
+			      struct ring_buffer_cpu_meta *meta)
 {
 	unsigned long long ts;
+	unsigned long tail;
 	u64 delta;
-	int tail;
 
-	tail = local_read(&dpage->commit);
+	/*
+	 * When a sub-buffer is recovered from a read, the commit value may
+	 * have RB_MISSED_* bits set, as these bits are reset on reuse.
+	 * Even after clearing these bits, a commit value greater than the
+	 * subbuf_size is considered invalid.
+	 */
+	tail = local_read(&dpage->commit) & ~RB_MISSED_MASK;
+	if (tail > meta->subbuf_size)
+		return -1;
 	return rb_read_data_buffer(dpage, tail, cpu, &ts, &delta);
 }
 
@@ -1890,6 +1904,7 @@ static void rb_meta_validate_events(struct ring_buffer_per_cpu *cpu_buffer)
 	struct buffer_page *head_page, *orig_head;
 	unsigned long entry_bytes = 0;
 	unsigned long entries = 0;
+	int discarded = 0;
 	int ret;
 	u64 ts;
 	int i;
@@ -1900,14 +1915,19 @@ static void rb_meta_validate_events(struct ring_buffer_per_cpu *cpu_buffer)
 	orig_head = head_page = cpu_buffer->head_page;
 
 	/* Do the reader page first */
-	ret = rb_validate_buffer(cpu_buffer->reader_page->page, cpu_buffer->cpu);
+	ret = rb_validate_buffer(cpu_buffer->reader_page->page, cpu_buffer->cpu, meta);
 	if (ret < 0) {
-		pr_info("Ring buffer reader page is invalid\n");
-		goto invalid;
+		pr_info("Ring buffer meta [%d] invalid reader page detected\n",
+			cpu_buffer->cpu);
+		discarded++;
+		/* Instead of discard whole ring buffer, discard only this sub-buffer. */
+		local_set(&cpu_buffer->reader_page->entries, 0);
+		local_set(&cpu_buffer->reader_page->page->commit, 0);
+	} else {
+		entries += ret;
+		entry_bytes += rb_page_size(cpu_buffer->reader_page);
+		local_set(&cpu_buffer->reader_page->entries, ret);
 	}
-	entries += ret;
-	entry_bytes += local_read(&cpu_buffer->reader_page->page->commit);
-	local_set(&cpu_buffer->reader_page->entries, ret);
 
 	ts = head_page->page->time_stamp;
 
@@ -1935,7 +1955,7 @@ static void rb_meta_validate_events(struct ring_buffer_per_cpu *cpu_buffer)
 			break;
 
 		/* Stop rewind if the page is invalid. */
-		ret = rb_validate_buffer(head_page->page, cpu_buffer->cpu);
+		ret = rb_validate_buffer(head_page->page, cpu_buffer->cpu, meta);
 		if (ret < 0)
 			break;
 
@@ -2014,21 +2034,24 @@ static void rb_meta_validate_events(struct ring_buffer_per_cpu *cpu_buffer)
 		if (head_page == cpu_buffer->reader_page)
 			continue;
 
-		ret = rb_validate_buffer(head_page->page, cpu_buffer->cpu);
+		ret = rb_validate_buffer(head_page->page, cpu_buffer->cpu, meta);
 		if (ret < 0) {
-			pr_info("Ring buffer meta [%d] invalid buffer page\n",
-				cpu_buffer->cpu);
-			goto invalid;
-		}
-
-		/* If the buffer has content, update pages_touched */
-		if (ret)
-			local_inc(&cpu_buffer->pages_touched);
-
-		entries += ret;
-		entry_bytes += local_read(&head_page->page->commit);
-		local_set(&head_page->entries, ret);
+			if (!discarded)
+				pr_info("Ring buffer meta [%d] invalid buffer page detected\n",
+					cpu_buffer->cpu);
+			discarded++;
+			/* Instead of discard whole ring buffer, discard only this sub-buffer. */
+			local_set(&head_page->entries, 0);
+			local_set(&head_page->page->commit, 0);
+		} else {
+			/* If the buffer has content, update pages_touched */
+			if (ret)
+				local_inc(&cpu_buffer->pages_touched);
 
+			entries += ret;
+			entry_bytes += rb_page_size(head_page);
+			local_set(&head_page->entries, ret);
+		}
 		if (head_page == cpu_buffer->commit_page)
 			break;
 	}
@@ -2042,7 +2065,10 @@ static void rb_meta_validate_events(struct ring_buffer_per_cpu *cpu_buffer)
 	local_set(&cpu_buffer->entries, entries);
 	local_set(&cpu_buffer->entries_bytes, entry_bytes);
 
-	pr_info("Ring buffer meta [%d] is from previous boot!\n", cpu_buffer->cpu);
+	pr_info("Ring buffer meta [%d] is from previous boot!", cpu_buffer->cpu);
+	if (discarded)
+		pr_cont(" (%d pages discarded)", discarded);
+	pr_cont("\n");
 	return;
 
  invalid:
@@ -3329,12 +3355,6 @@ rb_iter_head_event(struct ring_buffer_iter *iter)
 	return NULL;
 }
 
-/* Size is determined by what has been committed */
-static __always_inline unsigned rb_page_size(struct buffer_page *bpage)
-{
-	return rb_page_commit(bpage) & ~RB_MISSED_MASK;
-}
-
 static __always_inline unsigned
 rb_commit_index(struct ring_buffer_per_cpu *cpu_buffer)
 {
@@ -5647,11 +5667,12 @@ __rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
  again:
 	/*
 	 * This should normally only loop twice. But because the
-	 * start of the reader inserts an empty page, it causes
-	 * a case where we will loop three times. There should be no
-	 * reason to loop four times (that I know of).
+	 * start of the reader inserts an empty page, it causes a
+	 * case where we will loop three times. There should be no
+	 * reason to loop four times unless the ring buffer is a
+	 * recovered persistent ring buffer.
 	 */
-	if (RB_WARN_ON(cpu_buffer, ++nr_loops > 3)) {
+	if (RB_WARN_ON(cpu_buffer, ++nr_loops > 3 && !cpu_buffer->ring_meta)) {
 		reader = NULL;
 		goto out;
 	}


^ permalink raw reply related

* [PATCH v16 1/5] ring-buffer: Flush and stop persistent ring buffer on panic
From: Masami Hiramatsu (Google) @ 2026-04-06 10:24 UTC (permalink / raw)
  To: Steven Rostedt, Catalin Marinas, Will Deacon
  Cc: Masami Hiramatsu, Mathieu Desnoyers, linux-kernel,
	linux-trace-kernel, Ian Rogers, linux-arm-kernel
In-Reply-To: <177547105523.259641.14385891517704197263.stgit@mhiramat.tok.corp.google.com>

From: Masami Hiramatsu (Google) <mhiramat@kernel.org>

On real hardware, panic and machine reboot may not flush hardware cache
to memory. This means the persistent ring buffer, which relies on a
coherent state of memory, may not have its events written to the buffer
and they may be lost. Moreover, there may be inconsistency with the
counters which are used for validation of the integrity of the
persistent ring buffer which may cause all data to be discarded.

To avoid this issue, stop recording of the ring buffer on panic and
flush the cache of the ring buffer's memory.

Fixes: e645535a954a ("tracing: Add option to use memmapped memory for trace boot instance")
Cc: stable@vger.kernel.org
Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
---
 Changes in v13:
   - Fix a rebase conflict.
 Changes in v11:
   - Do nothing by default: flush_cache_vmap() does nothing on x86, but
     it can cause a deadlock on some architectures via on_each_cpu()
     because other CPUs will be stopped when the panic notifier is called.
 Changes in v9:
   - Fix typo of & to &&.
   - Fix typo of "Generic"
 Changes in v6:
   - Introduce asm/ring_buffer.h for arch_ring_buffer_flush_range().
   - Use flush_cache_vmap() instead of flush_cache_all().
 Changes in v5:
   - Use ring_buffer_record_off() instead of ring_buffer_record_disable().
   - Use flush_cache_all() to ensure flush all cache.
 Changes in v3:
   - update patch description.
---
 arch/alpha/include/asm/Kbuild        |    1 +
 arch/arc/include/asm/Kbuild          |    1 +
 arch/arm/include/asm/Kbuild          |    1 +
 arch/arm64/include/asm/ring_buffer.h |   10 ++++++++++
 arch/csky/include/asm/Kbuild         |    1 +
 arch/hexagon/include/asm/Kbuild      |    1 +
 arch/loongarch/include/asm/Kbuild    |    1 +
 arch/m68k/include/asm/Kbuild         |    1 +
 arch/microblaze/include/asm/Kbuild   |    1 +
 arch/mips/include/asm/Kbuild         |    1 +
 arch/nios2/include/asm/Kbuild        |    1 +
 arch/openrisc/include/asm/Kbuild     |    1 +
 arch/parisc/include/asm/Kbuild       |    1 +
 arch/powerpc/include/asm/Kbuild      |    1 +
 arch/riscv/include/asm/Kbuild        |    1 +
 arch/s390/include/asm/Kbuild         |    1 +
 arch/sh/include/asm/Kbuild           |    1 +
 arch/sparc/include/asm/Kbuild        |    1 +
 arch/um/include/asm/Kbuild           |    1 +
 arch/x86/include/asm/Kbuild          |    1 +
 arch/xtensa/include/asm/Kbuild       |    1 +
 include/asm-generic/ring_buffer.h    |   13 +++++++++++++
 kernel/trace/ring_buffer.c           |   22 ++++++++++++++++++++++
 23 files changed, 65 insertions(+)
 create mode 100644 arch/arm64/include/asm/ring_buffer.h
 create mode 100644 include/asm-generic/ring_buffer.h

diff --git a/arch/alpha/include/asm/Kbuild b/arch/alpha/include/asm/Kbuild
index 483965c5a4de..b154b4e3dfa8 100644
--- a/arch/alpha/include/asm/Kbuild
+++ b/arch/alpha/include/asm/Kbuild
@@ -5,4 +5,5 @@ generic-y += agp.h
 generic-y += asm-offsets.h
 generic-y += kvm_para.h
 generic-y += mcs_spinlock.h
+generic-y += ring_buffer.h
 generic-y += text-patching.h
diff --git a/arch/arc/include/asm/Kbuild b/arch/arc/include/asm/Kbuild
index 4c69522e0328..483caacc6988 100644
--- a/arch/arc/include/asm/Kbuild
+++ b/arch/arc/include/asm/Kbuild
@@ -5,5 +5,6 @@ generic-y += extable.h
 generic-y += kvm_para.h
 generic-y += mcs_spinlock.h
 generic-y += parport.h
+generic-y += ring_buffer.h
 generic-y += user.h
 generic-y += text-patching.h
diff --git a/arch/arm/include/asm/Kbuild b/arch/arm/include/asm/Kbuild
index 03657ff8fbe3..decad5f2c826 100644
--- a/arch/arm/include/asm/Kbuild
+++ b/arch/arm/include/asm/Kbuild
@@ -3,6 +3,7 @@ generic-y += early_ioremap.h
 generic-y += extable.h
 generic-y += flat.h
 generic-y += parport.h
+generic-y += ring_buffer.h
 
 generated-y += mach-types.h
 generated-y += unistd-nr.h
diff --git a/arch/arm64/include/asm/ring_buffer.h b/arch/arm64/include/asm/ring_buffer.h
new file mode 100644
index 000000000000..62316c406888
--- /dev/null
+++ b/arch/arm64/include/asm/ring_buffer.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef _ASM_ARM64_RING_BUFFER_H
+#define _ASM_ARM64_RING_BUFFER_H
+
+#include <asm/cacheflush.h>
+
+/* Flush D-cache on persistent ring buffer */
+#define arch_ring_buffer_flush_range(start, end)	dcache_clean_pop(start, end)
+
+#endif /* _ASM_ARM64_RING_BUFFER_H */
diff --git a/arch/csky/include/asm/Kbuild b/arch/csky/include/asm/Kbuild
index 3a5c7f6e5aac..7dca0c6cdc84 100644
--- a/arch/csky/include/asm/Kbuild
+++ b/arch/csky/include/asm/Kbuild
@@ -9,6 +9,7 @@ generic-y += qrwlock.h
 generic-y += qrwlock_types.h
 generic-y += qspinlock.h
 generic-y += parport.h
+generic-y += ring_buffer.h
 generic-y += user.h
 generic-y += vmlinux.lds.h
 generic-y += text-patching.h
diff --git a/arch/hexagon/include/asm/Kbuild b/arch/hexagon/include/asm/Kbuild
index 1efa1e993d4b..0f887d4238ed 100644
--- a/arch/hexagon/include/asm/Kbuild
+++ b/arch/hexagon/include/asm/Kbuild
@@ -5,4 +5,5 @@ generic-y += extable.h
 generic-y += iomap.h
 generic-y += kvm_para.h
 generic-y += mcs_spinlock.h
+generic-y += ring_buffer.h
 generic-y += text-patching.h
diff --git a/arch/loongarch/include/asm/Kbuild b/arch/loongarch/include/asm/Kbuild
index 9034b583a88a..7e92957baf6a 100644
--- a/arch/loongarch/include/asm/Kbuild
+++ b/arch/loongarch/include/asm/Kbuild
@@ -10,5 +10,6 @@ generic-y += qrwlock.h
 generic-y += user.h
 generic-y += ioctl.h
 generic-y += mmzone.h
+generic-y += ring_buffer.h
 generic-y += statfs.h
 generic-y += text-patching.h
diff --git a/arch/m68k/include/asm/Kbuild b/arch/m68k/include/asm/Kbuild
index b282e0dd8dc1..62543bf305ff 100644
--- a/arch/m68k/include/asm/Kbuild
+++ b/arch/m68k/include/asm/Kbuild
@@ -3,5 +3,6 @@ generated-y += syscall_table.h
 generic-y += extable.h
 generic-y += kvm_para.h
 generic-y += mcs_spinlock.h
+generic-y += ring_buffer.h
 generic-y += spinlock.h
 generic-y += text-patching.h
diff --git a/arch/microblaze/include/asm/Kbuild b/arch/microblaze/include/asm/Kbuild
index 7178f990e8b3..0030309b47ad 100644
--- a/arch/microblaze/include/asm/Kbuild
+++ b/arch/microblaze/include/asm/Kbuild
@@ -5,6 +5,7 @@ generic-y += extable.h
 generic-y += kvm_para.h
 generic-y += mcs_spinlock.h
 generic-y += parport.h
+generic-y += ring_buffer.h
 generic-y += syscalls.h
 generic-y += tlb.h
 generic-y += user.h
diff --git a/arch/mips/include/asm/Kbuild b/arch/mips/include/asm/Kbuild
index 684569b2ecd6..9771c3d85074 100644
--- a/arch/mips/include/asm/Kbuild
+++ b/arch/mips/include/asm/Kbuild
@@ -12,5 +12,6 @@ generic-y += mcs_spinlock.h
 generic-y += parport.h
 generic-y += qrwlock.h
 generic-y += qspinlock.h
+generic-y += ring_buffer.h
 generic-y += user.h
 generic-y += text-patching.h
diff --git a/arch/nios2/include/asm/Kbuild b/arch/nios2/include/asm/Kbuild
index 28004301c236..0a2530964413 100644
--- a/arch/nios2/include/asm/Kbuild
+++ b/arch/nios2/include/asm/Kbuild
@@ -5,6 +5,7 @@ generic-y += cmpxchg.h
 generic-y += extable.h
 generic-y += kvm_para.h
 generic-y += mcs_spinlock.h
+generic-y += ring_buffer.h
 generic-y += spinlock.h
 generic-y += user.h
 generic-y += text-patching.h
diff --git a/arch/openrisc/include/asm/Kbuild b/arch/openrisc/include/asm/Kbuild
index cef49d60d74c..8aa34621702d 100644
--- a/arch/openrisc/include/asm/Kbuild
+++ b/arch/openrisc/include/asm/Kbuild
@@ -8,4 +8,5 @@ generic-y += spinlock_types.h
 generic-y += spinlock.h
 generic-y += qrwlock_types.h
 generic-y += qrwlock.h
+generic-y += ring_buffer.h
 generic-y += user.h
diff --git a/arch/parisc/include/asm/Kbuild b/arch/parisc/include/asm/Kbuild
index 4fb596d94c89..d48d158f7241 100644
--- a/arch/parisc/include/asm/Kbuild
+++ b/arch/parisc/include/asm/Kbuild
@@ -4,4 +4,5 @@ generated-y += syscall_table_64.h
 generic-y += agp.h
 generic-y += kvm_para.h
 generic-y += mcs_spinlock.h
+generic-y += ring_buffer.h
 generic-y += user.h
diff --git a/arch/powerpc/include/asm/Kbuild b/arch/powerpc/include/asm/Kbuild
index 2e23533b67e3..805b5aeebb6f 100644
--- a/arch/powerpc/include/asm/Kbuild
+++ b/arch/powerpc/include/asm/Kbuild
@@ -5,4 +5,5 @@ generated-y += syscall_table_spu.h
 generic-y += agp.h
 generic-y += mcs_spinlock.h
 generic-y += qrwlock.h
+generic-y += ring_buffer.h
 generic-y += early_ioremap.h
diff --git a/arch/riscv/include/asm/Kbuild b/arch/riscv/include/asm/Kbuild
index bd5fc9403295..7721b63642f4 100644
--- a/arch/riscv/include/asm/Kbuild
+++ b/arch/riscv/include/asm/Kbuild
@@ -14,5 +14,6 @@ generic-y += ticket_spinlock.h
 generic-y += qrwlock.h
 generic-y += qrwlock_types.h
 generic-y += qspinlock.h
+generic-y += ring_buffer.h
 generic-y += user.h
 generic-y += vmlinux.lds.h
diff --git a/arch/s390/include/asm/Kbuild b/arch/s390/include/asm/Kbuild
index 80bad7de7a04..0c1fc47c3ba0 100644
--- a/arch/s390/include/asm/Kbuild
+++ b/arch/s390/include/asm/Kbuild
@@ -7,3 +7,4 @@ generated-y += unistd_nr.h
 generic-y += asm-offsets.h
 generic-y += mcs_spinlock.h
 generic-y += mmzone.h
+generic-y += ring_buffer.h
diff --git a/arch/sh/include/asm/Kbuild b/arch/sh/include/asm/Kbuild
index 4d3f10ed8275..f0403d3ee8ab 100644
--- a/arch/sh/include/asm/Kbuild
+++ b/arch/sh/include/asm/Kbuild
@@ -3,4 +3,5 @@ generated-y += syscall_table.h
 generic-y += kvm_para.h
 generic-y += mcs_spinlock.h
 generic-y += parport.h
+generic-y += ring_buffer.h
 generic-y += text-patching.h
diff --git a/arch/sparc/include/asm/Kbuild b/arch/sparc/include/asm/Kbuild
index 17ee8a273aa6..49c6bb326b75 100644
--- a/arch/sparc/include/asm/Kbuild
+++ b/arch/sparc/include/asm/Kbuild
@@ -4,4 +4,5 @@ generated-y += syscall_table_64.h
 generic-y += agp.h
 generic-y += kvm_para.h
 generic-y += mcs_spinlock.h
+generic-y += ring_buffer.h
 generic-y += text-patching.h
diff --git a/arch/um/include/asm/Kbuild b/arch/um/include/asm/Kbuild
index 1b9b82bbe322..2a1629ba8140 100644
--- a/arch/um/include/asm/Kbuild
+++ b/arch/um/include/asm/Kbuild
@@ -17,6 +17,7 @@ generic-y += module.lds.h
 generic-y += parport.h
 generic-y += percpu.h
 generic-y += preempt.h
+generic-y += ring_buffer.h
 generic-y += runtime-const.h
 generic-y += softirq_stack.h
 generic-y += switch_to.h
diff --git a/arch/x86/include/asm/Kbuild b/arch/x86/include/asm/Kbuild
index 4566000e15c4..078fd2c0d69d 100644
--- a/arch/x86/include/asm/Kbuild
+++ b/arch/x86/include/asm/Kbuild
@@ -14,3 +14,4 @@ generic-y += early_ioremap.h
 generic-y += fprobe.h
 generic-y += mcs_spinlock.h
 generic-y += mmzone.h
+generic-y += ring_buffer.h
diff --git a/arch/xtensa/include/asm/Kbuild b/arch/xtensa/include/asm/Kbuild
index 13fe45dea296..e57af619263a 100644
--- a/arch/xtensa/include/asm/Kbuild
+++ b/arch/xtensa/include/asm/Kbuild
@@ -6,5 +6,6 @@ generic-y += mcs_spinlock.h
 generic-y += parport.h
 generic-y += qrwlock.h
 generic-y += qspinlock.h
+generic-y += ring_buffer.h
 generic-y += user.h
 generic-y += text-patching.h
diff --git a/include/asm-generic/ring_buffer.h b/include/asm-generic/ring_buffer.h
new file mode 100644
index 000000000000..201d2aee1005
--- /dev/null
+++ b/include/asm-generic/ring_buffer.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Generic arch dependent ring_buffer macros.
+ */
+#ifndef __ASM_GENERIC_RING_BUFFER_H__
+#define __ASM_GENERIC_RING_BUFFER_H__
+
+#include <linux/cacheflush.h>
+
+/* Flush cache on ring buffer range if needed. Do nothing by default. */
+#define arch_ring_buffer_flush_range(start, end)	do { } while (0)
+
+#endif /* __ASM_GENERIC_RING_BUFFER_H__ */
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 2caa5d3d0ae9..4d5817286791 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -7,6 +7,7 @@
 #include <linux/ring_buffer_types.h>
 #include <linux/sched/isolation.h>
 #include <linux/trace_recursion.h>
+#include <linux/panic_notifier.h>
 #include <linux/trace_events.h>
 #include <linux/ring_buffer.h>
 #include <linux/trace_clock.h>
@@ -31,6 +32,7 @@
 #include <linux/oom.h>
 #include <linux/mm.h>
 
+#include <asm/ring_buffer.h>
 #include <asm/local64.h>
 #include <asm/local.h>
 #include <asm/setup.h>
@@ -559,6 +561,7 @@ struct trace_buffer {
 
 	unsigned long			range_addr_start;
 	unsigned long			range_addr_end;
+	struct notifier_block		flush_nb;
 
 	struct ring_buffer_meta		*meta;
 
@@ -2520,6 +2523,16 @@ static void rb_free_cpu_buffer(struct ring_buffer_per_cpu *cpu_buffer)
 	kfree(cpu_buffer);
 }
 
+/* Stop recording on a persistent buffer and flush cache if needed. */
+static int rb_flush_buffer_cb(struct notifier_block *nb, unsigned long event, void *data)
+{
+	struct trace_buffer *buffer = container_of(nb, struct trace_buffer, flush_nb);
+
+	ring_buffer_record_off(buffer);
+	arch_ring_buffer_flush_range(buffer->range_addr_start, buffer->range_addr_end);
+	return NOTIFY_DONE;
+}
+
 static struct trace_buffer *alloc_buffer(unsigned long size, unsigned flags,
 					 int order, unsigned long start,
 					 unsigned long end,
@@ -2650,6 +2663,12 @@ static struct trace_buffer *alloc_buffer(unsigned long size, unsigned flags,
 
 	mutex_init(&buffer->mutex);
 
+	/* Persistent ring buffer needs to flush cache before reboot. */
+	if (start && end) {
+		buffer->flush_nb.notifier_call = rb_flush_buffer_cb;
+		atomic_notifier_chain_register(&panic_notifier_list, &buffer->flush_nb);
+	}
+
 	return_ptr(buffer);
 
  fail_free_buffers:
@@ -2748,6 +2767,9 @@ ring_buffer_free(struct trace_buffer *buffer)
 {
 	int cpu;
 
+	if (buffer->range_addr_start && buffer->range_addr_end)
+		atomic_notifier_chain_unregister(&panic_notifier_list, &buffer->flush_nb);
+
 	cpuhp_state_remove_instance(CPUHP_TRACE_RB_PREPARE, &buffer->node);
 
 	irq_work_sync(&buffer->irq_work.work);


^ permalink raw reply related

* [PATCH v16 0/5] ring-buffer: Making persistent ring buffers robust
From: Masami Hiramatsu (Google) @ 2026-04-06 10:24 UTC (permalink / raw)
  To: Steven Rostedt, Catalin Marinas, Will Deacon
  Cc: Masami Hiramatsu, Mathieu Desnoyers, linux-kernel,
	linux-trace-kernel, Ian Rogers, linux-arm-kernel

Hi,

Here is the 16th version of improvement patches for making persistent
ring buffers robust to failures.
The previous version is here:

https://lore.kernel.org/all/177494615421.71933.3679132057004156013.stgit@mhiramat.tok.corp.google.com/

This version adds Catalin's Ack to [1/5] and updates the description and
documentation in [4/5] and [5/5]. It is also rebased on ring-buffer/for-next.

Thank you,

---

Masami Hiramatsu (Google) (5):
      ring-buffer: Flush and stop persistent ring buffer on panic
      ring-buffer: Skip invalid sub-buffers when validating persistent ring buffer
      ring-buffer: Skip invalid sub-buffers when rewinding persistent ring buffer
      ring-buffer: Add persistent ring buffer invalid-page inject test
      ring-buffer: Show commit numbers in buffer_meta file


 arch/alpha/include/asm/Kbuild        |    1 
 arch/arc/include/asm/Kbuild          |    1 
 arch/arm/include/asm/Kbuild          |    1 
 arch/arm64/include/asm/ring_buffer.h |   10 +
 arch/csky/include/asm/Kbuild         |    1 
 arch/hexagon/include/asm/Kbuild      |    1 
 arch/loongarch/include/asm/Kbuild    |    1 
 arch/m68k/include/asm/Kbuild         |    1 
 arch/microblaze/include/asm/Kbuild   |    1 
 arch/mips/include/asm/Kbuild         |    1 
 arch/nios2/include/asm/Kbuild        |    1 
 arch/openrisc/include/asm/Kbuild     |    1 
 arch/parisc/include/asm/Kbuild       |    1 
 arch/powerpc/include/asm/Kbuild      |    1 
 arch/riscv/include/asm/Kbuild        |    1 
 arch/s390/include/asm/Kbuild         |    1 
 arch/sh/include/asm/Kbuild           |    1 
 arch/sparc/include/asm/Kbuild        |    1 
 arch/um/include/asm/Kbuild           |    1 
 arch/x86/include/asm/Kbuild          |    1 
 arch/xtensa/include/asm/Kbuild       |    1 
 include/asm-generic/ring_buffer.h    |   13 ++
 include/linux/ring_buffer.h          |    1 
 kernel/trace/Kconfig                 |   34 ++++
 kernel/trace/ring_buffer.c           |  258 ++++++++++++++++++++++++++--------
 kernel/trace/trace.c                 |    4 +
 26 files changed, 276 insertions(+), 64 deletions(-)
 create mode 100644 arch/arm64/include/asm/ring_buffer.h
 create mode 100644 include/asm-generic/ring_buffer.h

--
Masami Hiramatsu (Google) <mhiramat@kernel.org>

^ permalink raw reply

* [PATCH v2] bootconfig: Skip printing early params to cmdline from bootconfig
From: Masami Hiramatsu (Google) @ 2026-04-06  7:49 UTC (permalink / raw)
  To: Masami Hiramatsu, Steven Rostedt
  Cc: Breno Leitao, linux-kernel, linux-trace-kernel

From: Masami Hiramatsu (Google) <mhiramat@kernel.org>

If a user configures `kernel.key` in bootconfig, the 'key' is shown
in the kernel cmdline (/proc/cmdline) and the kernel boot parameter
handler associated with 'key' is invoked. However, since bootconfig
does not support parameters defined with early_param(), those keys
are shown in '/proc/cmdline' but are not handled by the kernel.

This could easily mislead users who expected to be able to specify
early parameters via the boot configuration, leading them to wonder
why it doesn't work.

Let's skip printing out early params to the cmdline buffer, and warn
if there are such parameters in bootconfig.

Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
---
 Changes in v2:
  - Check if the parameter is defined only by early_param().
---
 init/main.c |   35 +++++++++++++++++++++++++++++------
 1 file changed, 29 insertions(+), 6 deletions(-)

diff --git a/init/main.c b/init/main.c
index 1cb395dd94e4..e4687c00e8fb 100644
--- a/init/main.c
+++ b/init/main.c
@@ -324,10 +324,26 @@ static void * __init get_boot_config_from_initrd(size_t *_size)
 
 static char xbc_namebuf[XBC_KEYLEN_MAX] __initdata;
 
+/* Return true if the given param is only defined by early_param(). */
+static bool __init is_early_only_param(const char *param)
+{
+	const struct obs_kernel_param *p;
+	bool ret = false;
+
+	for (p = __setup_start; p < __setup_end; p++) {
+		if (parameq(param, p->str)) {
+			if (!p->early)
+				return false;
+			ret = true;
+		}
+	}
+	return ret;
+}
+
 #define rest(dst, end) ((end) > (dst) ? (end) - (dst) : 0)
 
 static int __init xbc_snprint_cmdline(char *buf, size_t size,
-				      struct xbc_node *root)
+				      struct xbc_node *root, bool is_kernel)
 {
 	struct xbc_node *knode, *vnode;
 	char *end = buf + size;
@@ -340,6 +356,13 @@ static int __init xbc_snprint_cmdline(char *buf, size_t size,
 		if (ret < 0)
 			return ret;
 
+		/* We will skip early params because it is not applied. */
+		if (is_kernel && is_early_only_param(xbc_namebuf)) {
+			pr_warn_once("bootconfig: early_param(e.g. %s.%s) can not be handled.\n",
+				     xbc_node_get_data(root), xbc_namebuf);
+			continue;
+		}
+
 		vnode = xbc_node_get_child(knode);
 		if (!vnode) {
 			ret = snprintf(buf, rest(buf, end), "%s ", xbc_namebuf);
@@ -368,7 +391,7 @@ static int __init xbc_snprint_cmdline(char *buf, size_t size,
 #undef rest
 
 /* Make an extra command line under given key word */
-static char * __init xbc_make_cmdline(const char *key)
+static char * __init xbc_make_cmdline(const char *key, bool is_kernel)
 {
 	struct xbc_node *root;
 	char *new_cmdline;
@@ -379,7 +402,7 @@ static char * __init xbc_make_cmdline(const char *key)
 		return NULL;
 
 	/* Count required buffer size */
-	len = xbc_snprint_cmdline(NULL, 0, root);
+	len = xbc_snprint_cmdline(NULL, 0, root, is_kernel);
 	if (len <= 0)
 		return NULL;
 
@@ -389,7 +412,7 @@ static char * __init xbc_make_cmdline(const char *key)
 		return NULL;
 	}
 
-	ret = xbc_snprint_cmdline(new_cmdline, len + 1, root);
+	ret = xbc_snprint_cmdline(new_cmdline, len + 1, root, is_kernel);
 	if (ret < 0 || ret > len) {
 		pr_err("Failed to print extra kernel cmdline.\n");
 		memblock_free(new_cmdline, len + 1);
@@ -465,9 +488,9 @@ static void __init setup_boot_config(void)
 		xbc_get_info(&ret, NULL);
 		pr_info("Load bootconfig: %ld bytes %d nodes\n", (long)size, ret);
 		/* keys starting with "kernel." are passed via cmdline */
-		extra_command_line = xbc_make_cmdline("kernel");
+		extra_command_line = xbc_make_cmdline("kernel", true);
 		/* Also, "init." keys are init arguments */
-		extra_init_args = xbc_make_cmdline("init");
+		extra_init_args = xbc_make_cmdline("init", false);
 	}
 	return;
 }


^ permalink raw reply related

* Re: [PATCH v3] kernel/trace: fixed static warnings
From: Abhijith Sriram @ 2026-04-06  7:30 UTC (permalink / raw)
  To: Masami Hiramatsu
  Cc: Steven Rostedt, Mathieu Desnoyers, open list, open list:TRACING
In-Reply-To: <20260406161801.d507eeb247eb8b3b17f490a8@kernel.org>

On Mon, Apr 6, 2026 at 9:18 AM Masami Hiramatsu <mhiramat@kernel.org> wrote:
>
> On Mon,  6 Apr 2026 08:00:36 +0200
> abhijithsriram95@gmail.com wrote:
>
> > From: Abhijith Sriram <abhijithsriram95@gmail.com>
> >
> > The change in the function argument description
> > was due to the static code checker script reading
> > the word filter back to back
> >
> > Changes in v2:
> > - corrected *m = file->private_data to m = file->private_data
> >
> > Changes in v3:
> > - reverted the changes for struct seq_file *m and
> >   added a new empty line instead
> >
> > Signed-off-by: Abhijith Sriram <abhijithsriram95@gmail.com>
> > ---
> >  kernel/trace/trace_events_trigger.c | 8 +++++---
> >  new-changes                         | 6 ++++++
> >  2 files changed, 11 insertions(+), 3 deletions(-)
> >  create mode 100644 new-changes
> >
> > diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c
> > index 655db2e82513..08adb593fcd9 100644
> > --- a/kernel/trace/trace_events_trigger.c
> > +++ b/kernel/trace/trace_events_trigger.c
> > @@ -246,7 +246,7 @@ event_triggers_post_call(struct trace_event_file *file,
> >  }
> >  EXPORT_SYMBOL_GPL(event_triggers_post_call);
> >
> > -#define SHOW_AVAILABLE_TRIGGERS      (void *)(1UL)
> > +#define SHOW_AVAILABLE_TRIGGERS      ((void *)(1UL))
> >
> >  static void *trigger_next(struct seq_file *m, void *t, loff_t *pos)
> >  {
> > @@ -352,6 +352,7 @@ static int event_trigger_regex_open(struct inode *inode, struct file *file)
> >               ret = seq_open(file, &event_triggers_seq_ops);
> >               if (!ret) {
> >                       struct seq_file *m = file->private_data;
> > +
> >                       m->private = file;
> >               }
> >       }
> > @@ -388,9 +389,9 @@ static ssize_t event_trigger_regex_write(struct file *file,
> >                                        const char __user *ubuf,
> >                                        size_t cnt, loff_t *ppos)
> >  {
> > +     char *buf __free(kfree) = NULL;
> >       struct trace_event_file *event_file;
> >       ssize_t ret;
> > -     char *buf __free(kfree) = NULL;
>
> Again, this is not OK. Even if checkpatch.pl complained against this,
> there should be no problem. Only if you think this is not sorted by
> length, you can do:
Removed it in the new patch
>
>         struct trace_event_file *event_file;
> +       char *buf __free(kfree) = NULL;
>         ssize_t ret;
> -       char *buf __free(kfree) = NULL;
>
> This change is acceptable as a cosmetic change.
>
> >
> >       if (!cnt)
> >               return 0;
> > @@ -633,6 +634,7 @@ clear_event_triggers(struct trace_array *tr)
> >
> >       list_for_each_entry(file, &tr->events, list) {
> >               struct event_trigger_data *data, *n;
> > +
> >               list_for_each_entry_safe(data, n, &file->triggers, list) {
> >                       trace_event_trigger_enable_disable(file, 0);
> >                       list_del_rcu(&data->list);
> > @@ -785,7 +787,7 @@ static void unregister_trigger(char *glob,
> >   *   cmd               - the trigger command name
> >   *   glob              - the trigger command name optionally prefaced with '!'
> >   *   param_and_filter  - text following cmd and ':'
> > - *   param             - text following cmd and ':' and stripped of filter
> > + *   param             - text following cmd and ':' and filter removed
> >   *   filter            - the optional filter text following (and including) 'if'
> >   *
> >   * To illustrate the use of these components, here are some concrete
> > diff --git a/new-changes b/new-changes
> > new file mode 100644
> > index 000000000000..9e3a24de3033
> > --- /dev/null
> > +++ b/new-changes
> > @@ -0,0 +1,6 @@
> > +Line 25 -> adding const to the pointer address as well.
> > +
> > +linw 1193 -> removing else because there is a return statement in the if condition
> > +line 1727 -> adding new line after statement
> > +line 1800 -> reordering to solve missing a blank line warning
> > +line 12364 -> changed the function to kstrtoul
> > \ No newline at end of file
>
> Is this your working note?
> Please remove it.
Removed it in the new patch
>
> Thank you,
>
> > --
> > 2.43.0
> >
>
>
> --
> Masami Hiramatsu (Google) <mhiramat@kernel.org>
Thank you for your patience. I am still learning and trying to find my footing.
Here is the new patch version:
https://lore.kernel.org/linux-trace-kernel/20260406072834.243491-2-abhijithsriram95@gmail.com/T/#u
I will do my best to be more careful in the future :)


-- 
Regards
Abhijith Sriram

^ permalink raw reply

* [PATCH v4] kernel/trace: fixed static warnings
From: abhijithsriram95 @ 2026-04-06  7:28 UTC (permalink / raw)
  To: Steven Rostedt, Masami Hiramatsu, Mathieu Desnoyers,
	open list:TRACING, open list:TRACING
  Cc: Abhijith Sriram

From: Abhijith Sriram <abhijithsriram95@gmail.com>

The change in the function argument description
was due to the static code checker script reading
the word filter back to back

Changes in v2:
- corrected *m = file->private_data to m = file->private_data

Changes in v3:
- reverted the changes for struct seq_file *m and
  added a new empty line instead

Changes in v4:
- added a new empty line before char *buf ...
  previously this line was relocated to avoid the
  static check warning.

Signed-off-by: Abhijith Sriram <abhijithsriram95@gmail.com>
---
 kernel/trace/trace_events_trigger.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c
index 655db2e82513..664283bcd9ea 100644
--- a/kernel/trace/trace_events_trigger.c
+++ b/kernel/trace/trace_events_trigger.c
@@ -246,7 +246,7 @@ event_triggers_post_call(struct trace_event_file *file,
 }
 EXPORT_SYMBOL_GPL(event_triggers_post_call);
 
-#define SHOW_AVAILABLE_TRIGGERS	(void *)(1UL)
+#define SHOW_AVAILABLE_TRIGGERS	((void *)(1UL))
 
 static void *trigger_next(struct seq_file *m, void *t, loff_t *pos)
 {
@@ -352,6 +352,7 @@ static int event_trigger_regex_open(struct inode *inode, struct file *file)
 		ret = seq_open(file, &event_triggers_seq_ops);
 		if (!ret) {
 			struct seq_file *m = file->private_data;
+
 			m->private = file;
 		}
 	}
@@ -390,6 +391,7 @@ static ssize_t event_trigger_regex_write(struct file *file,
 {
 	struct trace_event_file *event_file;
 	ssize_t ret;
+
 	char *buf __free(kfree) = NULL;
 
 	if (!cnt)
@@ -633,6 +635,7 @@ clear_event_triggers(struct trace_array *tr)
 
 	list_for_each_entry(file, &tr->events, list) {
 		struct event_trigger_data *data, *n;
+
 		list_for_each_entry_safe(data, n, &file->triggers, list) {
 			trace_event_trigger_enable_disable(file, 0);
 			list_del_rcu(&data->list);
@@ -785,7 +788,7 @@ static void unregister_trigger(char *glob,
  *   cmd               - the trigger command name
  *   glob              - the trigger command name optionally prefaced with '!'
  *   param_and_filter  - text following cmd and ':'
- *   param             - text following cmd and ':' and stripped of filter
+ *   param             - text following cmd and ':' and filter removed
  *   filter            - the optional filter text following (and including) 'if'
  *
  * To illustrate the use of these components, here are some concrete
-- 
2.43.0


^ permalink raw reply related

* Re: [PATCH v3] kernel/trace: fixed static warnings
From: Masami Hiramatsu @ 2026-04-06  7:18 UTC (permalink / raw)
  To: abhijithsriram95
  Cc: Steven Rostedt, Mathieu Desnoyers, open list, open list:TRACING
In-Reply-To: <20260406060046.223496-2-abhijithsriram95@gmail.com>

On Mon,  6 Apr 2026 08:00:36 +0200
abhijithsriram95@gmail.com wrote:

> From: Abhijith Sriram <abhijithsriram95@gmail.com>
> 
> The change in the function argument description
> was due to the static code checker script reading
> the word filter back to back
> 
> Changes in v2:
> - corrected *m = file->private_data to m = file->private_data
> 
> Changes in v3:
> - reverted the changes for struct seq_file *m and
>   added a new empty line instead
> 
> Signed-off-by: Abhijith Sriram <abhijithsriram95@gmail.com>
> ---
>  kernel/trace/trace_events_trigger.c | 8 +++++---
>  new-changes                         | 6 ++++++
>  2 files changed, 11 insertions(+), 3 deletions(-)
>  create mode 100644 new-changes
> 
> diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c
> index 655db2e82513..08adb593fcd9 100644
> --- a/kernel/trace/trace_events_trigger.c
> +++ b/kernel/trace/trace_events_trigger.c
> @@ -246,7 +246,7 @@ event_triggers_post_call(struct trace_event_file *file,
>  }
>  EXPORT_SYMBOL_GPL(event_triggers_post_call);
>  
> -#define SHOW_AVAILABLE_TRIGGERS	(void *)(1UL)
> +#define SHOW_AVAILABLE_TRIGGERS	((void *)(1UL))
>  
>  static void *trigger_next(struct seq_file *m, void *t, loff_t *pos)
>  {
> @@ -352,6 +352,7 @@ static int event_trigger_regex_open(struct inode *inode, struct file *file)
>  		ret = seq_open(file, &event_triggers_seq_ops);
>  		if (!ret) {
>  			struct seq_file *m = file->private_data;
> +
>  			m->private = file;
>  		}
>  	}
> @@ -388,9 +389,9 @@ static ssize_t event_trigger_regex_write(struct file *file,
>  					 const char __user *ubuf,
>  					 size_t cnt, loff_t *ppos)
>  {
> +	char *buf __free(kfree) = NULL;
>  	struct trace_event_file *event_file;
>  	ssize_t ret;
> -	char *buf __free(kfree) = NULL;

Again, this is not OK. Even if checkpatch.pl complains about this,
there should be no problem. Only if you think this is not sorted by
length, you can do:

 	struct trace_event_file *event_file;
+	char *buf __free(kfree) = NULL;
 	ssize_t ret;
-	char *buf __free(kfree) = NULL;

This change is acceptable as a cosmetic change.

>  
>  	if (!cnt)
>  		return 0;
> @@ -633,6 +634,7 @@ clear_event_triggers(struct trace_array *tr)
>  
>  	list_for_each_entry(file, &tr->events, list) {
>  		struct event_trigger_data *data, *n;
> +
>  		list_for_each_entry_safe(data, n, &file->triggers, list) {
>  			trace_event_trigger_enable_disable(file, 0);
>  			list_del_rcu(&data->list);
> @@ -785,7 +787,7 @@ static void unregister_trigger(char *glob,
>   *   cmd               - the trigger command name
>   *   glob              - the trigger command name optionally prefaced with '!'
>   *   param_and_filter  - text following cmd and ':'
> - *   param             - text following cmd and ':' and stripped of filter
> + *   param             - text following cmd and ':' and filter removed
>   *   filter            - the optional filter text following (and including) 'if'
>   *
>   * To illustrate the use of these components, here are some concrete
> diff --git a/new-changes b/new-changes
> new file mode 100644
> index 000000000000..9e3a24de3033
> --- /dev/null
> +++ b/new-changes
> @@ -0,0 +1,6 @@
> +Line 25 -> adding const to the pointer address as well.
> +
> +linw 1193 -> removing else because there is a return statement in the if condition
> +line 1727 -> adding new line after statement
> +line 1800 -> reordering to solve missing a blank line warning
> +line 12364 -> changed the function to kstrtoul
> \ No newline at end of file

Is this your working note?
Please remove it.

Thank you,

> -- 
> 2.43.0
> 


-- 
Masami Hiramatsu (Google) <mhiramat@kernel.org>

^ permalink raw reply

* Re: [PATCH] kernel/trace: fixed static warnings
From: Abhijith Sriram @ 2026-04-06  6:03 UTC (permalink / raw)
  To: Masami Hiramatsu
  Cc: Steven Rostedt, Mathieu Desnoyers, open list:TRACING,
	open list:TRACING
In-Reply-To: <20260406110053.b0582d349e42eefb1c4aeda6@kernel.org>

On Mon, Apr 6, 2026 at 4:00 AM Masami Hiramatsu <mhiramat@kernel.org> wrote:
>
> On Sat, 4 Apr 2026 08:03:07 +0200
> Abhijith Sriram <abhijithsriram95@gmail.com> wrote:
>
> > On Sat, Apr 4, 2026 at 2:18 AM Masami Hiramatsu <mhiramat@kernel.org> wrote:
> > >
> > > On Thu,  2 Apr 2026 21:54:04 +0200
> > > abhijithsriram95@gmail.com wrote:
> > >
> > > > From: Abhijith Sriram <abhijithsriram95@gmail.com>
> > > >
> > > > The change in the function argument description
> > > > was due to the static code checker script reading
> > > > the word filter back to back
> > > >
> > > > Signed-off-by: Abhijith Sriram <abhijithsriram95@gmail.com>
> > > > ---
> > > >  kernel/trace/trace_events_trigger.c | 10 ++++++----
> > > >  1 file changed, 6 insertions(+), 4 deletions(-)
> > > >
> > > > diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c
> > > > index 655db2e82513..477d8dee3362 100644
> > > > --- a/kernel/trace/trace_events_trigger.c
> > > > +++ b/kernel/trace/trace_events_trigger.c
> > > > @@ -246,7 +246,7 @@ event_triggers_post_call(struct trace_event_file *file,
> > > >  }
> > > >  EXPORT_SYMBOL_GPL(event_triggers_post_call);
> > > >
> > > > -#define SHOW_AVAILABLE_TRIGGERS      (void *)(1UL)
> > > > +#define SHOW_AVAILABLE_TRIGGERS      ((void *)(1UL))
> > >
> > > This is OK.
> > >
> > > >
> > > >  static void *trigger_next(struct seq_file *m, void *t, loff_t *pos)
> > > >  {
> > > > @@ -325,6 +325,7 @@ static const struct seq_operations event_triggers_seq_ops = {
> > > >  static int event_trigger_regex_open(struct inode *inode, struct file *file)
> > > >  {
> > > >       int ret;
> > > > +     struct seq_file *m = NULL;
> > > >
> > > >       ret = security_locked_down(LOCKDOWN_TRACEFS);
> > > >       if (ret)
> > > > @@ -351,7 +352,7 @@ static int event_trigger_regex_open(struct inode *inode, struct file *file)
> > > >       if (file->f_mode & FMODE_READ) {
> > > >               ret = seq_open(file, &event_triggers_seq_ops);
> > > >               if (!ret) {
> > > > -                     struct seq_file *m = file->private_data;
> > > > +                     *m = file->private_data;
> > >
> > > Why is this change required?
> > The original warning says "missing blank line after declaration". I
> > thought it was cleaner to have the
> > declaration in the beginning of the function. I made a mistake here
> > which I fixed in the version 2 of
> > the patch, please have a look here:
> > https://lore.kernel.org/linux-trace-kernel/20260403071108.23422-2-abhijithsriram95@gmail.com/T/#u
>
> In that case, you just need to add an empty line; there is no need to move
> the definition because it changes the scope of the `m` variable.
I reverted the changes in version 3 of the patch.
Please find it here:
https://lore.kernel.org/linux-trace-kernel/20260406060046.223496-2-abhijithsriram95@gmail.com/T/#u
>
> > >
> > > >                       m->private = file;
> > > >               }
> > > >       }
> > > > @@ -388,9 +389,9 @@ static ssize_t event_trigger_regex_write(struct file *file,
> > > >                                        const char __user *ubuf,
> > > >                                        size_t cnt, loff_t *ppos)
> > > >  {
> > > > +     char *buf __free(kfree) = NULL;
> > > >       struct trace_event_file *event_file;
> > > >       ssize_t ret;
> > > > -     char *buf __free(kfree) = NULL;
> > >
> > > What is this change?
> > The same "missing blank line after declaration" warning was triggered here,
> > even though there is a blank line after the char *buf.
> > If I do add an empty line then there is another error, "Trailing white
> > space". So I reordered it and the warning disappeared.
> > I am not very sure about this change, since it is usually recommended that
> > variables of larger size are declared first
> > for padding purposes. What do you think?
>
> Ah, that is a known checkpatch's bug. It does not understand __free()
> macro. So please ignore that error (or/and fix checkpatch.pl).
>
> Thanks,
>
> > >
> > > Thanks,
> > >
> > > >
> > > >       if (!cnt)
> > > >               return 0;
> > > > @@ -633,6 +634,7 @@ clear_event_triggers(struct trace_array *tr)
> > > >
> > > >       list_for_each_entry(file, &tr->events, list) {
> > > >               struct event_trigger_data *data, *n;
> > > > +
> > > >               list_for_each_entry_safe(data, n, &file->triggers, list) {
> > > >                       trace_event_trigger_enable_disable(file, 0);
> > > >                       list_del_rcu(&data->list);
> > > > @@ -785,7 +787,7 @@ static void unregister_trigger(char *glob,
> > > >   *   cmd               - the trigger command name
> > > >   *   glob              - the trigger command name optionally prefaced with '!'
> > > >   *   param_and_filter  - text following cmd and ':'
> > > > - *   param             - text following cmd and ':' and stripped of filter
> > > > + *   param             - text following cmd and ':' and filter removed
> > > >   *   filter            - the optional filter text following (and including) 'if'
> > > >   *
> > > >   * To illustrate the use of these components, here are some concrete
> > > > --
> > > > 2.43.0
> > > >
> > >
> > >
> > > --
> > > Masami Hiramatsu (Google) <mhiramat@kernel.org>
> > --
> > Regards
> > Abhijith Sriram
>
>
> --
> Masami Hiramatsu (Google) <mhiramat@kernel.org>

Regards
Abhijith Sriram

^ permalink raw reply

* [PATCH v3] kernel/trace: fixed static warnings
From: abhijithsriram95 @ 2026-04-06  6:00 UTC (permalink / raw)
  To: Steven Rostedt, Masami Hiramatsu, Mathieu Desnoyers,
	Abhijith Sriram, open list, open list:TRACING

From: Abhijith Sriram <abhijithsriram95@gmail.com>

The change in the function argument description
was due to the static code checker script reading
the word filter back to back

Changes in v2:
- corrected *m = file->private_data to m = file->private_data

Changes in v3:
- reverted the changes for struct seq_file *m and
  added a new empty line instead

Signed-off-by: Abhijith Sriram <abhijithsriram95@gmail.com>
---
 kernel/trace/trace_events_trigger.c | 8 +++++---
 new-changes                         | 6 ++++++
 2 files changed, 11 insertions(+), 3 deletions(-)
 create mode 100644 new-changes

diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c
index 655db2e82513..08adb593fcd9 100644
--- a/kernel/trace/trace_events_trigger.c
+++ b/kernel/trace/trace_events_trigger.c
@@ -246,7 +246,7 @@ event_triggers_post_call(struct trace_event_file *file,
 }
 EXPORT_SYMBOL_GPL(event_triggers_post_call);
 
-#define SHOW_AVAILABLE_TRIGGERS	(void *)(1UL)
+#define SHOW_AVAILABLE_TRIGGERS	((void *)(1UL))
 
 static void *trigger_next(struct seq_file *m, void *t, loff_t *pos)
 {
@@ -352,6 +352,7 @@ static int event_trigger_regex_open(struct inode *inode, struct file *file)
 		ret = seq_open(file, &event_triggers_seq_ops);
 		if (!ret) {
 			struct seq_file *m = file->private_data;
+
 			m->private = file;
 		}
 	}
@@ -388,9 +389,9 @@ static ssize_t event_trigger_regex_write(struct file *file,
 					 const char __user *ubuf,
 					 size_t cnt, loff_t *ppos)
 {
+	char *buf __free(kfree) = NULL;
 	struct trace_event_file *event_file;
 	ssize_t ret;
-	char *buf __free(kfree) = NULL;
 
 	if (!cnt)
 		return 0;
@@ -633,6 +634,7 @@ clear_event_triggers(struct trace_array *tr)
 
 	list_for_each_entry(file, &tr->events, list) {
 		struct event_trigger_data *data, *n;
+
 		list_for_each_entry_safe(data, n, &file->triggers, list) {
 			trace_event_trigger_enable_disable(file, 0);
 			list_del_rcu(&data->list);
@@ -785,7 +787,7 @@ static void unregister_trigger(char *glob,
  *   cmd               - the trigger command name
  *   glob              - the trigger command name optionally prefaced with '!'
  *   param_and_filter  - text following cmd and ':'
- *   param             - text following cmd and ':' and stripped of filter
+ *   param             - text following cmd and ':' and filter removed
  *   filter            - the optional filter text following (and including) 'if'
  *
  * To illustrate the use of these components, here are some concrete
diff --git a/new-changes b/new-changes
new file mode 100644
index 000000000000..9e3a24de3033
--- /dev/null
+++ b/new-changes
@@ -0,0 +1,6 @@
+Line 25 -> adding const to the pointer address as well.
+
+linw 1193 -> removing else because there is a return statement in the if condition
+line 1727 -> adding new line after statement
+line 1800 -> reordering to solve missing a blank line warning
+line 12364 -> changed the function to kstrtoul
\ No newline at end of file
-- 
2.43.0


^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox