Linux Security Modules development
 help / color / mirror / Atom feed
* [RFC PATCH 04/20] selftests/landlock: Cover LANDLOCK_RESTRICT_SELF_NO_NEW_PRIVS
From: Justin Suess @ 2026-04-07 20:01 UTC (permalink / raw)
  To: ast, daniel, andrii, kpsingh, paul, mic, viro, brauner, kees
  Cc: gnoack, jack, jmorris, serge, song, yonghong.song, martin.lau, m,
	eddyz87, john.fastabend, sdf, skhan, bpf, linux-security-module,
	linux-kernel, linux-fsdevel, Justin Suess
In-Reply-To: <20260407200157.3874806-1-utilityemal77@gmail.com>

Add tests to cover LANDLOCK_RESTRICT_SELF_NO_NEW_PRIVS.

Add a new field to the scoped domain variant specifying whether
no_new_privs is set by manually calling prctl(PR_SET_NO_NEW_PRIVS, ...)
before landlock_restrict_self(), or by passing this flag to
landlock_restrict_self() itself. Add variants for the scoped domain
tests validating that the flag works identically to the manual prctl()
call for userspace code.

Fix a small issue in restrict_self_checks_ordering, which assumed that
-1 was always an invalid flags value: compute an invalid flag from the
last known flag instead.

Signed-off-by: Justin Suess <utilityemal77@gmail.com>
---
 tools/testing/selftests/landlock/base_test.c  |   8 +-
 tools/testing/selftests/landlock/common.h     |  24 +++-
 tools/testing/selftests/landlock/fs_test.c    | 103 ++++++++++--------
 tools/testing/selftests/landlock/net_test.c   |  55 ++++++----
 .../testing/selftests/landlock/ptrace_test.c  |  14 +--
 .../landlock/scoped_abstract_unix_test.c      |  51 ++++++---
 .../selftests/landlock/scoped_base_variants.h |  23 ++++
 .../selftests/landlock/scoped_common.h        |   5 +-
 .../selftests/landlock/scoped_signal_test.c   |  30 +++--
 9 files changed, 206 insertions(+), 107 deletions(-)

diff --git a/tools/testing/selftests/landlock/base_test.c b/tools/testing/selftests/landlock/base_test.c
index 30d37234086c..a4c38541de70 100644
--- a/tools/testing/selftests/landlock/base_test.c
+++ b/tools/testing/selftests/landlock/base_test.c
@@ -244,6 +244,8 @@ TEST(restrict_self_checks_ordering)
 	};
 	const int ruleset_fd =
 		landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0);
+	const int last_flag = LANDLOCK_RESTRICT_SELF_NO_NEW_PRIVS;
+	const int invalid_flag = last_flag << 1;
 
 	ASSERT_LE(0, ruleset_fd);
 	path_beneath_attr.parent_fd =
@@ -255,7 +257,7 @@ TEST(restrict_self_checks_ordering)
 
 	/* Checks unprivileged enforcement without no_new_privs. */
 	drop_caps(_metadata);
-	ASSERT_EQ(-1, landlock_restrict_self(-1, -1));
+	ASSERT_EQ(-1, landlock_restrict_self(-1, invalid_flag));
 	ASSERT_EQ(EPERM, errno);
 	ASSERT_EQ(-1, landlock_restrict_self(-1, 0));
 	ASSERT_EQ(EPERM, errno);
@@ -265,7 +267,7 @@ TEST(restrict_self_checks_ordering)
 	ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0));
 
 	/* Checks invalid flags. */
-	ASSERT_EQ(-1, landlock_restrict_self(-1, -1));
+	ASSERT_EQ(-1, landlock_restrict_self(-1, invalid_flag));
 	ASSERT_EQ(EINVAL, errno);
 
 	/* Checks invalid ruleset FD. */
@@ -306,7 +308,7 @@ TEST(restrict_self_fd_logging_flags)
 
 TEST(restrict_self_logging_flags)
 {
-	const __u32 last_flag = LANDLOCK_RESTRICT_SELF_TSYNC;
+	const __u32 last_flag = LANDLOCK_RESTRICT_SELF_NO_NEW_PRIVS;
 
 	/* Tests invalid flag combinations. */
 
diff --git a/tools/testing/selftests/landlock/common.h b/tools/testing/selftests/landlock/common.h
index 90551650299c..f6d6a6a99c52 100644
--- a/tools/testing/selftests/landlock/common.h
+++ b/tools/testing/selftests/landlock/common.h
@@ -194,11 +194,27 @@ static int __maybe_unused send_fd(int usock, int fd_tx)
 	return 0;
 }
 
+/*
+ * Scoped domain options
+ */
+struct scoped_domain_opts {
+	bool use_restrict_self_no_new_privs;
+};
+
+static const struct scoped_domain_opts default_scoped_domain_opts = { 0 };
+
 static void __maybe_unused
-enforce_ruleset(struct __test_metadata *const _metadata, const int ruleset_fd)
+enforce_ruleset(struct __test_metadata *const _metadata, const int ruleset_fd,
+		const struct scoped_domain_opts opts)
 {
-	ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0));
-	ASSERT_EQ(0, landlock_restrict_self(ruleset_fd, 0))
+	/* Skip the explicit prctl() when the syscall flag sets no_new_privs. */
+	if (!opts.use_restrict_self_no_new_privs)
+		ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0));
+	ASSERT_EQ(0,
+		  landlock_restrict_self(ruleset_fd,
+					 opts.use_restrict_self_no_new_privs ?
+					 LANDLOCK_RESTRICT_SELF_NO_NEW_PRIVS :
+					 0))
 	{
 		TH_LOG("Failed to enforce ruleset: %s", strerror(errno));
 	}
@@ -216,7 +232,7 @@ drop_access_rights(struct __test_metadata *const _metadata,
 	{
 		TH_LOG("Failed to create a ruleset: %s", strerror(errno));
 	}
-	enforce_ruleset(_metadata, ruleset_fd);
+	enforce_ruleset(_metadata, ruleset_fd, default_scoped_domain_opts);
 	EXPECT_EQ(0, close(ruleset_fd));
 }
 
diff --git a/tools/testing/selftests/landlock/fs_test.c b/tools/testing/selftests/landlock/fs_test.c
index cdb47fc1fc0a..b82b44405dbe 100644
--- a/tools/testing/selftests/landlock/fs_test.c
+++ b/tools/testing/selftests/landlock/fs_test.c
@@ -790,7 +790,18 @@ static void enforce_fs(struct __test_metadata *const _metadata,
 {
 	const int ruleset_fd = create_ruleset(_metadata, access_fs, rules);
 
-	enforce_ruleset(_metadata, ruleset_fd);
+	enforce_ruleset(_metadata, ruleset_fd, default_scoped_domain_opts);
+	EXPECT_EQ(0, close(ruleset_fd));
+}
+
+static void enforce_resolve_unix(struct __test_metadata *const _metadata,
+				 const struct rule rules[],
+				 const struct scoped_domain_opts opts)
+{
+	const int ruleset_fd =
+		create_ruleset(_metadata, LANDLOCK_ACCESS_FS_RESOLVE_UNIX, rules);
+
+	enforce_ruleset(_metadata, ruleset_fd, opts);
 	EXPECT_EQ(0, close(ruleset_fd));
 }
 
@@ -805,14 +816,15 @@ TEST_F_FORK(layout0, proc_nsfs)
 		{},
 	};
 	struct landlock_path_beneath_attr path_beneath;
-	const int ruleset_fd = create_ruleset(
-		_metadata, rules[0].access | LANDLOCK_ACCESS_FS_READ_DIR,
-		rules);
+	const int ruleset_fd =
+		create_ruleset(_metadata,
+			       rules[0].access | LANDLOCK_ACCESS_FS_READ_DIR,
+			       rules);
 
 	ASSERT_LE(0, ruleset_fd);
 	ASSERT_EQ(0, test_open("/proc/self/ns/mnt", O_RDONLY));
 
-	enforce_ruleset(_metadata, ruleset_fd);
+	enforce_ruleset(_metadata, ruleset_fd, default_scoped_domain_opts);
 
 	ASSERT_EQ(EACCES, test_open("/", O_RDONLY));
 	ASSERT_EQ(EACCES, test_open("/dev", O_RDONLY));
@@ -862,7 +874,7 @@ TEST_F_FORK(layout0, unpriv)
 	ASSERT_EQ(EPERM, errno);
 
 	/* enforce_ruleset() calls prctl(no_new_privs). */
-	enforce_ruleset(_metadata, ruleset_fd);
+	enforce_ruleset(_metadata, ruleset_fd, default_scoped_domain_opts);
 	ASSERT_EQ(0, close(ruleset_fd));
 }
 
@@ -1289,7 +1301,7 @@ TEST_F_FORK(layout1, inherit_subset)
 	};
 	const int ruleset_fd = create_ruleset(_metadata, ACCESS_RW, rules);
 
-	enforce_ruleset(_metadata, ruleset_fd);
+	enforce_ruleset(_metadata, ruleset_fd, default_scoped_domain_opts);
 
 	ASSERT_EQ(EACCES, test_open(file1_s1d1, O_WRONLY));
 	ASSERT_EQ(EACCES, test_open(dir_s1d1, O_RDONLY | O_DIRECTORY));
@@ -1322,7 +1334,7 @@ TEST_F_FORK(layout1, inherit_subset)
 	 * LANDLOCK_ACCESS_FS_WRITE_FILE must not be allowed because it would
 	 * be a privilege escalation.
 	 */
-	enforce_ruleset(_metadata, ruleset_fd);
+	enforce_ruleset(_metadata, ruleset_fd, default_scoped_domain_opts);
 
 	/* Same tests and results as above. */
 	ASSERT_EQ(EACCES, test_open(file1_s1d1, O_WRONLY));
@@ -1343,7 +1355,7 @@ TEST_F_FORK(layout1, inherit_subset)
 	 * directory: dir_s1d1.
 	 */
 	add_path_beneath(_metadata, ruleset_fd, ACCESS_RW, dir_s1d1);
-	enforce_ruleset(_metadata, ruleset_fd);
+	enforce_ruleset(_metadata, ruleset_fd, default_scoped_domain_opts);
 
 	/* Same tests and results as above. */
 	ASSERT_EQ(EACCES, test_open(file1_s1d1, O_WRONLY));
@@ -1366,7 +1378,7 @@ TEST_F_FORK(layout1, inherit_subset)
 	 */
 	add_path_beneath(_metadata, ruleset_fd, LANDLOCK_ACCESS_FS_WRITE_FILE,
 			 dir_s1d3);
-	enforce_ruleset(_metadata, ruleset_fd);
+	enforce_ruleset(_metadata, ruleset_fd, default_scoped_domain_opts);
 	ASSERT_EQ(0, close(ruleset_fd));
 
 	/*
@@ -1404,7 +1416,7 @@ TEST_F_FORK(layout1, inherit_superset)
 	};
 	const int ruleset_fd = create_ruleset(_metadata, ACCESS_RW, rules);
 
-	enforce_ruleset(_metadata, ruleset_fd);
+	enforce_ruleset(_metadata, ruleset_fd, default_scoped_domain_opts);
 
 	/* Readdir access is denied for dir_s1d2. */
 	ASSERT_EQ(EACCES, test_open(dir_s1d2, O_RDONLY | O_DIRECTORY));
@@ -1418,7 +1430,7 @@ TEST_F_FORK(layout1, inherit_superset)
 			 LANDLOCK_ACCESS_FS_READ_FILE |
 				 LANDLOCK_ACCESS_FS_READ_DIR,
 			 dir_s1d2);
-	enforce_ruleset(_metadata, ruleset_fd);
+	enforce_ruleset(_metadata, ruleset_fd, default_scoped_domain_opts);
 	EXPECT_EQ(0, close(ruleset_fd));
 
 	/* Readdir access is still denied for dir_s1d2. */
@@ -1442,7 +1454,8 @@ TEST_F_FORK(layout0, max_layers)
 	const int ruleset_fd = create_ruleset(_metadata, ACCESS_RW, rules);
 
 	for (i = 0; i < 16; i++)
-		enforce_ruleset(_metadata, ruleset_fd);
+		enforce_ruleset(_metadata, ruleset_fd,
+				default_scoped_domain_opts);
 
 	for (i = 0; i < 2; i++) {
 		err = landlock_restrict_self(ruleset_fd, 0);
@@ -1472,12 +1485,12 @@ TEST_F_FORK(layout1, empty_or_same_ruleset)
 	/* Nests a policy which denies read access to all directories. */
 	ruleset_fd =
 		create_ruleset(_metadata, LANDLOCK_ACCESS_FS_READ_DIR, NULL);
-	enforce_ruleset(_metadata, ruleset_fd);
+	enforce_ruleset(_metadata, ruleset_fd, default_scoped_domain_opts);
 	ASSERT_EQ(EACCES, test_open(file1_s1d1, O_RDONLY));
 	ASSERT_EQ(EACCES, test_open(dir_s1d1, O_RDONLY));
 
 	/* Enforces a second time with the same ruleset. */
-	enforce_ruleset(_metadata, ruleset_fd);
+	enforce_ruleset(_metadata, ruleset_fd, default_scoped_domain_opts);
 	ASSERT_EQ(0, close(ruleset_fd));
 }
 
@@ -1725,7 +1738,7 @@ TEST_F_FORK(layout1, release_inodes)
 	ASSERT_EQ(0, umount(dir_s3d2));
 	clear_cap(_metadata, CAP_SYS_ADMIN);
 
-	enforce_ruleset(_metadata, ruleset_fd);
+	enforce_ruleset(_metadata, ruleset_fd, default_scoped_domain_opts);
 	EXPECT_EQ(0, close(ruleset_fd));
 
 	ASSERT_EQ(0, test_open(file1_s1d1, O_RDONLY));
@@ -1766,7 +1779,7 @@ TEST_F_FORK(layout1, covered_rule)
 
 	ASSERT_EQ(0, test_open(dir_s3d2, O_RDONLY));
 
-	enforce_ruleset(_metadata, ruleset_fd);
+	enforce_ruleset(_metadata, ruleset_fd, default_scoped_domain_opts);
 	ASSERT_EQ(0, close(ruleset_fd));
 
 	/* Checks that access to the new mount point is denied. */
@@ -1828,7 +1841,7 @@ static void test_relative_path(struct __test_metadata *const _metadata,
 	}
 
 	set_cap(_metadata, CAP_SYS_CHROOT);
-	enforce_ruleset(_metadata, ruleset_fd);
+	enforce_ruleset(_metadata, ruleset_fd, default_scoped_domain_opts);
 
 	switch (rel) {
 	case REL_OPEN:
@@ -4402,9 +4415,9 @@ static void test_connect_to_parent(struct __test_metadata *const _metadata,
 	char buf[1];
 
 	if (variant->domain_both)
-		enforce_fs(_metadata, LANDLOCK_ACCESS_FS_RESOLVE_UNIX, NULL);
+		enforce_resolve_unix(_metadata, NULL, variant->domain_opts);
 	else if (flags & ENFORCE_ALL)
-		enforce_fs(_metadata, LANDLOCK_ACCESS_FS_RESOLVE_UNIX, rules);
+		enforce_resolve_unix(_metadata, rules, variant->domain_opts);
 
 	unlink(path);
 	ASSERT_EQ(0, pipe2(readiness_pipe, O_CLOEXEC));
@@ -4414,11 +4427,11 @@ static void test_connect_to_parent(struct __test_metadata *const _metadata,
 
 	if (child_pid == 0) {
 		if (variant->domain_child)
-			enforce_fs(_metadata, LANDLOCK_ACCESS_FS_RESOLVE_UNIX,
-				   NULL);
+			enforce_resolve_unix(_metadata, NULL,
+					     variant->domain_opts);
 		else if (flags & ENFORCE_ALL)
-			enforce_fs(_metadata, LANDLOCK_ACCESS_FS_RESOLVE_UNIX,
-				   rules);
+			enforce_resolve_unix(_metadata, rules,
+					     variant->domain_opts);
 
 		/* Wait for server to be available. */
 		EXPECT_EQ(0, close(readiness_pipe[1]));
@@ -4444,9 +4457,9 @@ static void test_connect_to_parent(struct __test_metadata *const _metadata,
 	}
 
 	if (variant->domain_parent)
-		enforce_fs(_metadata, LANDLOCK_ACCESS_FS_RESOLVE_UNIX, NULL);
+		enforce_resolve_unix(_metadata, NULL, variant->domain_opts);
 	else if (flags & ENFORCE_ALL)
-		enforce_fs(_metadata, LANDLOCK_ACCESS_FS_RESOLVE_UNIX, rules);
+		enforce_resolve_unix(_metadata, rules, variant->domain_opts);
 
 	srv_fd = set_up_named_unix_server(_metadata, sock_type, path);
 
@@ -4485,9 +4498,9 @@ static void test_connect_to_child(struct __test_metadata *const _metadata,
 	char buf[1];
 
 	if (variant->domain_both)
-		enforce_fs(_metadata, LANDLOCK_ACCESS_FS_RESOLVE_UNIX, NULL);
+		enforce_resolve_unix(_metadata, NULL, variant->domain_opts);
 	else if (flags & ENFORCE_ALL)
-		enforce_fs(_metadata, LANDLOCK_ACCESS_FS_RESOLVE_UNIX, rules);
+		enforce_resolve_unix(_metadata, rules, variant->domain_opts);
 
 	unlink(path);
 	ASSERT_EQ(0, pipe2(readiness_pipe, O_CLOEXEC));
@@ -4498,11 +4511,11 @@ static void test_connect_to_child(struct __test_metadata *const _metadata,
 
 	if (child_pid == 0) {
 		if (variant->domain_child)
-			enforce_fs(_metadata, LANDLOCK_ACCESS_FS_RESOLVE_UNIX,
-				   NULL);
+			enforce_resolve_unix(_metadata, NULL,
+					     variant->domain_opts);
 		else if (flags & ENFORCE_ALL)
-			enforce_fs(_metadata, LANDLOCK_ACCESS_FS_RESOLVE_UNIX,
-				   rules);
+			enforce_resolve_unix(_metadata, rules,
+					     variant->domain_opts);
 
 		srv_fd = set_up_named_unix_server(_metadata, sock_type, path);
 
@@ -4526,9 +4539,9 @@ static void test_connect_to_child(struct __test_metadata *const _metadata,
 	}
 
 	if (variant->domain_parent)
-		enforce_fs(_metadata, LANDLOCK_ACCESS_FS_RESOLVE_UNIX, NULL);
+		enforce_resolve_unix(_metadata, NULL, variant->domain_opts);
 	else if (flags & ENFORCE_ALL)
-		enforce_fs(_metadata, LANDLOCK_ACCESS_FS_RESOLVE_UNIX, rules);
+		enforce_resolve_unix(_metadata, rules, variant->domain_opts);
 
 	/* Wait for server to be available. */
 	EXPECT_EQ(0, close(readiness_pipe[1]));
@@ -5072,7 +5085,7 @@ TEST_F_FORK(layout1_bind, path_disconnected)
 		create_ruleset(_metadata, ACCESS_RW, layer3_only_s1d2);
 	int bind_s1d3_fd;
 
-	enforce_ruleset(_metadata, ruleset_fd_l1);
+	enforce_ruleset(_metadata, ruleset_fd_l1, default_scoped_domain_opts);
 	EXPECT_EQ(0, close(ruleset_fd_l1));
 
 	bind_s1d3_fd = open(bind_dir_s1d3, O_PATH | O_CLOEXEC);
@@ -5102,7 +5115,7 @@ TEST_F_FORK(layout1_bind, path_disconnected)
 		  test_open_rel(bind_s1d3_fd, "..", O_RDONLY | O_DIRECTORY));
 
 	/* This should still work with a narrower rule. */
-	enforce_ruleset(_metadata, ruleset_fd_l2);
+	enforce_ruleset(_metadata, ruleset_fd_l2, default_scoped_domain_opts);
 	EXPECT_EQ(0, close(ruleset_fd_l2));
 
 	EXPECT_EQ(0, test_open(file1_s4d1, O_RDONLY));
@@ -5114,7 +5127,7 @@ TEST_F_FORK(layout1_bind, path_disconnected)
 	EXPECT_EQ(0, test_open_rel(bind_s1d3_fd, file1_name, O_RDONLY));
 	EXPECT_EQ(EACCES, test_open_rel(bind_s1d3_fd, file2_name, O_RDONLY));
 
-	enforce_ruleset(_metadata, ruleset_fd_l3);
+	enforce_ruleset(_metadata, ruleset_fd_l3, default_scoped_domain_opts);
 	EXPECT_EQ(0, close(ruleset_fd_l3));
 
 	EXPECT_EQ(EACCES, test_open(file1_s4d1, O_RDONLY));
@@ -5176,7 +5189,7 @@ TEST_F_FORK(layout1_bind, path_disconnected_rename)
 	ruleset_fd_l2 = create_ruleset(_metadata, LANDLOCK_ACCESS_FS_READ_FILE,
 				       layer2_only_s1d2);
 
-	enforce_ruleset(_metadata, ruleset_fd_l1);
+	enforce_ruleset(_metadata, ruleset_fd_l1, default_scoped_domain_opts);
 	EXPECT_EQ(0, close(ruleset_fd_l1));
 
 	bind_s1d3_fd = open(bind_dir_s1d3, O_PATH | O_CLOEXEC);
@@ -5201,7 +5214,8 @@ TEST_F_FORK(layout1_bind, path_disconnected_rename)
 	child_pid = fork();
 	ASSERT_LE(0, child_pid);
 	if (child_pid == 0) {
-		enforce_ruleset(_metadata, ruleset_fd_l2);
+		enforce_ruleset(_metadata, ruleset_fd_l2,
+				default_scoped_domain_opts);
 		EXPECT_EQ(0, close(ruleset_fd_l2));
 		EXPECT_EQ(0, test_open_rel(bind_s1d3_fd, file1_name, O_RDONLY));
 		EXPECT_EQ(EACCES, test_open(file1_s4d2, O_RDONLY));
@@ -5238,7 +5252,8 @@ TEST_F_FORK(layout1_bind, path_disconnected_rename)
 	child_pid = fork();
 	ASSERT_LE(0, child_pid);
 	if (child_pid == 0) {
-		enforce_ruleset(_metadata, ruleset_fd_l2);
+		enforce_ruleset(_metadata, ruleset_fd_l2,
+				default_scoped_domain_opts);
 		EXPECT_EQ(0, close(ruleset_fd_l2));
 		EXPECT_EQ(0, test_open_rel(bind_s1d3_fd, file1_name, O_RDONLY));
 		EXPECT_EQ(0, test_open(file1_s1d3, O_RDONLY));
@@ -5290,7 +5305,7 @@ TEST_F_FORK(layout1_bind, path_disconnected_rename)
 	}
 
 	/* Checks again that we can access it under l2. */
-	enforce_ruleset(_metadata, ruleset_fd_l2);
+	enforce_ruleset(_metadata, ruleset_fd_l2, default_scoped_domain_opts);
 	EXPECT_EQ(0, close(ruleset_fd_l2));
 	EXPECT_EQ(0, test_open_rel(bind_s1d3_fd, file1_name, O_RDONLY));
 	EXPECT_EQ(0, test_open(file1_s1d3, O_RDONLY));
@@ -5914,7 +5929,7 @@ TEST_F_FORK(layout4_disconnected_leafs, read_rename_exchange)
 	EXPECT_EQ(ENOENT, test_open_rel(s1d41_bind_fd, "..", O_DIRECTORY));
 	EXPECT_EQ(ENOENT, test_open_rel(s1d42_bind_fd, "..", O_DIRECTORY));
 
-	enforce_ruleset(_metadata, ruleset_fd);
+	enforce_ruleset(_metadata, ruleset_fd, default_scoped_domain_opts);
 	EXPECT_EQ(0, close(ruleset_fd));
 
 	EXPECT_EQ(variant->expected_read_result,
@@ -6430,7 +6445,7 @@ TEST_F_FORK(layout5_disconnected_branch, read_rename_exchange)
 	EXPECT_EQ(0, test_open_rel(s1d3_bind_fd, "..", O_DIRECTORY));
 	EXPECT_EQ(ENOENT, test_open_rel(s1d3_bind_fd, "../..", O_DIRECTORY));
 
-	enforce_ruleset(_metadata, ruleset_fd);
+	enforce_ruleset(_metadata, ruleset_fd, default_scoped_domain_opts);
 	EXPECT_EQ(0, close(ruleset_fd));
 
 	EXPECT_EQ(variant->expected_read_result,
@@ -7201,7 +7216,7 @@ TEST_F_FORK(layout3_fs, release_inodes)
 	ASSERT_EQ(0, mount_opt(&mnt_tmp, TMP_DIR));
 	clear_cap(_metadata, CAP_SYS_ADMIN);
 
-	enforce_ruleset(_metadata, ruleset_fd);
+	enforce_ruleset(_metadata, ruleset_fd, default_scoped_domain_opts);
 	ASSERT_EQ(0, close(ruleset_fd));
 
 	/* Checks that access to the new mount point is denied. */
diff --git a/tools/testing/selftests/landlock/net_test.c b/tools/testing/selftests/landlock/net_test.c
index 4c528154ea92..33a39a264f6b 100644
--- a/tools/testing/selftests/landlock/net_test.c
+++ b/tools/testing/selftests/landlock/net_test.c
@@ -671,7 +671,8 @@ TEST_F(protocol, bind)
 			  landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
 					    &tcp_connect_p1, 0));
 
-		enforce_ruleset(_metadata, ruleset_fd);
+		enforce_ruleset(_metadata, ruleset_fd,
+				default_scoped_domain_opts);
 		EXPECT_EQ(0, close(ruleset_fd));
 	}
 
@@ -721,7 +722,8 @@ TEST_F(protocol, connect)
 			  landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
 					    &tcp_bind_p1, 0));
 
-		enforce_ruleset(_metadata, ruleset_fd);
+		enforce_ruleset(_metadata, ruleset_fd,
+				default_scoped_domain_opts);
 		EXPECT_EQ(0, close(ruleset_fd));
 	}
 
@@ -755,7 +757,8 @@ TEST_F(protocol, bind_unspec)
 		ASSERT_EQ(0,
 			  landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
 					    &tcp_bind, 0));
-		enforce_ruleset(_metadata, ruleset_fd);
+		enforce_ruleset(_metadata, ruleset_fd,
+				default_scoped_domain_opts);
 		EXPECT_EQ(0, close(ruleset_fd));
 	}
 
@@ -788,7 +791,8 @@ TEST_F(protocol, bind_unspec)
 		ASSERT_LE(0, ruleset_fd);
 
 		/* Denies bind. */
-		enforce_ruleset(_metadata, ruleset_fd);
+		enforce_ruleset(_metadata, ruleset_fd,
+				default_scoped_domain_opts);
 		EXPECT_EQ(0, close(ruleset_fd));
 	}
 
@@ -874,7 +878,8 @@ TEST_F(protocol, connect_unspec)
 			ASSERT_EQ(0, landlock_add_rule(ruleset_fd,
 						       LANDLOCK_RULE_NET_PORT,
 						       &tcp_connect, 0));
-			enforce_ruleset(_metadata, ruleset_fd);
+			enforce_ruleset(_metadata, ruleset_fd,
+					default_scoped_domain_opts);
 			EXPECT_EQ(0, close(ruleset_fd));
 		}
 
@@ -902,7 +907,8 @@ TEST_F(protocol, connect_unspec)
 			ASSERT_LE(0, ruleset_fd);
 
 			/* Denies connect. */
-			enforce_ruleset(_metadata, ruleset_fd);
+			enforce_ruleset(_metadata, ruleset_fd,
+					default_scoped_domain_opts);
 			EXPECT_EQ(0, close(ruleset_fd));
 		}
 
@@ -1034,7 +1040,8 @@ TEST_F(ipv4, from_unix_to_inet)
 			  landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
 					    &tcp_bind_connect_p0, 0));
 
-		enforce_ruleset(_metadata, ruleset_fd);
+		enforce_ruleset(_metadata, ruleset_fd,
+				default_scoped_domain_opts);
 		EXPECT_EQ(0, close(ruleset_fd));
 	}
 
@@ -1181,7 +1188,8 @@ TEST_F(tcp_layers, ruleset_overlap)
 		ASSERT_EQ(0,
 			  landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
 					    &tcp_bind_connect, 0));
-		enforce_ruleset(_metadata, ruleset_fd);
+		enforce_ruleset(_metadata, ruleset_fd,
+				default_scoped_domain_opts);
 		EXPECT_EQ(0, close(ruleset_fd));
 	}
 
@@ -1197,7 +1205,8 @@ TEST_F(tcp_layers, ruleset_overlap)
 		ASSERT_EQ(0,
 			  landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
 					    &tcp_bind, 0));
-		enforce_ruleset(_metadata, ruleset_fd);
+		enforce_ruleset(_metadata, ruleset_fd,
+				default_scoped_domain_opts);
 		EXPECT_EQ(0, close(ruleset_fd));
 	}
 
@@ -1213,7 +1222,8 @@ TEST_F(tcp_layers, ruleset_overlap)
 		ASSERT_EQ(0,
 			  landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
 					    &tcp_bind_connect, 0));
-		enforce_ruleset(_metadata, ruleset_fd);
+		enforce_ruleset(_metadata, ruleset_fd,
+				default_scoped_domain_opts);
 		EXPECT_EQ(0, close(ruleset_fd));
 	}
 
@@ -1244,7 +1254,8 @@ TEST_F(tcp_layers, ruleset_expand)
 		ASSERT_EQ(0,
 			  landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
 					    &bind_srv0, 0));
-		enforce_ruleset(_metadata, ruleset_fd);
+		enforce_ruleset(_metadata, ruleset_fd,
+				default_scoped_domain_opts);
 		EXPECT_EQ(0, close(ruleset_fd));
 	}
 
@@ -1276,7 +1287,8 @@ TEST_F(tcp_layers, ruleset_expand)
 		ASSERT_EQ(0,
 			  landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
 					    &tcp_bind_p1, 0));
-		enforce_ruleset(_metadata, ruleset_fd);
+		enforce_ruleset(_metadata, ruleset_fd,
+				default_scoped_domain_opts);
 		EXPECT_EQ(0, close(ruleset_fd));
 	}
 
@@ -1298,7 +1310,8 @@ TEST_F(tcp_layers, ruleset_expand)
 		ASSERT_EQ(0,
 			  landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
 					    &tcp_bind_p0, 0));
-		enforce_ruleset(_metadata, ruleset_fd);
+		enforce_ruleset(_metadata, ruleset_fd,
+				default_scoped_domain_opts);
 		EXPECT_EQ(0, close(ruleset_fd));
 	}
 
@@ -1546,7 +1559,7 @@ TEST_F(mini, tcp_port_overflow)
 					&port_overflow4, 0));
 	EXPECT_EQ(EINVAL, errno);
 
-	enforce_ruleset(_metadata, ruleset_fd);
+	enforce_ruleset(_metadata, ruleset_fd, default_scoped_domain_opts);
 
 	test_bind_and_connect(_metadata, &srv_denied, true, true);
 	test_bind_and_connect(_metadata, &srv_max_allowed, false, false);
@@ -1611,7 +1624,7 @@ TEST_F(ipv4_tcp, port_endianness)
 				       &connect_big_endian_p0, 0));
 	ASSERT_EQ(0, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
 				       &bind_connect_host_endian_p1, 0));
-	enforce_ruleset(_metadata, ruleset_fd);
+	enforce_ruleset(_metadata, ruleset_fd, default_scoped_domain_opts);
 
 	/* No restriction for big endinan CPU. */
 	test_bind_and_connect(_metadata, &self->srv0, false, little_endian);
@@ -1652,7 +1665,7 @@ TEST_F(ipv4_tcp, with_fs)
 	ASSERT_EQ(0, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
 				       &tcp_bind, 0));
 
-	enforce_ruleset(_metadata, ruleset_fd);
+	enforce_ruleset(_metadata, ruleset_fd, default_scoped_domain_opts);
 	EXPECT_EQ(0, close(ruleset_fd));
 
 	/* Tests file access. */
@@ -1766,7 +1779,8 @@ TEST_F(port_specific, bind_connect_zero)
 			  landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
 					    &tcp_bind_connect_zero, 0));
 
-		enforce_ruleset(_metadata, ruleset_fd);
+		enforce_ruleset(_metadata, ruleset_fd,
+				default_scoped_domain_opts);
 		EXPECT_EQ(0, close(ruleset_fd));
 	}
 
@@ -1843,7 +1857,8 @@ TEST_F(port_specific, bind_connect_1023)
 			  landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
 					    &tcp_bind_connect, 0));
 
-		enforce_ruleset(_metadata, ruleset_fd);
+		enforce_ruleset(_metadata, ruleset_fd,
+				default_scoped_domain_opts);
 		EXPECT_EQ(0, close(ruleset_fd));
 	}
 
@@ -1982,7 +1997,7 @@ TEST_F(audit, bind)
 	ruleset_fd =
 		landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0);
 	ASSERT_LE(0, ruleset_fd);
-	enforce_ruleset(_metadata, ruleset_fd);
+	enforce_ruleset(_metadata, ruleset_fd, default_scoped_domain_opts);
 	EXPECT_EQ(0, close(ruleset_fd));
 
 	sock_fd = socket_variant(&self->srv0);
@@ -2010,7 +2025,7 @@ TEST_F(audit, connect)
 	ruleset_fd =
 		landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0);
 	ASSERT_LE(0, ruleset_fd);
-	enforce_ruleset(_metadata, ruleset_fd);
+	enforce_ruleset(_metadata, ruleset_fd, default_scoped_domain_opts);
 	EXPECT_EQ(0, close(ruleset_fd));
 
 	sock_fd = socket_variant(&self->srv0);
diff --git a/tools/testing/selftests/landlock/ptrace_test.c b/tools/testing/selftests/landlock/ptrace_test.c
index 1b6c8b53bf33..1c29cde8707a 100644
--- a/tools/testing/selftests/landlock/ptrace_test.c
+++ b/tools/testing/selftests/landlock/ptrace_test.c
@@ -25,7 +25,8 @@
 #define YAMA_SCOPE_DISABLED 0
 #define YAMA_SCOPE_RELATIONAL 1
 
-static void create_domain(struct __test_metadata *const _metadata)
+static void create_domain(struct __test_metadata *const _metadata,
+			  const struct scoped_domain_opts opts)
 {
 	int ruleset_fd;
 	struct landlock_ruleset_attr ruleset_attr = {
@@ -38,8 +39,7 @@ static void create_domain(struct __test_metadata *const _metadata)
 	{
 		TH_LOG("Failed to create a ruleset: %s", strerror(errno));
 	}
-	EXPECT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0));
-	EXPECT_EQ(0, landlock_restrict_self(ruleset_fd, 0));
+	enforce_ruleset(_metadata, ruleset_fd, opts);
 	EXPECT_EQ(0, close(ruleset_fd));
 }
 
@@ -169,7 +169,7 @@ TEST_F(scoped_domains, trace)
 	ASSERT_EQ(0, pipe2(pipe_child, O_CLOEXEC));
 	ASSERT_EQ(0, pipe2(pipe_parent, O_CLOEXEC));
 	if (variant->domain_both) {
-		create_domain(_metadata);
+		create_domain(_metadata, variant->domain_opts);
 		if (!__test_passed(_metadata))
 			/* Aborts before forking. */
 			return;
@@ -183,7 +183,7 @@ TEST_F(scoped_domains, trace)
 		ASSERT_EQ(0, close(pipe_parent[1]));
 		ASSERT_EQ(0, close(pipe_child[0]));
 		if (variant->domain_child)
-			create_domain(_metadata);
+			create_domain(_metadata, variant->domain_opts);
 
 		/* Waits for the parent to be in a domain, if any. */
 		ASSERT_EQ(1, read(pipe_parent[0], &buf_child, 1));
@@ -238,7 +238,7 @@ TEST_F(scoped_domains, trace)
 	ASSERT_EQ(0, close(pipe_child[1]));
 	ASSERT_EQ(0, close(pipe_parent[0]));
 	if (variant->domain_parent)
-		create_domain(_metadata);
+		create_domain(_metadata, variant->domain_opts);
 
 	/* Signals that the parent is in a domain, if any. */
 	ASSERT_EQ(1, write(pipe_parent[1], ".", 1));
@@ -396,7 +396,7 @@ TEST_F(audit, trace)
 
 	ASSERT_EQ(0, close(pipe_child[1]));
 	ASSERT_EQ(0, close(pipe_parent[0]));
-	create_domain(_metadata);
+	create_domain(_metadata, default_scoped_domain_opts);
 
 	/* Signals that the parent is in a domain. */
 	ASSERT_EQ(1, write(pipe_parent[1], ".", 1));
diff --git a/tools/testing/selftests/landlock/scoped_abstract_unix_test.c b/tools/testing/selftests/landlock/scoped_abstract_unix_test.c
index c47491d2d1c1..d89f54edf9d5 100644
--- a/tools/testing/selftests/landlock/scoped_abstract_unix_test.c
+++ b/tools/testing/selftests/landlock/scoped_abstract_unix_test.c
@@ -88,7 +88,8 @@ TEST_F(scoped_domains, connect_to_parent)
 	ASSERT_EQ(0, pipe2(pipe_parent, O_CLOEXEC));
 	if (variant->domain_both) {
 		create_scoped_domain(_metadata,
-				     LANDLOCK_SCOPE_ABSTRACT_UNIX_SOCKET);
+				     LANDLOCK_SCOPE_ABSTRACT_UNIX_SOCKET,
+				     variant->domain_opts);
 		if (!__test_passed(_metadata))
 			return;
 	}
@@ -103,7 +104,8 @@ TEST_F(scoped_domains, connect_to_parent)
 		EXPECT_EQ(0, close(pipe_parent[1]));
 		if (variant->domain_child)
 			create_scoped_domain(
-				_metadata, LANDLOCK_SCOPE_ABSTRACT_UNIX_SOCKET);
+				_metadata, LANDLOCK_SCOPE_ABSTRACT_UNIX_SOCKET,
+				variant->domain_opts);
 
 		stream_client = socket(AF_UNIX, SOCK_STREAM, 0);
 		ASSERT_LE(0, stream_client);
@@ -138,7 +140,8 @@ TEST_F(scoped_domains, connect_to_parent)
 	EXPECT_EQ(0, close(pipe_parent[0]));
 	if (variant->domain_parent)
 		create_scoped_domain(_metadata,
-				     LANDLOCK_SCOPE_ABSTRACT_UNIX_SOCKET);
+				     LANDLOCK_SCOPE_ABSTRACT_UNIX_SOCKET,
+				     variant->domain_opts);
 
 	stream_server = socket(AF_UNIX, SOCK_STREAM, 0);
 	ASSERT_LE(0, stream_server);
@@ -186,7 +189,8 @@ TEST_F(scoped_domains, connect_to_child)
 	ASSERT_EQ(0, pipe2(pipe_parent, O_CLOEXEC));
 	if (variant->domain_both) {
 		create_scoped_domain(_metadata,
-				     LANDLOCK_SCOPE_ABSTRACT_UNIX_SOCKET);
+				     LANDLOCK_SCOPE_ABSTRACT_UNIX_SOCKET,
+				     variant->domain_opts);
 		if (!__test_passed(_metadata))
 			return;
 	}
@@ -200,7 +204,8 @@ TEST_F(scoped_domains, connect_to_child)
 		EXPECT_EQ(0, close(pipe_child[0]));
 		if (variant->domain_child)
 			create_scoped_domain(
-				_metadata, LANDLOCK_SCOPE_ABSTRACT_UNIX_SOCKET);
+				_metadata, LANDLOCK_SCOPE_ABSTRACT_UNIX_SOCKET,
+				variant->domain_opts);
 
 		/* Waits for the parent to be in a domain, if any. */
 		ASSERT_EQ(1, read(pipe_parent[0], &buf, 1));
@@ -231,7 +236,8 @@ TEST_F(scoped_domains, connect_to_child)
 
 	if (variant->domain_parent)
 		create_scoped_domain(_metadata,
-				     LANDLOCK_SCOPE_ABSTRACT_UNIX_SOCKET);
+				     LANDLOCK_SCOPE_ABSTRACT_UNIX_SOCKET,
+				     variant->domain_opts);
 
 	/* Signals that the parent is in a domain, if any. */
 	ASSERT_EQ(1, write(pipe_parent[1], ".", 1));
@@ -344,7 +350,8 @@ TEST_F(scoped_audit, connect_to_child)
 	EXPECT_EQ(0, close(pipe_child[1]));
 	EXPECT_EQ(0, close(pipe_parent[0]));
 
-	create_scoped_domain(_metadata, LANDLOCK_SCOPE_ABSTRACT_UNIX_SOCKET);
+	create_scoped_domain(_metadata, LANDLOCK_SCOPE_ABSTRACT_UNIX_SOCKET,
+			     default_scoped_domain_opts);
 
 	/* Signals that the parent is in a domain, if any. */
 	ASSERT_EQ(1, write(pipe_parent[1], ".", 1));
@@ -429,7 +436,8 @@ TEST_F(scoped_vs_unscoped, unix_scoping)
 		create_fs_domain(_metadata);
 	else if (variant->domain_all == SCOPE_SANDBOX)
 		create_scoped_domain(_metadata,
-				     LANDLOCK_SCOPE_ABSTRACT_UNIX_SOCKET);
+				     LANDLOCK_SCOPE_ABSTRACT_UNIX_SOCKET,
+				     default_scoped_domain_opts);
 
 	child = fork();
 	ASSERT_LE(0, child);
@@ -444,7 +452,8 @@ TEST_F(scoped_vs_unscoped, unix_scoping)
 			create_fs_domain(_metadata);
 		else if (variant->domain_children == SCOPE_SANDBOX)
 			create_scoped_domain(
-				_metadata, LANDLOCK_SCOPE_ABSTRACT_UNIX_SOCKET);
+				_metadata, LANDLOCK_SCOPE_ABSTRACT_UNIX_SOCKET,
+				default_scoped_domain_opts);
 
 		grand_child = fork();
 		ASSERT_LE(0, grand_child);
@@ -461,7 +470,8 @@ TEST_F(scoped_vs_unscoped, unix_scoping)
 			else if (variant->domain_grand_child == SCOPE_SANDBOX)
 				create_scoped_domain(
 					_metadata,
-					LANDLOCK_SCOPE_ABSTRACT_UNIX_SOCKET);
+					LANDLOCK_SCOPE_ABSTRACT_UNIX_SOCKET,
+					default_scoped_domain_opts);
 
 			stream_client = socket(AF_UNIX, SOCK_STREAM, 0);
 			ASSERT_LE(0, stream_client);
@@ -525,7 +535,8 @@ TEST_F(scoped_vs_unscoped, unix_scoping)
 			create_fs_domain(_metadata);
 		else if (variant->domain_child == SCOPE_SANDBOX)
 			create_scoped_domain(
-				_metadata, LANDLOCK_SCOPE_ABSTRACT_UNIX_SOCKET);
+				_metadata, LANDLOCK_SCOPE_ABSTRACT_UNIX_SOCKET,
+				default_scoped_domain_opts);
 
 		stream_server_child = socket(AF_UNIX, SOCK_STREAM, 0);
 		ASSERT_LE(0, stream_server_child);
@@ -552,7 +563,8 @@ TEST_F(scoped_vs_unscoped, unix_scoping)
 		create_fs_domain(_metadata);
 	else if (variant->domain_parent == SCOPE_SANDBOX)
 		create_scoped_domain(_metadata,
-				     LANDLOCK_SCOPE_ABSTRACT_UNIX_SOCKET);
+				     LANDLOCK_SCOPE_ABSTRACT_UNIX_SOCKET,
+				     default_scoped_domain_opts);
 
 	stream_server_parent = socket(AF_UNIX, SOCK_STREAM, 0);
 	ASSERT_LE(0, stream_server_parent);
@@ -656,7 +668,8 @@ TEST_F(outside_socket, socket_with_different_domain)
 
 		/* Client always has a domain. */
 		create_scoped_domain(_metadata,
-				     LANDLOCK_SCOPE_ABSTRACT_UNIX_SOCKET);
+				     LANDLOCK_SCOPE_ABSTRACT_UNIX_SOCKET,
+				     default_scoped_domain_opts);
 
 		if (variant->child_socket) {
 			int data_socket, passed_socket, stream_server;
@@ -713,7 +726,8 @@ TEST_F(outside_socket, socket_with_different_domain)
 	ASSERT_LE(0, server_socket);
 
 	/* Server always has a domain. */
-	create_scoped_domain(_metadata, LANDLOCK_SCOPE_ABSTRACT_UNIX_SOCKET);
+	create_scoped_domain(_metadata, LANDLOCK_SCOPE_ABSTRACT_UNIX_SOCKET,
+			     default_scoped_domain_opts);
 
 	ASSERT_EQ(0, bind(server_socket, &self->address.unix_addr,
 			  self->address.unix_addr_len));
@@ -820,7 +834,8 @@ TEST_F(various_address_sockets, scoped_pathname_sockets)
 
 		if (variant->domain == SCOPE_SANDBOX)
 			create_scoped_domain(
-				_metadata, LANDLOCK_SCOPE_ABSTRACT_UNIX_SOCKET);
+				_metadata, LANDLOCK_SCOPE_ABSTRACT_UNIX_SOCKET,
+				default_scoped_domain_opts);
 		else if (variant->domain == OTHER_SANDBOX)
 			create_fs_domain(_metadata);
 
@@ -1027,7 +1042,8 @@ TEST(datagram_sockets)
 
 		/* Scopes the domain. */
 		create_scoped_domain(_metadata,
-				     LANDLOCK_SCOPE_ABSTRACT_UNIX_SOCKET);
+				     LANDLOCK_SCOPE_ABSTRACT_UNIX_SOCKET,
+				     default_scoped_domain_opts);
 
 		/*
 		 * Connected socket sends data to the receiver, but the
@@ -1108,7 +1124,8 @@ TEST(self_connect)
 	if (child == 0) {
 		/* Child's domain is scoped. */
 		create_scoped_domain(_metadata,
-				     LANDLOCK_SCOPE_ABSTRACT_UNIX_SOCKET);
+				     LANDLOCK_SCOPE_ABSTRACT_UNIX_SOCKET,
+				     default_scoped_domain_opts);
 
 		/*
 		 * The child inherits the sockets, and cannot connect or
diff --git a/tools/testing/selftests/landlock/scoped_base_variants.h b/tools/testing/selftests/landlock/scoped_base_variants.h
index 7116728ebc68..bbdf19ef18ef 100644
--- a/tools/testing/selftests/landlock/scoped_base_variants.h
+++ b/tools/testing/selftests/landlock/scoped_base_variants.h
@@ -20,6 +20,7 @@ FIXTURE_VARIANT(scoped_domains)
 	bool domain_both;
 	bool domain_parent;
 	bool domain_child;
+	struct scoped_domain_opts domain_opts;
 };
 
 /*
@@ -54,6 +55,17 @@ FIXTURE_VARIANT_ADD(scoped_domains, child_domain) {
 	.domain_child = true,
 };
 
+/* clang-format off */
+FIXTURE_VARIANT_ADD(scoped_domains, child_domain_restrict_self_no_new_privs) {
+	/* clang-format on */
+	.domain_both = false,
+	.domain_parent = false,
+	.domain_child = true,
+	.domain_opts = {
+		.use_restrict_self_no_new_privs = true,
+	},
+};
+
 /*
  *        Parent domain
  * .------.
@@ -70,6 +82,17 @@ FIXTURE_VARIANT_ADD(scoped_domains, parent_domain) {
 	.domain_child = false,
 };
 
+/* clang-format off */
+FIXTURE_VARIANT_ADD(scoped_domains, parent_domain_restrict_self_no_new_privs) {
+	/* clang-format on */
+	.domain_both = false,
+	.domain_parent = true,
+	.domain_child = false,
+	.domain_opts = {
+		.use_restrict_self_no_new_privs = true,
+	},
+};
+
 /*
  *        Parent + child domain (siblings)
  * .------.
diff --git a/tools/testing/selftests/landlock/scoped_common.h b/tools/testing/selftests/landlock/scoped_common.h
index a9a912d30c4d..23990758eef8 100644
--- a/tools/testing/selftests/landlock/scoped_common.h
+++ b/tools/testing/selftests/landlock/scoped_common.h
@@ -10,7 +10,8 @@
 #include <sys/types.h>
 
 static void create_scoped_domain(struct __test_metadata *const _metadata,
-				 const __u16 scope)
+				 const __u16 scope,
+				 const struct scoped_domain_opts opts)
 {
 	int ruleset_fd;
 	const struct landlock_ruleset_attr ruleset_attr = {
@@ -23,6 +24,6 @@ static void create_scoped_domain(struct __test_metadata *const _metadata,
 	{
 		TH_LOG("Failed to create a ruleset: %s", strerror(errno));
 	}
-	enforce_ruleset(_metadata, ruleset_fd);
+	enforce_ruleset(_metadata, ruleset_fd, opts);
 	EXPECT_EQ(0, close(ruleset_fd));
 }
diff --git a/tools/testing/selftests/landlock/scoped_signal_test.c b/tools/testing/selftests/landlock/scoped_signal_test.c
index d8bf33417619..dfda4a3e5374 100644
--- a/tools/testing/selftests/landlock/scoped_signal_test.c
+++ b/tools/testing/selftests/landlock/scoped_signal_test.c
@@ -111,7 +111,8 @@ TEST_F(scoping_signals, send_sig_to_parent)
 		ASSERT_EQ(1, read(pipe_parent[0], &buf_child, 1));
 		EXPECT_EQ(0, close(pipe_parent[0]));
 
-		create_scoped_domain(_metadata, LANDLOCK_SCOPE_SIGNAL);
+		create_scoped_domain(_metadata, LANDLOCK_SCOPE_SIGNAL,
+				     default_scoped_domain_opts);
 
 		/*
 		 * The child process cannot send signal to the parent
@@ -183,7 +184,8 @@ TEST_F(scoped_domains, check_access_signal)
 	can_signal_child = !variant->domain_parent;
 
 	if (variant->domain_both)
-		create_scoped_domain(_metadata, LANDLOCK_SCOPE_SIGNAL);
+		create_scoped_domain(_metadata, LANDLOCK_SCOPE_SIGNAL,
+				     variant->domain_opts);
 
 	ASSERT_EQ(0, pipe2(pipe_parent, O_CLOEXEC));
 	ASSERT_EQ(0, pipe2(pipe_child, O_CLOEXEC));
@@ -197,7 +199,8 @@ TEST_F(scoped_domains, check_access_signal)
 		EXPECT_EQ(0, close(pipe_parent[1]));
 
 		if (variant->domain_child)
-			create_scoped_domain(_metadata, LANDLOCK_SCOPE_SIGNAL);
+			create_scoped_domain(_metadata, LANDLOCK_SCOPE_SIGNAL,
+					     variant->domain_opts);
 
 		ASSERT_EQ(1, write(pipe_child[1], ".", 1));
 		EXPECT_EQ(0, close(pipe_child[1]));
@@ -226,7 +229,8 @@ TEST_F(scoped_domains, check_access_signal)
 	EXPECT_EQ(0, close(pipe_child[1]));
 
 	if (variant->domain_parent)
-		create_scoped_domain(_metadata, LANDLOCK_SCOPE_SIGNAL);
+		create_scoped_domain(_metadata, LANDLOCK_SCOPE_SIGNAL,
+				     variant->domain_opts);
 
 	ASSERT_EQ(1, read(pipe_child[0], &buf_parent, 1));
 	EXPECT_EQ(0, close(pipe_child[0]));
@@ -280,7 +284,8 @@ TEST(signal_scoping_thread_before)
 				    &thread_pipe[0]));
 
 	/* Enforces restriction after creating the thread. */
-	create_scoped_domain(_metadata, LANDLOCK_SCOPE_SIGNAL);
+	create_scoped_domain(_metadata, LANDLOCK_SCOPE_SIGNAL,
+			     default_scoped_domain_opts);
 
 	EXPECT_EQ(0, pthread_kill(no_sandbox_thread, 0));
 	EXPECT_EQ(1, write(thread_pipe[1], ".", 1));
@@ -302,7 +307,8 @@ TEST(signal_scoping_thread_after)
 	ASSERT_EQ(0, pipe2(thread_pipe, O_CLOEXEC));
 
 	/* Enforces restriction before creating the thread. */
-	create_scoped_domain(_metadata, LANDLOCK_SCOPE_SIGNAL);
+	create_scoped_domain(_metadata, LANDLOCK_SCOPE_SIGNAL,
+			     default_scoped_domain_opts);
 
 	ASSERT_EQ(0, pthread_create(&scoped_thread, NULL, thread_sync,
 				    &thread_pipe[0]));
@@ -360,7 +366,8 @@ TEST(signal_scoping_thread_setuid)
 				    &arg));
 
 	/* Enforces restriction after creating the thread. */
-	create_scoped_domain(_metadata, LANDLOCK_SCOPE_SIGNAL);
+	create_scoped_domain(_metadata, LANDLOCK_SCOPE_SIGNAL,
+			     default_scoped_domain_opts);
 
 	EXPECT_NE(arg.new_uid, getuid());
 	EXPECT_EQ(0, setuid(arg.new_uid));
@@ -469,7 +476,8 @@ TEST_F(fown, sigurg_socket)
 	ASSERT_EQ(0, pipe2(pipe_child, O_CLOEXEC));
 
 	if (variant->sandbox_setown == SANDBOX_BEFORE_FORK)
-		create_scoped_domain(_metadata, LANDLOCK_SCOPE_SIGNAL);
+		create_scoped_domain(_metadata, LANDLOCK_SCOPE_SIGNAL,
+				     default_scoped_domain_opts);
 
 	child = fork();
 	ASSERT_LE(0, child);
@@ -531,7 +539,8 @@ TEST_F(fown, sigurg_socket)
 	ASSERT_LE(0, recv_socket);
 
 	if (variant->sandbox_setown == SANDBOX_BEFORE_SETOWN)
-		create_scoped_domain(_metadata, LANDLOCK_SCOPE_SIGNAL);
+		create_scoped_domain(_metadata, LANDLOCK_SCOPE_SIGNAL,
+				     default_scoped_domain_opts);
 
 	/*
 	 * Sets the child to receive SIGURG for MSG_OOB.  This uncommon use is
@@ -540,7 +549,8 @@ TEST_F(fown, sigurg_socket)
 	ASSERT_EQ(0, fcntl(recv_socket, F_SETOWN, child));
 
 	if (variant->sandbox_setown == SANDBOX_AFTER_SETOWN)
-		create_scoped_domain(_metadata, LANDLOCK_SCOPE_SIGNAL);
+		create_scoped_domain(_metadata, LANDLOCK_SCOPE_SIGNAL,
+				     default_scoped_domain_opts);
 
 	ASSERT_EQ(1, write(pipe_parent[1], ".", 1));
 
-- 
2.53.0


^ permalink raw reply related

* [RFC PATCH 03/20] landlock: Implement LANDLOCK_RESTRICT_SELF_NO_NEW_PRIVS
From: Justin Suess @ 2026-04-07 20:01 UTC (permalink / raw)
  To: ast, daniel, andrii, kpsingh, paul, mic, viro, brauner, kees
  Cc: gnoack, jack, jmorris, serge, song, yonghong.song, martin.lau, m,
	eddyz87, john.fastabend, sdf, skhan, bpf, linux-security-module,
	linux-kernel, linux-fsdevel, Justin Suess
In-Reply-To: <20260407200157.3874806-1-utilityemal77@gmail.com>

Add a flag LANDLOCK_RESTRICT_SELF_NO_NEW_PRIVS, which calls
task_set_no_new_privs() on the current task, but only if no_new_privs
is not already set and the task lacks the CAP_SYS_ADMIN capability.

While this operation is redundant for code running from userspace
(indeed, callers may achieve the same result by calling prctl() with
PR_SET_NO_NEW_PRIVS), this flag enables callers without access to the
syscall ABI (defined in subsequent patches) to prevent processes from
gaining additional privileges. This is important to ensure that
consumers can meet the task_no_new_privs || CAP_SYS_ADMIN invariant
enforced by Landlock without having syscall access.

Signed-off-by: Justin Suess <utilityemal77@gmail.com>
---
 include/uapi/linux/landlock.h | 14 ++++++++++++++
 security/landlock/limits.h    |  2 +-
 security/landlock/ruleset.c   | 12 +++++++++++-
 security/landlock/syscalls.c  |  7 +++++++
 4 files changed, 33 insertions(+), 2 deletions(-)

diff --git a/include/uapi/linux/landlock.h b/include/uapi/linux/landlock.h
index 10a346e55e95..de2537755bbe 100644
--- a/include/uapi/linux/landlock.h
+++ b/include/uapi/linux/landlock.h
@@ -131,12 +131,26 @@ struct landlock_ruleset_attr {
  *
  *     If the calling thread is running with no_new_privs, this operation
  *     enables no_new_privs on the sibling threads as well.
+ *
+ * %LANDLOCK_RESTRICT_SELF_NO_NEW_PRIVS
+ *    Sets no_new_privs on the calling thread before applying the Landlock domain.
+ *    This flag is useful for convenience as well as for applying a ruleset from
+ *    an outside context (e.g. BPF). This flag only has an effect when
+ *    no_new_privs isn't already set and the caller doesn't possess CAP_SYS_ADMIN.
+ *
+ *    This flag has slightly different behavior when used from BPF. Instead of
+ *    setting no_new_privs on the current task, it sets a flag on the bprm so that
+ *    no_new_privs is set on the task at exec point-of-no-return. This guarantees
+ *    that the current execution is unaffected, and may escalate as usual until the
+ *    next exec, but the resulting task cannot gain more privileges through later
+ *    exec transitions.
  */
 /* clang-format off */
 #define LANDLOCK_RESTRICT_SELF_LOG_SAME_EXEC_OFF		(1U << 0)
 #define LANDLOCK_RESTRICT_SELF_LOG_NEW_EXEC_ON			(1U << 1)
 #define LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF		(1U << 2)
 #define LANDLOCK_RESTRICT_SELF_TSYNC				(1U << 3)
+#define LANDLOCK_RESTRICT_SELF_NO_NEW_PRIVS			(1U << 4)
 /* clang-format on */
 
 /**
diff --git a/security/landlock/limits.h b/security/landlock/limits.h
index b454ad73b15e..9eafc64fba3f 100644
--- a/security/landlock/limits.h
+++ b/security/landlock/limits.h
@@ -31,7 +31,7 @@
 #define LANDLOCK_MASK_SCOPE		((LANDLOCK_LAST_SCOPE << 1) - 1)
 #define LANDLOCK_NUM_SCOPE		__const_hweight64(LANDLOCK_MASK_SCOPE)
 
-#define LANDLOCK_LAST_RESTRICT_SELF	LANDLOCK_RESTRICT_SELF_TSYNC
+#define LANDLOCK_LAST_RESTRICT_SELF	LANDLOCK_RESTRICT_SELF_NO_NEW_PRIVS
 #define LANDLOCK_MASK_RESTRICT_SELF	((LANDLOCK_LAST_RESTRICT_SELF << 1) - 1)
 
 /* clang-format on */
diff --git a/security/landlock/ruleset.c b/security/landlock/ruleset.c
index 2333a3dc5f33..4f0305796165 100644
--- a/security/landlock/ruleset.c
+++ b/security/landlock/ruleset.c
@@ -121,10 +121,12 @@ int landlock_restrict_cred_precheck(const __u32 flags,
 
 	/*
 	 * Similar checks as for seccomp(2), except that an -EPERM may be
-	 * returned.
+	 * returned, or no_new_privs may be set by the caller via
+	 * LANDLOCK_RESTRICT_SELF_NO_NEW_PRIVS.
 	 */
 	if (!task_no_new_privs(current) &&
 	    !ns_capable_noaudit(current_user_ns(), CAP_SYS_ADMIN)) {
+		if (!(flags & LANDLOCK_RESTRICT_SELF_NO_NEW_PRIVS))
 			return -EPERM;
 	}
 
@@ -197,6 +199,14 @@ int landlock_restrict_cred(struct cred *const cred,
 	}
 
 	if (flags & LANDLOCK_RESTRICT_SELF_TSYNC) {
+		/*
+		 * We know we can set no_new_privs on the current task
+		 * because this path is only valid in the syscall context
+		 */
+		if ((flags & LANDLOCK_RESTRICT_SELF_NO_NEW_PRIVS) &&
+		     !task_no_new_privs(current) &&
+		     !ns_capable_noaudit(current_user_ns(), CAP_SYS_ADMIN))
+			task_set_no_new_privs(current);
 		const int tsync_err =
 			landlock_restrict_sibling_threads(current_cred(), cred);
 
diff --git a/security/landlock/syscalls.c b/security/landlock/syscalls.c
index c710e8b16150..6723806723d5 100644
--- a/security/landlock/syscalls.c
+++ b/security/landlock/syscalls.c
@@ -402,6 +402,7 @@ SYSCALL_DEFINE4(landlock_add_rule, const int, ruleset_fd,
  *         - %LANDLOCK_RESTRICT_SELF_LOG_NEW_EXEC_ON
  *         - %LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF
  *         - %LANDLOCK_RESTRICT_SELF_TSYNC
+ *         - %LANDLOCK_RESTRICT_SELF_NO_NEW_PRIVS
  *
  * This system call enforces a Landlock ruleset on the current thread.
  * Enforcing a ruleset requires that the task has %CAP_SYS_ADMIN in its
@@ -461,5 +462,11 @@ SYSCALL_DEFINE2(landlock_restrict_self, const int, ruleset_fd, const __u32,
 		return err;
 	}
 
+	/* In syscall context we can set no_new_privs directly. */
+	if ((flags & LANDLOCK_RESTRICT_SELF_NO_NEW_PRIVS) &&
+	    !task_no_new_privs(current) &&
+	    !ns_capable_noaudit(current_user_ns(), CAP_SYS_ADMIN))
+		task_set_no_new_privs(current);
+
 	return commit_creds(new_cred);
 }
-- 
2.53.0


^ permalink raw reply related

* [RFC PATCH 02/20] execve: Add set_nnp_on_point_of_no_return
From: Justin Suess @ 2026-04-07 20:01 UTC (permalink / raw)
  To: ast, daniel, andrii, kpsingh, paul, mic, viro, brauner, kees
  Cc: gnoack, jack, jmorris, serge, song, yonghong.song, martin.lau, m,
	eddyz87, john.fastabend, sdf, skhan, bpf, linux-security-module,
	linux-kernel, linux-fsdevel, Justin Suess
In-Reply-To: <20260407200157.3874806-1-utilityemal77@gmail.com>

Allow LSM hooks to set a new bitfield in the binprm, ensuring that
the newly executed program runs with no_new_privs set, by calling
task_set_no_new_privs() only once exec is past the point of no return.

This differs semantically from calling task_set_no_new_privs() directly,
which is not safe from bprm_creds_for_exec/creds_from_file because a
failed execution would leave no_new_privs set on the original task.
Setting this flag from an LSM hook does not alter the current task's
no_new_privs field until after the point of no return, so a failed
execve has no side effect.

Setting this field will not result in any change to the escalation or
LSM checks for the current execution transition, only for subsequent
ones.

Signed-off-by: Justin Suess <utilityemal77@gmail.com>
---
 fs/exec.c               | 8 ++++++++
 include/linux/binfmts.h | 7 ++++++-
 2 files changed, 14 insertions(+), 1 deletion(-)

diff --git a/fs/exec.c b/fs/exec.c
index 9ea3a775d51e..6ab700af57d9 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1111,6 +1111,14 @@ int begin_new_exec(struct linux_binprm * bprm)
 	 */
 	bprm->point_of_no_return = true;
 
+	/*
+	 * If requested that we set NO_NEW_PRIVS on the task, do so now that we're
+	 * committed to exec. We set it here in case it wasn't safe to set it
+	 * before the point of no return.
+	 */
+	if (bprm->set_nnp_on_point_of_no_return)
+		task_set_no_new_privs(current);
+
 	/* Make this the only thread in the thread group */
 	retval = de_thread(me);
 	if (retval)
diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h
index 65abd5ab8836..9e420b055c4a 100644
--- a/include/linux/binfmts.h
+++ b/include/linux/binfmts.h
@@ -49,7 +49,12 @@ struct linux_binprm {
 		 * Set by user space to check executability according to the
 		 * caller's environment.
 		 */
-		is_check:1;
+		is_check:1,
+		/*
+		 * Set when no_new_privs (NNP) should be set on the task once
+		 * exec is past the point of no return.
+		 */
+		set_nnp_on_point_of_no_return:1;
 	struct file *executable; /* Executable to pass to the interpreter */
 	struct file *interpreter;
 	struct file *file;
-- 
2.53.0


^ permalink raw reply related

* [RFC PATCH 01/20] landlock: Move operations from syscall into ruleset code
From: Justin Suess @ 2026-04-07 20:01 UTC (permalink / raw)
  To: ast, daniel, andrii, kpsingh, paul, mic, viro, brauner, kees
  Cc: gnoack, jack, jmorris, serge, song, yonghong.song, martin.lau, m,
	eddyz87, john.fastabend, sdf, skhan, bpf, linux-security-module,
	linux-kernel, linux-fsdevel, Justin Suess
In-Reply-To: <20260407200157.3874806-1-utilityemal77@gmail.com>

Refactor syscall restriction code, associated constants and helpers,
into ruleset.h/c. This helps increase consistency by making syscalls.c a
consumer of ruleset.h/c's logic. Subsequent patches in this series add
consumers of this logic.

Functions for getting and putting references on a Landlock ruleset are
also exposed by this patch for the subsequent consumers, turning them
from static functions into externally linked functions declared in
headers.

Signed-off-by: Justin Suess <utilityemal77@gmail.com>
---
 include/linux/landlock.h     |  92 ++++++++++++++++++
 security/landlock/ruleset.c  | 179 +++++++++++++++++++++++++++++++++++
 security/landlock/ruleset.h  |  19 ++--
 security/landlock/syscalls.c | 151 +++--------------------------
 4 files changed, 296 insertions(+), 145 deletions(-)
 create mode 100644 include/linux/landlock.h

diff --git a/include/linux/landlock.h b/include/linux/landlock.h
new file mode 100644
index 000000000000..fae7d138ef8b
--- /dev/null
+++ b/include/linux/landlock.h
@@ -0,0 +1,92 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Landlock - Internal cross subsystem header
+ *
+ * Copyright © 2026 Justin Suess <utilityemal77@gmail.com>
+ */
+
+#ifndef _LINUX_LANDLOCK_H
+#define _LINUX_LANDLOCK_H
+
+#include <linux/cred.h>
+#include <linux/err.h>
+#include <linux/fs.h>
+#include <linux/types.h>
+#include <uapi/linux/landlock.h>
+
+struct landlock_ruleset;
+
+#ifdef CONFIG_SECURITY_LANDLOCK
+
+/*
+ * Returns an owned ruleset from a FD. It is thus needed to call
+ * landlock_put_ruleset() on the returned value.
+ */
+struct landlock_ruleset *landlock_get_ruleset_from_fd(int fd, fmode_t mode);
+
+/*
+ * Acquires an additional reference to a ruleset if it is still alive.
+ */
+bool landlock_try_get_ruleset(struct landlock_ruleset *ruleset);
+
+/*
+ * Releases a previously acquired ruleset.
+ */
+void landlock_put_ruleset(struct landlock_ruleset *ruleset);
+
+/*
+ * Releases a previously acquired ruleset after an RCU-safe deferral.
+ */
+void landlock_put_ruleset_deferred(struct landlock_ruleset *ruleset);
+
+/*
+ * Restricts @cred with @ruleset and the supplied @flags.
+ *
+ * landlock_restrict_cred_precheck() must be called first.
+ *
+ * The caller owns @cred and is responsible for committing or aborting it.
+ * @ruleset may be NULL only with LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF.
+ */
+int landlock_restrict_cred_precheck(__u32 flags, bool in_task_context);
+
+int landlock_restrict_cred(struct cred *cred, struct landlock_ruleset *ruleset,
+			   __u32 flags);
+
+#else /* !CONFIG_SECURITY_LANDLOCK */
+
+static inline struct landlock_ruleset *
+landlock_get_ruleset_from_fd(int fd, fmode_t mode)
+{
+	return ERR_PTR(-EOPNOTSUPP);
+}
+
+static inline bool landlock_try_get_ruleset(struct landlock_ruleset *ruleset)
+{
+	return false;
+}
+
+static inline void landlock_put_ruleset(struct landlock_ruleset *ruleset)
+{
+}
+
+static inline void
+landlock_put_ruleset_deferred(struct landlock_ruleset *ruleset)
+{
+}
+
+static inline int landlock_restrict_cred(struct cred *cred,
+					 struct landlock_ruleset *ruleset,
+					 __u32 flags)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline int landlock_restrict_cred_precheck(__u32 flags,
+					 bool in_task_context)
+{
+	return -EOPNOTSUPP;
+}
+
+#endif /* !CONFIG_SECURITY_LANDLOCK */
+
+#endif /* _LINUX_LANDLOCK_H */
diff --git a/security/landlock/ruleset.c b/security/landlock/ruleset.c
index 181df7736bb9..2333a3dc5f33 100644
--- a/security/landlock/ruleset.c
+++ b/security/landlock/ruleset.c
@@ -8,25 +8,204 @@
 
 #include <linux/bits.h>
 #include <linux/bug.h>
+#include <linux/capability.h>
 #include <linux/cleanup.h>
 #include <linux/compiler_types.h>
 #include <linux/err.h>
 #include <linux/errno.h>
+#include <linux/fs.h>
 #include <linux/kernel.h>
 #include <linux/lockdep.h>
 #include <linux/mutex.h>
 #include <linux/overflow.h>
 #include <linux/rbtree.h>
 #include <linux/refcount.h>
+#include <linux/sched.h>
 #include <linux/slab.h>
 #include <linux/spinlock.h>
 #include <linux/workqueue.h>
 
 #include "access.h"
+#include "cred.h"
 #include "domain.h"
 #include "limits.h"
 #include "object.h"
 #include "ruleset.h"
+#include "setup.h"
+#include "tsync.h"
+
+static int fop_ruleset_release(struct inode *const inode,
+			       struct file *const filp)
+{
+	struct landlock_ruleset *ruleset = filp->private_data;
+
+	landlock_put_ruleset(ruleset);
+	return 0;
+}
+
+static ssize_t fop_dummy_read(struct file *const filp, char __user *const buf,
+			      const size_t size, loff_t *const ppos)
+{
+	/* Dummy handler to enable FMODE_CAN_READ. */
+	return -EINVAL;
+}
+
+static ssize_t fop_dummy_write(struct file *const filp,
+			       const char __user *const buf, const size_t size,
+			       loff_t *const ppos)
+{
+	/* Dummy handler to enable FMODE_CAN_WRITE. */
+	return -EINVAL;
+}
+
+/*
+ * A ruleset file descriptor enables to build a ruleset by adding (i.e.
+ * writing) rule after rule, without relying on the task's context.  This
+ * reentrant design is also used in a read way to enforce the ruleset on the
+ * current task.
+ */
+const struct file_operations ruleset_fops = {
+	.release = fop_ruleset_release,
+	.read = fop_dummy_read,
+	.write = fop_dummy_write,
+};
+
+/*
+ * Returns an owned ruleset from a FD. It is thus needed to call
+ * landlock_put_ruleset() on the return value.
+ */
+struct landlock_ruleset *landlock_get_ruleset_from_fd(const int fd,
+						      const fmode_t mode)
+{
+	CLASS(fd, ruleset_f)(fd);
+	struct landlock_ruleset *ruleset;
+
+	if (fd_empty(ruleset_f))
+		return ERR_PTR(-EBADF);
+
+	/* Checks FD type and access right. */
+	if (fd_file(ruleset_f)->f_op != &ruleset_fops)
+		return ERR_PTR(-EBADFD);
+	if (!(fd_file(ruleset_f)->f_mode & mode))
+		return ERR_PTR(-EPERM);
+	ruleset = fd_file(ruleset_f)->private_data;
+	if (WARN_ON_ONCE(ruleset->num_layers != 1))
+		return ERR_PTR(-EINVAL);
+	landlock_get_ruleset(ruleset);
+	return ruleset;
+}
+
+void landlock_get_ruleset(struct landlock_ruleset *const ruleset)
+{
+	if (ruleset)
+		refcount_inc(&ruleset->usage);
+}
+
+bool landlock_try_get_ruleset(struct landlock_ruleset *const ruleset)
+{
+	return ruleset && refcount_inc_not_zero(&ruleset->usage);
+}
+
+int landlock_restrict_cred_precheck(const __u32 flags,
+				    const bool in_task_context)
+{
+	if (!landlock_initialized)
+		return -EOPNOTSUPP;
+
+	/*
+	 * LANDLOCK_RESTRICT_SELF_TSYNC requires that the current task is
+	 * the target of restriction.
+	 */
+	if ((flags & LANDLOCK_RESTRICT_SELF_TSYNC) && !in_task_context)
+		return -EINVAL;
+
+	/*
+	 * Similar checks as for seccomp(2), except that an -EPERM may be
+	 * returned.
+	 */
+	if (!task_no_new_privs(current) &&
+	    !ns_capable_noaudit(current_user_ns(), CAP_SYS_ADMIN)) {
+			return -EPERM;
+	}
+
+	if (flags & ~LANDLOCK_MASK_RESTRICT_SELF)
+		return -EINVAL;
+
+	return 0;
+}
+
+int landlock_restrict_cred(struct cred *const cred,
+			   struct landlock_ruleset *const ruleset,
+			   const __u32 flags)
+{
+	struct landlock_cred_security *new_llcred;
+	bool __maybe_unused log_same_exec, log_new_exec, log_subdomains,
+		prev_log_subdomains;
+
+	/*
+	 * It is allowed to set LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF without
+	 * a ruleset, optionally combined with LANDLOCK_RESTRICT_SELF_TSYNC, but
+	 * no other flag must be set.
+	 */
+	if (!ruleset &&
+	    (flags & ~LANDLOCK_RESTRICT_SELF_TSYNC) !=
+		    LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF)
+		return -EINVAL;
+
+	/* Translates "off" flag to boolean. */
+	log_same_exec = !(flags & LANDLOCK_RESTRICT_SELF_LOG_SAME_EXEC_OFF);
+	/* Translates "on" flag to boolean. */
+	log_new_exec = !!(flags & LANDLOCK_RESTRICT_SELF_LOG_NEW_EXEC_ON);
+	/* Translates "off" flag to boolean. */
+	log_subdomains = !(flags & LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF);
+
+	new_llcred = landlock_cred(cred);
+
+#ifdef CONFIG_AUDIT
+	prev_log_subdomains = !new_llcred->log_subdomains_off;
+	new_llcred->log_subdomains_off = !prev_log_subdomains ||
+					 !log_subdomains;
+#endif /* CONFIG_AUDIT */
+
+	/*
+	 * The only case when a ruleset may not be set is if
+	 * LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF is set, optionally combined
+	 * with LANDLOCK_RESTRICT_SELF_TSYNC.
+	 * We could optimize this case by not committing @cred if this flag was
+	 * already set, but it is not worth the complexity.
+	 */
+	if (ruleset) {
+		struct landlock_ruleset *const new_dom =
+			landlock_merge_ruleset(new_llcred->domain, ruleset);
+
+		if (IS_ERR(new_dom))
+			return PTR_ERR(new_dom);
+
+#ifdef CONFIG_AUDIT
+		new_dom->hierarchy->log_same_exec = log_same_exec;
+		new_dom->hierarchy->log_new_exec = log_new_exec;
+		if ((!log_same_exec && !log_new_exec) || !prev_log_subdomains)
+			new_dom->hierarchy->log_status = LANDLOCK_LOG_DISABLED;
+#endif /* CONFIG_AUDIT */
+
+		landlock_put_ruleset(new_llcred->domain);
+		new_llcred->domain = new_dom;
+
+#ifdef CONFIG_AUDIT
+		new_llcred->domain_exec |= BIT(new_dom->num_layers - 1);
+#endif /* CONFIG_AUDIT */
+	}
+
+	if (flags & LANDLOCK_RESTRICT_SELF_TSYNC) {
+		const int tsync_err =
+			landlock_restrict_sibling_threads(current_cred(), cred);
+
+		if (tsync_err)
+			return tsync_err;
+	}
+
+	return 0;
+}
 
 static struct landlock_ruleset *create_ruleset(const u32 num_layers)
 {
diff --git a/security/landlock/ruleset.h b/security/landlock/ruleset.h
index 889f4b30301a..0facc5cb6555 100644
--- a/security/landlock/ruleset.h
+++ b/security/landlock/ruleset.h
@@ -11,6 +11,8 @@
 
 #include <linux/cleanup.h>
 #include <linux/err.h>
+#include <linux/fs.h>
+#include <linux/landlock.h>
 #include <linux/mutex.h>
 #include <linux/rbtree.h>
 #include <linux/refcount.h>
@@ -20,6 +22,8 @@
 #include "limits.h"
 #include "object.h"
 
+extern const struct file_operations ruleset_fops;
+
 struct landlock_hierarchy;
 
 /**
@@ -194,6 +198,8 @@ landlock_create_ruleset(const access_mask_t access_mask_fs,
 			const access_mask_t access_mask_net,
 			const access_mask_t scope_mask);
 
+void landlock_get_ruleset(struct landlock_ruleset *ruleset);
+
 void landlock_put_ruleset(struct landlock_ruleset *const ruleset);
 void landlock_put_ruleset_deferred(struct landlock_ruleset *const ruleset);
 
@@ -204,6 +210,13 @@ int landlock_insert_rule(struct landlock_ruleset *const ruleset,
 			 const struct landlock_id id,
 			 const access_mask_t access);
 
+int landlock_restrict_cred_precheck(const __u32 flags,
+				    const bool in_task_context);
+
+int landlock_restrict_cred(struct cred *const cred,
+			   struct landlock_ruleset *const ruleset,
+			   const __u32 flags);
+
 struct landlock_ruleset *
 landlock_merge_ruleset(struct landlock_ruleset *const parent,
 		       struct landlock_ruleset *const ruleset);
@@ -212,12 +225,6 @@ const struct landlock_rule *
 landlock_find_rule(const struct landlock_ruleset *const ruleset,
 		   const struct landlock_id id);
 
-static inline void landlock_get_ruleset(struct landlock_ruleset *const ruleset)
-{
-	if (ruleset)
-		refcount_inc(&ruleset->usage);
-}
-
 /**
  * landlock_union_access_masks - Return all access rights handled in the
  *				 domain
diff --git a/security/landlock/syscalls.c b/security/landlock/syscalls.c
index accfd2e5a0cd..c710e8b16150 100644
--- a/security/landlock/syscalls.c
+++ b/security/landlock/syscalls.c
@@ -121,42 +121,6 @@ static void build_check_abi(void)
 
 /* Ruleset handling */
 
-static int fop_ruleset_release(struct inode *const inode,
-			       struct file *const filp)
-{
-	struct landlock_ruleset *ruleset = filp->private_data;
-
-	landlock_put_ruleset(ruleset);
-	return 0;
-}
-
-static ssize_t fop_dummy_read(struct file *const filp, char __user *const buf,
-			      const size_t size, loff_t *const ppos)
-{
-	/* Dummy handler to enable FMODE_CAN_READ. */
-	return -EINVAL;
-}
-
-static ssize_t fop_dummy_write(struct file *const filp,
-			       const char __user *const buf, const size_t size,
-			       loff_t *const ppos)
-{
-	/* Dummy handler to enable FMODE_CAN_WRITE. */
-	return -EINVAL;
-}
-
-/*
- * A ruleset file descriptor enables to build a ruleset by adding (i.e.
- * writing) rule after rule, without relying on the task's context.  This
- * reentrant design is also used in a read way to enforce the ruleset on the
- * current task.
- */
-static const struct file_operations ruleset_fops = {
-	.release = fop_ruleset_release,
-	.read = fop_dummy_read,
-	.write = fop_dummy_write,
-};
-
 /*
  * The Landlock ABI version should be incremented for each new Landlock-related
  * user space visible change (e.g. Landlock syscalls).  This version should
@@ -264,31 +228,6 @@ SYSCALL_DEFINE3(landlock_create_ruleset,
 	return ruleset_fd;
 }
 
-/*
- * Returns an owned ruleset from a FD. It is thus needed to call
- * landlock_put_ruleset() on the return value.
- */
-static struct landlock_ruleset *get_ruleset_from_fd(const int fd,
-						    const fmode_t mode)
-{
-	CLASS(fd, ruleset_f)(fd);
-	struct landlock_ruleset *ruleset;
-
-	if (fd_empty(ruleset_f))
-		return ERR_PTR(-EBADF);
-
-	/* Checks FD type and access right. */
-	if (fd_file(ruleset_f)->f_op != &ruleset_fops)
-		return ERR_PTR(-EBADFD);
-	if (!(fd_file(ruleset_f)->f_mode & mode))
-		return ERR_PTR(-EPERM);
-	ruleset = fd_file(ruleset_f)->private_data;
-	if (WARN_ON_ONCE(ruleset->num_layers != 1))
-		return ERR_PTR(-EINVAL);
-	landlock_get_ruleset(ruleset);
-	return ruleset;
-}
-
 /* Path handling */
 
 /*
@@ -437,7 +376,7 @@ SYSCALL_DEFINE4(landlock_add_rule, const int, ruleset_fd,
 		return -EINVAL;
 
 	/* Gets and checks the ruleset. */
-	ruleset = get_ruleset_from_fd(ruleset_fd, FMODE_CAN_WRITE);
+	ruleset = landlock_get_ruleset_from_fd(ruleset_fd, FMODE_CAN_WRITE);
 	if (IS_ERR(ruleset))
 		return PTR_ERR(ruleset);
 
@@ -487,33 +426,13 @@ SYSCALL_DEFINE4(landlock_add_rule, const int, ruleset_fd,
 SYSCALL_DEFINE2(landlock_restrict_self, const int, ruleset_fd, const __u32,
 		flags)
 {
-	struct landlock_ruleset *ruleset __free(landlock_put_ruleset) = NULL;
 	struct cred *new_cred;
-	struct landlock_cred_security *new_llcred;
-	bool __maybe_unused log_same_exec, log_new_exec, log_subdomains,
-		prev_log_subdomains;
-
-	if (!is_initialized())
-		return -EOPNOTSUPP;
-
-	/*
-	 * Similar checks as for seccomp(2), except that an -EPERM may be
-	 * returned.
-	 */
-	if (!task_no_new_privs(current) &&
-	    !ns_capable_noaudit(current_user_ns(), CAP_SYS_ADMIN))
-		return -EPERM;
-
-	if ((flags | LANDLOCK_MASK_RESTRICT_SELF) !=
-	    LANDLOCK_MASK_RESTRICT_SELF)
-		return -EINVAL;
+	struct landlock_ruleset *ruleset __free(landlock_put_ruleset) = NULL;
+	int err;
 
-	/* Translates "off" flag to boolean. */
-	log_same_exec = !(flags & LANDLOCK_RESTRICT_SELF_LOG_SAME_EXEC_OFF);
-	/* Translates "on" flag to boolean. */
-	log_new_exec = !!(flags & LANDLOCK_RESTRICT_SELF_LOG_NEW_EXEC_ON);
-	/* Translates "off" flag to boolean. */
-	log_subdomains = !(flags & LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF);
+	err = landlock_restrict_cred_precheck(flags, true);
+	if (err)
+		return err;
 
 	/*
 	 * It is allowed to set LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF with
@@ -525,7 +444,8 @@ SYSCALL_DEFINE2(landlock_restrict_self, const int, ruleset_fd, const __u32,
 	      (flags & ~LANDLOCK_RESTRICT_SELF_TSYNC) ==
 		      LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF)) {
 		/* Gets and checks the ruleset. */
-		ruleset = get_ruleset_from_fd(ruleset_fd, FMODE_CAN_READ);
+		ruleset = landlock_get_ruleset_from_fd(ruleset_fd,
+						       FMODE_CAN_READ);
 		if (IS_ERR(ruleset))
 			return PTR_ERR(ruleset);
 	}
@@ -535,57 +455,10 @@ SYSCALL_DEFINE2(landlock_restrict_self, const int, ruleset_fd, const __u32,
 	if (!new_cred)
 		return -ENOMEM;
 
-	new_llcred = landlock_cred(new_cred);
-
-#ifdef CONFIG_AUDIT
-	prev_log_subdomains = !new_llcred->log_subdomains_off;
-	new_llcred->log_subdomains_off = !prev_log_subdomains ||
-					 !log_subdomains;
-#endif /* CONFIG_AUDIT */
-
-	/*
-	 * The only case when a ruleset may not be set is if
-	 * LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF is set (optionally with
-	 * LANDLOCK_RESTRICT_SELF_TSYNC) and ruleset_fd is -1.  We could
-	 * optimize this case by not calling commit_creds() if this flag was
-	 * already set, but it is not worth the complexity.
-	 */
-	if (ruleset) {
-		/*
-		 * There is no possible race condition while copying and
-		 * manipulating the current credentials because they are
-		 * dedicated per thread.
-		 */
-		struct landlock_ruleset *const new_dom =
-			landlock_merge_ruleset(new_llcred->domain, ruleset);
-		if (IS_ERR(new_dom)) {
-			abort_creds(new_cred);
-			return PTR_ERR(new_dom);
-		}
-
-#ifdef CONFIG_AUDIT
-		new_dom->hierarchy->log_same_exec = log_same_exec;
-		new_dom->hierarchy->log_new_exec = log_new_exec;
-		if ((!log_same_exec && !log_new_exec) || !prev_log_subdomains)
-			new_dom->hierarchy->log_status = LANDLOCK_LOG_DISABLED;
-#endif /* CONFIG_AUDIT */
-
-		/* Replaces the old (prepared) domain. */
-		landlock_put_ruleset(new_llcred->domain);
-		new_llcred->domain = new_dom;
-
-#ifdef CONFIG_AUDIT
-		new_llcred->domain_exec |= BIT(new_dom->num_layers - 1);
-#endif /* CONFIG_AUDIT */
-	}
-
-	if (flags & LANDLOCK_RESTRICT_SELF_TSYNC) {
-		const int err = landlock_restrict_sibling_threads(
-			current_cred(), new_cred);
-		if (err) {
-			abort_creds(new_cred);
-			return err;
-		}
+	err = landlock_restrict_cred(new_cred, ruleset, flags);
+	if (err) {
+		abort_creds(new_cred);
+		return err;
 	}
 
 	return commit_creds(new_cred);
-- 
2.53.0


^ permalink raw reply related

* [RFC PATCH 00/20] BPF interface for applying Landlock rulesets
From: Justin Suess @ 2026-04-07 20:01 UTC (permalink / raw)
  To: ast, daniel, andrii, kpsingh, paul, mic, viro, brauner, kees
  Cc: gnoack, jack, jmorris, serge, song, yonghong.song, martin.lau, m,
	eddyz87, john.fastabend, sdf, skhan, bpf, linux-security-module,
	linux-kernel, linux-fsdevel, Justin Suess

Hello,

This series lets sleepable BPF LSM programs apply an existing,
userspace-created Landlock ruleset to a program during exec.

The goal is not to move Landlock policy definition into BPF, nor to create a
second policy engine.  Instead, BPF is used only to select when an already
valid Landlock ruleset should be applied, based on runtime exec context.

Background
===

Landlock is primarily a syscall-driven, unprivileged-first LSM.  That model
works well when the application being sandboxed can create and enforce its own
rulesets, or when a trusted launcher can impose restrictions directly before
running a trusted target.

That becomes harder when the target program is not under first-party control,
for example:

1. third-party binaries,
2. unmodified container images,
3. programs reached through shells, wrappers, or service managers, and
4. user-supplied or otherwise untrusted code.

In these cases, an external supervisor may want to apply a Landlock ruleset to
the final executed program, while leaving unrelated parents or helper
processes alone.

Why external sandboxing is awkward today
===

There are two recurring problems.

First, userspace cannot reliably predict every file a target may need across
different systems, packaging layouts, and runtime conditions.  Shared
libraries, configuration files, interpreters, and helper binaries often depend
on details that are only known at runtime.

Second, Landlock inheritance is intentionally one-way.  Once a task is
restricted, descendants inherit that domain and may only become more
restricted.  This is exactly what Landlock should do, but it makes external
sandboxing awkward when the program of interest is buried inside a larger exec
chain.  Applying restrictions too early can affect unrelated intermediates;
applying them too late misses the target entirely.

This series addresses that target-selection problem.

Overview
===

This series adds a small BPF-to-Landlock bridge:

1. userspace creates a normal Landlock ruleset through the existing ABI;
2. userspace inserts that ruleset FD into a new
	BPF_MAP_TYPE_LANDLOCK_RULESET map;
3. a sleepable BPF LSM program attached to an exec-time hook looks up the
	ruleset; and
4. the program calls a kfunc to apply that ruleset to the new program's
	credentials before exec completes.

The important point is that BPF does not create, inspect, or mutate Landlock
policy here.  It only decides whether to apply a ruleset that was already
created and validated through Landlock's existing userspace API.

Interface
===

The series adds:

1. bpf_landlock_restrict_binprm(), which applies a referenced ruleset to
	struct linux_binprm credentials;
2. bpf_landlock_put_ruleset(), which releases a referenced ruleset;
3. BPF_MAP_TYPE_LANDLOCK_RULESET, a specialized map type for holding
	references to Landlock rulesets originating from userspace file
	descriptors; and
4. a new field in struct linux_binprm, set_nnp_on_point_of_no_return, which
	requests that task_set_no_new_privs() be applied once exec is beyond the
	point of no return.

The kfuncs are restricted to sleepable BPF LSM programs attached to
bprm_creds_for_exec and bprm_creds_from_file, which are the points where the
new program's credentials may still be updated safely.

This series also adds LANDLOCK_RESTRICT_SELF_NO_NEW_PRIVS.  On the BPF path,
this is staged through the exec context and committed only after exec reaches
point-of-no-return.  This avoids side effects on failed executions while
ensuring that the resulting task cannot gain more privileges through later exec
transitions. This is done through the set_nnp_on_point_of_no_return field.

This has a little subtlety: LANDLOCK_RESTRICT_SELF_NO_NEW_PRIVS in the BPF
path will not stop the current execution from escalating at all; only subsequent
ones. This is intentional to allow landlock policies to be applied through a
setuid transition for instance, without affecting the current escalation.

Semantics
===

This proposal is intended to preserve Landlock semantics as much as practical
for an exec-time BPF attachment model:

1. only pre-existing Landlock rulesets may be applied;
2. BPF cannot construct, inspect, or modify rulesets;
3. enforcement still happens before the new program begins execution;
4. normal Landlock inheritance, layering, and future composition remain
	unchanged; and
5. this does not bypass Landlock's privilege checks for applying Landlock
    rulesets.

In other words, BPF acts as an external selector for when to apply Landlock,
not as a replacement for Landlock's enforcement engine.

All behavior, future access rights, and previous access rights are designed
to automatically be supported from either BPF or existing syscall contexts.

The main semantic difference is LANDLOCK_RESTRICT_SELF_NO_NEW_PRIVS on the BPF
path: it guarantees that the resulting task is pinned with no_new_privs before
it can perform later exec transitions, but it does not retroactively suppress
privilege gain for the current exec transition itself.

The other exception to semantics is the LANDLOCK_RESTRICT_SELF_TSYNC flag.
(see Points of Feedback section)

Patch layout
===

Patches 1-5 prepare the Landlock side by moving shared ruleset logic out of
syscalls.c, adding a no_new_privs flag for non-syscall callers, exposing
linux_binprm->set_nnp_on_point_of_no_return as an interface to set no_new_privs
on the point of no return, and making deferred ruleset destruction RCU-safe.

Patches 6-10 add the BPF-facing pieces: the Landlock kfuncs, the new map type,
syscall handling for that map, and verifier support.

Patches 11-15 add selftests and the small bpftool update needed for the new
map type.

Patches 16-20 add docs and bump the ABI version and update MAINTAINERS.

Feedback is especially welcome on the overall interface shape, the choice of
hooks, and the map semantics.

Testing
===

This patch series has two portions of tests.

One lives in the traditional Landlock selftests, for the new
LANDLOCK_RESTRICT_SELF_NO_NEW_PRIVS flag.

The other suite lives under the BPF selftests, and this tests the Landlock
kfuncs and the new BPF_MAP_TYPE_LANDLOCK_RULESET.

This patch series was run through BPF CI, the results of which are here. [1]

All mentioned tests are passing, as well as the BPF CI.

[1] : https://github.com/kernel-patches/bpf/pull/11562

Points of Feedback
===

First, the new set_nnp_on_point_of_no_return field in struct linux_binprm.
This field was needed to request that task_set_no_new_privs be set during an
execution, but only after the execution has proceeded beyond the point of no
return. I couldn't find a way to express this semantic without adding a new
bitfield to struct linux_binprm and a conditional in fs/exec.c. Please see
patch 2.

Feedback on the BPF testing harness, which was generated with AI assistance as
disclosed in the commit footer, is welcomed. I have only limited familiarity
with BPF testing practices. These tests were made with strong human supervision.
See patches 14 and 15.

Feedback on the NO_NEW_PRIVS situation is also welcomed. Because task_set_no_new_privs()
would otherwise leak state on failed executions or AT_EXECVE_CHECK, this series
stages no_new_privs through the exec context and only commits it after
point-of-no-return. This preserves failure behavior while still ensuring that
the resulting task cannot elevate further through later exec transitions.
When called from bprm_creds_from_file, this does not retroactively change the
privilege outcome of the current exec transition itself.

See patch 2 and 3.

Next, the use of RCU in landlock_ruleset. Existing BPF maps use RCU to make sure
maps holding references stay valid. I altered the Landlock ruleset to use
rcu_work so that an RCU grace period elapses before a ruleset's deferred free
runs, and acquire RCU in the arraymap implementation. See patches 5-10.

Next, the semantics of the map. What operations should be supported from BPF
and userspace and what data types should they return? I consider the struct
bpf_landlock_ruleset to be opaque. Userspace can add items to the map via the
fd, delete items by their index, and BPF can delete and lookup items by their
index. Items cannot be updated, only swapped.

Finally, the handling of the LANDLOCK_RESTRICT_SELF_TSYNC flag. This flag has
no meaning in a pre-execution context, as the credentials during the designated
LSM hooks (bprm_creds_for_exec/bprm_creds_from_file) still represent the
pre-execution task. Therefore, this flag is rejected: attempting to use it with
bpf_landlock_restrict_binprm() will return -EINVAL. Otherwise, the flag would
result in applying the Landlock ruleset to the wrong target in addition to the
intended one (see patch 2). This behavior is validated with selftests.

Existing works / Credits
===

Mickaël Salaün created patchsets adding BPF tracepoints for landlock in [2] [3].

Mickaël also gave feedback on this feature and the idea in this GitHub thread. [4]

Günther Noack initially received and provided initial feedback on this idea as
an early prototype.

Liz Rice, author of "Learning eBPF: Programming the Linux Kernel for Enhanced
Observability, Networking, and Security" provided background and inspired me to
experiment with BPF and the BPF LSM. [5]

[2] : https://lore.kernel.org/all/20250523165741.693976-1-mic@digikod.net/
[3] : https://lore.kernel.org/linux-security-module/20260406143717.1815792-1-mic@digikod.net/
[4] : https://github.com/landlock-lsm/linux/issues/56
[5] : https://wellesleybooks.com/book/9781098135126

Kind Regards,
Justin Suess

Justin Suess (20):
  landlock: Move operations from syscall into ruleset code
  execve: Add set_nnp_on_point_of_no_return
  landlock: Implement LANDLOCK_RESTRICT_SELF_NO_NEW_PRIVS
  selftests/landlock: Cover LANDLOCK_RESTRICT_SELF_NO_NEW_PRIVS
  landlock: Make ruleset deferred free RCU safe
  bpf: lsm: Add Landlock kfuncs
  bpf: arraymap: Implement Landlock ruleset map
  bpf: Add Landlock ruleset map type
  bpf: syscall: Handle Landlock ruleset maps
  bpf: verifier: Add Landlock ruleset map support
  selftests/bpf: Add Landlock kfunc declarations
  selftests/landlock: Rename gettid wrapper for BPF reuse
  selftests/bpf: Enable Landlock in selftests kernel.
  selftests/bpf: Add Landlock kfunc test program
  selftests/bpf: Add Landlock kfunc test runner
  landlock: Bump ABI version
  tools: bpftool: Add documentation for landlock_ruleset
  landlock: Document LANDLOCK_RESTRICT_SELF_NO_NEW_PRIVS
  bpf: Document BPF_MAP_TYPE_LANDLOCK_RULESET
  MAINTAINERS: update entry for the Landlock subsystem

 Documentation/bpf/map_landlock_ruleset.rst    | 181 +++++
 Documentation/userspace-api/landlock.rst      |  22 +-
 MAINTAINERS                                   |   4 +
 fs/exec.c                                     |   8 +
 include/linux/binfmts.h                       |   7 +-
 include/linux/bpf_lsm.h                       |  15 +
 include/linux/bpf_types.h                     |   1 +
 include/linux/landlock.h                      |  92 +++
 include/uapi/linux/bpf.h                      |   1 +
 include/uapi/linux/landlock.h                 |  14 +
 kernel/bpf/arraymap.c                         |  67 ++
 kernel/bpf/bpf_lsm.c                          | 145 ++++
 kernel/bpf/syscall.c                          |   4 +-
 kernel/bpf/verifier.c                         |  15 +-
 samples/landlock/sandboxer.c                  |   7 +-
 security/landlock/limits.h                    |   2 +-
 security/landlock/ruleset.c                   | 198 ++++-
 security/landlock/ruleset.h                   |  25 +-
 security/landlock/syscalls.c                  | 158 +---
 .../bpf/bpftool/Documentation/bpftool-map.rst |   2 +-
 tools/bpf/bpftool/map.c                       |   2 +-
 tools/include/uapi/linux/bpf.h                |   1 +
 tools/lib/bpf/libbpf.c                        |   1 +
 tools/lib/bpf/libbpf_probes.c                 |   6 +
 tools/testing/selftests/bpf/bpf_kfuncs.h      |  20 +
 tools/testing/selftests/bpf/config            |   5 +
 tools/testing/selftests/bpf/config.x86_64     |   1 -
 .../bpf/prog_tests/landlock_kfuncs.c          | 733 ++++++++++++++++++
 .../selftests/bpf/progs/landlock_kfuncs.c     |  92 +++
 tools/testing/selftests/landlock/base_test.c  |  10 +-
 tools/testing/selftests/landlock/common.h     |  28 +-
 tools/testing/selftests/landlock/fs_test.c    | 103 +--
 tools/testing/selftests/landlock/net_test.c   |  55 +-
 .../testing/selftests/landlock/ptrace_test.c  |  14 +-
 .../landlock/scoped_abstract_unix_test.c      |  51 +-
 .../selftests/landlock/scoped_base_variants.h |  23 +
 .../selftests/landlock/scoped_common.h        |   5 +-
 .../selftests/landlock/scoped_signal_test.c   |  30 +-
 tools/testing/selftests/landlock/wrappers.h   |   2 +-
 39 files changed, 1877 insertions(+), 273 deletions(-)
 create mode 100644 Documentation/bpf/map_landlock_ruleset.rst
 create mode 100644 include/linux/landlock.h
 create mode 100644 tools/testing/selftests/bpf/prog_tests/landlock_kfuncs.c
 create mode 100644 tools/testing/selftests/bpf/progs/landlock_kfuncs.c


base-commit: 8c6a27e02bc55ab110d1828610048b19f903aaec
-- 
2.53.0


^ permalink raw reply

* Re: [PATCH v4 3/3] selinux: fix overlayfs mmap() and mprotect() access checks
From: Stephen Smalley @ 2026-04-07 19:20 UTC (permalink / raw)
  To: Paul Moore
  Cc: Ondrej Mosnacek, linux-security-module, selinux, linux-fsdevel,
	linux-unionfs, linux-erofs, Amir Goldstein, Gao Xiang,
	Christian Brauner
In-Reply-To: <CAHC9VhQnA38-9wDeVmOMxAFPHnd9y6x5LXtD3cSquGiL_MDDpA@mail.gmail.com>

On Tue, Apr 7, 2026 at 10:35 AM Paul Moore <paul@paul-moore.com> wrote:
>
> On Tue, Apr 7, 2026 at 8:14 AM Stephen Smalley
> <stephen.smalley.work@gmail.com> wrote:
> > On Thu, Apr 2, 2026 at 11:09 PM Paul Moore <paul@paul-moore.com> wrote:
> > >
> > > The existing SELinux security model for overlayfs is to allow access if
> > > the current task is able to access the top level file (the "user" file)
> > > and the mounter's credentials are sufficient to access the lower
> > > level file (the "backing" file).  Unfortunately, the current code does
> > > not properly enforce these access controls for both mmap() and mprotect()
> > > operations on overlayfs filesystems.
> > >
> > > This patch makes use of the newly created security_mmap_backing_file()
> > > LSM hook to provide the missing backing file enforcement for mmap()
> > > operations, and leverages the backing file API and new LSM blob to
> > > provide the necessary information to properly enforce the mprotect()
> > > access controls.
> > >
> > > Cc: stable@vger.kernel.org
> > > Signed-off-by: Paul Moore <paul@paul-moore.com>
> >
> > Do you have tests for these changes showing the before and after (i.e.
> > failing without your patches, passing with them)? I tried running an
> > earlier set from Ondrej but they failed.
>
> A few months ago I sent you and Ondrej some feedback on those early
> tests from Ondrej, but yes, I also had problems with Ondrej's tests.
> I've been using a hacked up combination of the existing tests, some of
> Ondrej's additions, and an additional debug/test patch to ensure the
> labeling is correct.  It's far from ideal, but I didn't invest time in
> test development as I assumed Ondrej would continue his efforts there
> (unfortunately it doesn't appear that he has?), and I wanted to focus
> on getting a solution as soon as possible for obvious reasons.

Ok, I'm happy to look at even unpolished tests - just want something I
can use to exercise the before and after states.

^ permalink raw reply

* Re: [PATCH v2 1/2] landlock: Fix LOG_SUBDOMAINS_OFF inheritance across fork()
From: Günther Noack @ 2026-04-07 19:02 UTC (permalink / raw)
  To: Mickaël Salaün
  Cc: Günther Noack, linux-security-module, Jann Horn, stable
In-Reply-To: <20260407164107.2012589-1-mic@digikod.net>

On Tue, Apr 07, 2026 at 06:41:04PM +0200, Mickaël Salaün wrote:
> hook_cred_transfer() only copies the Landlock security blob when the
> source credential has a domain.  This is inconsistent with
> landlock_restrict_self() which can set LOG_SUBDOMAINS_OFF on a
> credential without creating a domain (via the ruleset_fd=-1 path): the
> field is committed but not preserved across fork() because the child's
> prepare_creds() calls hook_cred_transfer() which skips the copy when
> domain is NULL.
> 
> This breaks the documented use case where a process mutes subdomain logs
> before forking sandboxed children: the children lose the muting and
> their domains produce unexpected audit records.
> 
> Fix this by unconditionally copying the Landlock credential blob.

As before, LGTM for both patches. Thanks for the fixes!

Reviewed-by: Günther Noack <gnoack3000@gmail.com>

–Günther

^ permalink raw reply

* Re: [PATCH v1 1/2] landlock: Fix log_subdomains_off inheritance across fork()
From: Günther Noack @ 2026-04-07 19:00 UTC (permalink / raw)
  To: Mickaël Salaün
  Cc: Jann Horn, Günther Noack, linux-security-module, stable
In-Reply-To: <20260407.wuaqueid3Pai@digikod.net>

On Tue, Apr 07, 2026 at 06:03:58PM +0200, Mickaël Salaün wrote:
> On Tue, Apr 07, 2026 at 09:30:40AM +0200, Günther Noack wrote:
> > On Sat, Apr 04, 2026 at 10:49:57AM +0200, Mickaël Salaün wrote:
> > > --- a/security/landlock/cred.c
> > > +++ b/security/landlock/cred.c
> > > @@ -22,10 +22,8 @@ static void hook_cred_transfer(struct cred *const new,
> > >  	const struct landlock_cred_security *const old_llcred =
> > >  		landlock_cred(old);
> > >  
> > > -	if (old_llcred->domain) {
> > > -		landlock_get_ruleset(old_llcred->domain);
> > > -		*landlock_cred(new) = *old_llcred;
> > > -	}
> > > +	landlock_get_ruleset(old_llcred->domain);
> > > +	*landlock_cred(new) = *old_llcred;
> > 
> > This fix looks correct for the hook_cred_prepare() case (and of
> > course, hook_cred_prepare() calls hook_cred_transfer() in Landlock).
> > 
> > 
> > But I'm afraid I might have spotted another issue here:
> > 
> > If I look at the code in security/keys/process_keys.c, where
> > > security_transfer_creds() is called, the "old" object is actually
> > already initialized, and if we are not checking for that, I think we
> > are leaking memory.
> 
> old is only a partially initialized credential, and the Landlock
> part is not set yet, which is the goal of hook_transfer_creds(), so
> there is no leak.

Ah, you are right.  I think we might have mixed up the names "old" and
"new" in the discussion briefly, but it's still correct - the target
credential is only partially populated and its Landlock domain is not
set, so we don't need to call landlock_put_ruleset() on it.


> > I would suggest to use the helper landlock_cred_copy() from cred.h for
> 
> This is not required but if we would like to do it anyway, that would
> not be backportable and would introduce a (minimal) performance penalty.

Fair enough, the backportability is a reasonable argument.


> > Test looks fine.
> > 
> > While I do still think we should investigate the memory leak, this
> > commit is, as it is, already a strict improvement over what we had
> > before, so:
> > 
> > Reviewed-by: Günther Noack <gnoack3000@gmail.com>
> 
> I'll keep your tag if this patch is ok with you as-is.

Yes, absolutely.

–Günther

^ permalink raw reply

* Re: LSM: Whiteout chardev creation sidesteps mknod hook
From: Serge Hallyn @ 2026-04-07 17:15 UTC (permalink / raw)
  To: Günther Noack
  Cc: Christian Brauner, Mickaël Salaün, Paul Moore,
	linux-security-module
In-Reply-To: <adUBCQXrt7kmgqJT@google.com>

Apr 7, 2026 08:05:43 Günther Noack <gnoack@google.com>:

> Hello Christian, Paul, Mickaël and LSM maintainers!
>
> I discovered the following bug in Landlock, which potentially also
> affects other LSMs:
>
> With renameat2(2)'s RENAME_WHITEOUT flag, it is possible to create a
> "whiteout object" at the source of the rename.  Whiteout objects are
> character devices with major/minor (0, 0) -- these devices are not
> bound to any driver, so they are harmless, but still, the creation of
> these files can sidestep the LANDLOCK_ACCESS_FS_MAKE_CHAR access right
> in Landlock.
>
>
> I am unsure which is the right fix here -- do you have an opinion
> on this from the VFS/LSM side?
>
>
> Option 1: Make filesystems call security_path_mknod() during RENAME_WHITEOUT?
>
> Do it in the VFS rename hook.
>
> * Pro: Fixes it for all LSMs
> * Con: Call would have to be done in multiple filesystems
>
>
> Option 2: Handle it in security_{path,inode}_rename()
>
> Make Landlock handle it in security_inode_rename() by looking for the
> RENAME_WHITEOUT flag.
>
> * Con: Operation should only be denied if the file system even
>   implements RENAME_WHITEOUT, and we would have to maintain a list of
>   affected filesystems for that.  (That feels like solving it at the
>   wrong layer of abstraction.)
> * Con: Unclear whether other LSMs need a similar fix
>
>
> Option 3: Declare that this is working as intended?

Option 3 has my vote.


> * Pro: (0, 0) is not a "real" character device
>
>
> In cases 1 and 2, we'd likely need to double check that we are not
> breaking existing scenarios involving OverlayFS, by suddenly requiring
> a more lax policy for creating character devices on these directories.
>
> Please let me know what you think.  I'm specifically interested in:
>
> 1. Christian: What is the appropriate way to do this VFS wise?
> 2. LSM maintainers: Is this a bug that affects other LSMs as well?
>
> Thanks,
> —Günther
>
> P.S.: For full transparency, I found this bug by pointing Google
> Gemini at the Landlock codebase.


^ permalink raw reply

* [PATCH v2 1/2] landlock: Fix LOG_SUBDOMAINS_OFF inheritance across fork()
From: Mickaël Salaün @ 2026-04-07 16:41 UTC (permalink / raw)
  To: Günther Noack
  Cc: Mickaël Salaün, linux-security-module, Jann Horn,
	stable, Günther Noack

hook_cred_transfer() only copies the Landlock security blob when the
source credential has a domain.  This is inconsistent with
landlock_restrict_self() which can set LOG_SUBDOMAINS_OFF on a
credential without creating a domain (via the ruleset_fd=-1 path): the
field is committed but not preserved across fork() because the child's
prepare_creds() calls hook_cred_transfer() which skips the copy when
domain is NULL.

This breaks the documented use case where a process mutes subdomain logs
before forking sandboxed children: the children lose the muting and
their domains produce unexpected audit records.

Fix this by unconditionally copying the Landlock credential blob.

Cc: Günther Noack <gnoack@google.com>
Cc: Jann Horn <jannh@google.com>
Cc: stable@vger.kernel.org
Fixes: ead9079f7569 ("landlock: Add LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF")
Reviewed-by: Günther Noack <gnoack3000@gmail.com>
Signed-off-by: Mickaël Salaün <mic@digikod.net>
---

Changes since v1:
https://lore.kernel.org/r/20260404085001.1604405-1-mic@digikod.net
- Improve subject.
- Add Reviewed-by Günther.
---
 security/landlock/cred.c                      |  6 +-
 tools/testing/selftests/landlock/audit_test.c | 88 +++++++++++++++++++
 2 files changed, 90 insertions(+), 4 deletions(-)

diff --git a/security/landlock/cred.c b/security/landlock/cred.c
index 0cb3edde4d18..cc419de75cd6 100644
--- a/security/landlock/cred.c
+++ b/security/landlock/cred.c
@@ -22,10 +22,8 @@ static void hook_cred_transfer(struct cred *const new,
 	const struct landlock_cred_security *const old_llcred =
 		landlock_cred(old);
 
-	if (old_llcred->domain) {
-		landlock_get_ruleset(old_llcred->domain);
-		*landlock_cred(new) = *old_llcred;
-	}
+	landlock_get_ruleset(old_llcred->domain);
+	*landlock_cred(new) = *old_llcred;
 }
 
 static int hook_cred_prepare(struct cred *const new,
diff --git a/tools/testing/selftests/landlock/audit_test.c b/tools/testing/selftests/landlock/audit_test.c
index 46d02d49835a..20099b8667e7 100644
--- a/tools/testing/selftests/landlock/audit_test.c
+++ b/tools/testing/selftests/landlock/audit_test.c
@@ -279,6 +279,94 @@ TEST_F(audit, thread)
 				&audit_tv_default, sizeof(audit_tv_default)));
 }
 
+/*
+ * Verifies that log_subdomains_off set via the ruleset_fd=-1 path (without
+ * creating a domain) is inherited by children across fork().  This exercises
+ * the hook_cred_transfer() fix: the Landlock credential blob must be copied
+ * even when the source credential has no domain.
+ *
+ * Phase 1 (baseline): a child without muting creates a domain and triggers a
+ * denial that IS logged.
+ *
+ * Phase 2 (after muting): the parent mutes subdomain logs, forks another child
+ * who creates a domain and triggers a denial that is NOT logged.
+ */
+TEST_F(audit, log_subdomains_off_fork)
+{
+	const struct landlock_ruleset_attr ruleset_attr = {
+		.scoped = LANDLOCK_SCOPE_SIGNAL,
+	};
+	struct audit_records records;
+	int ruleset_fd, status;
+	pid_t child;
+
+	ruleset_fd =
+		landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0);
+	ASSERT_LE(0, ruleset_fd);
+
+	ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0));
+
+	/*
+	 * Phase 1: forks a child that creates a domain and triggers a denial
+	 * before any muting.  This proves the audit path works.
+	 */
+	child = fork();
+	ASSERT_LE(0, child);
+	if (child == 0) {
+		ASSERT_EQ(0, landlock_restrict_self(ruleset_fd, 0));
+		ASSERT_EQ(-1, kill(getppid(), 0));
+		ASSERT_EQ(EPERM, errno);
+		_exit(0);
+		return;
+	}
+
+	ASSERT_EQ(child, waitpid(child, &status, 0));
+	ASSERT_EQ(true, WIFEXITED(status));
+	ASSERT_EQ(0, WEXITSTATUS(status));
+
+	/* The denial must be logged (baseline). */
+	EXPECT_EQ(0, matches_log_signal(_metadata, self->audit_fd, getpid(),
+					NULL));
+
+	/* Drains any remaining records (e.g. domain allocation). */
+	EXPECT_EQ(0, audit_count_records(self->audit_fd, &records));
+
+	/*
+	 * Mutes subdomain logs without creating a domain.  The parent's
+	 * credential has domain=NULL and log_subdomains_off=1.
+	 */
+	ASSERT_EQ(0, landlock_restrict_self(
+			     -1, LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF));
+
+	/*
+	 * Phase 2: forks a child that creates a domain and triggers a denial.
+	 * Because log_subdomains_off was inherited via fork(), the child's
+	 * domain has log_status=LANDLOCK_LOG_DISABLED.
+	 */
+	child = fork();
+	ASSERT_LE(0, child);
+	if (child == 0) {
+		ASSERT_EQ(0, landlock_restrict_self(ruleset_fd, 0));
+		ASSERT_EQ(-1, kill(getppid(), 0));
+		ASSERT_EQ(EPERM, errno);
+		_exit(0);
+		return;
+	}
+
+	ASSERT_EQ(child, waitpid(child, &status, 0));
+	ASSERT_EQ(true, WIFEXITED(status));
+	ASSERT_EQ(0, WEXITSTATUS(status));
+
+	/* No denial record should appear. */
+	EXPECT_EQ(-EAGAIN, matches_log_signal(_metadata, self->audit_fd,
+					      getpid(), NULL));
+
+	EXPECT_EQ(0, audit_count_records(self->audit_fd, &records));
+	EXPECT_EQ(0, records.access);
+
+	EXPECT_EQ(0, close(ruleset_fd));
+}
+
 FIXTURE(audit_flags)
 {
 	struct audit_filter audit_filter;
-- 
2.53.0


^ permalink raw reply related

* [PATCH v2 2/2] landlock: Allow TSYNC with LOG_SUBDOMAINS_OFF and fd=-1
From: Mickaël Salaün @ 2026-04-07 16:41 UTC (permalink / raw)
  To: Günther Noack
  Cc: Mickaël Salaün, linux-security-module, Jann Horn,
	stable, Günther Noack
In-Reply-To: <20260407164107.2012589-1-mic@digikod.net>

LANDLOCK_RESTRICT_SELF_TSYNC does not allow
LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF with ruleset_fd=-1, preventing
a multithreaded process from atomically propagating subdomain log muting
to all threads without creating a domain layer.  Relax the fd=-1
condition to accept TSYNC alongside LOG_SUBDOMAINS_OFF, and update the
documentation accordingly.

Add flag validation tests for all TSYNC combinations with ruleset_fd=-1,
and audit tests verifying both transition directions: muting via TSYNC
(logged to not logged) and override via TSYNC (not logged to logged).

Cc: Günther Noack <gnoack@google.com>
Cc: stable@vger.kernel.org
Fixes: 42fc7e6543f6 ("landlock: Multithreading support for landlock_restrict_self()")
Reviewed-by: Günther Noack <gnoack3000@gmail.com>
Signed-off-by: Mickaël Salaün <mic@digikod.net>
---

Changes since v1:
https://lore.kernel.org/r/20260404085001.1604405-2-mic@digikod.net
- Zero-initialize struct thread_data to avoid flaky uninitialized
  mute_subdomains field (pointed out by Günther Noack).
- Drop capabilities and set no_new_privs in tsync_without_ruleset
  fixture setup instead of relying on ambient CAP_SYS_ADMIN (pointed
  out by Günther Noack).
- Add Reviewed-by Günther.
---
 include/uapi/linux/landlock.h                 |   4 +-
 security/landlock/syscalls.c                  |  14 +-
 tools/testing/selftests/landlock/audit_test.c | 233 ++++++++++++++++++
 tools/testing/selftests/landlock/tsync_test.c |  77 ++++++
 4 files changed, 322 insertions(+), 6 deletions(-)

diff --git a/include/uapi/linux/landlock.h b/include/uapi/linux/landlock.h
index f88fa1f68b77..d37603efc273 100644
--- a/include/uapi/linux/landlock.h
+++ b/include/uapi/linux/landlock.h
@@ -116,7 +116,9 @@ struct landlock_ruleset_attr {
  *     ``LANDLOCK_RESTRICT_SELF_LOG_SAME_EXEC_OFF``, this flag only affects
  *     future nested domains, not the one being created. It can also be used
  *     with a @ruleset_fd value of -1 to mute subdomain logs without creating a
- *     domain.
+ *     domain.  When combined with %LANDLOCK_RESTRICT_SELF_TSYNC and a
+ *     @ruleset_fd value of -1, this configuration is propagated to all threads
+ *     of the current process.
  *
  * The following flag supports policy enforcement in multithreaded processes:
  *
diff --git a/security/landlock/syscalls.c b/security/landlock/syscalls.c
index 0d66a68677b7..a0bb664e0d31 100644
--- a/security/landlock/syscalls.c
+++ b/security/landlock/syscalls.c
@@ -512,10 +512,13 @@ SYSCALL_DEFINE2(landlock_restrict_self, const int, ruleset_fd, const __u32,
 
 	/*
 	 * It is allowed to set LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF with
-	 * -1 as ruleset_fd, but no other flag must be set.
+	 * -1 as ruleset_fd, optionally combined with
+	 * LANDLOCK_RESTRICT_SELF_TSYNC to propagate this configuration to all
+	 * threads.  No other flag must be set.
 	 */
 	if (!(ruleset_fd == -1 &&
-	      flags == LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF)) {
+	      (flags & ~LANDLOCK_RESTRICT_SELF_TSYNC) ==
+		      LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF)) {
 		/* Gets and checks the ruleset. */
 		ruleset = get_ruleset_from_fd(ruleset_fd, FMODE_CAN_READ);
 		if (IS_ERR(ruleset))
@@ -537,9 +540,10 @@ SYSCALL_DEFINE2(landlock_restrict_self, const int, ruleset_fd, const __u32,
 
 	/*
 	 * The only case when a ruleset may not be set is if
-	 * LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF is set and ruleset_fd is -1.
-	 * We could optimize this case by not calling commit_creds() if this flag
-	 * was already set, but it is not worth the complexity.
+	 * LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF is set (optionally with
+	 * LANDLOCK_RESTRICT_SELF_TSYNC) and ruleset_fd is -1.  We could
+	 * optimize this case by not calling commit_creds() if this flag was
+	 * already set, but it is not worth the complexity.
 	 */
 	if (ruleset) {
 		/*
diff --git a/tools/testing/selftests/landlock/audit_test.c b/tools/testing/selftests/landlock/audit_test.c
index 20099b8667e7..897596cd7c80 100644
--- a/tools/testing/selftests/landlock/audit_test.c
+++ b/tools/testing/selftests/landlock/audit_test.c
@@ -162,6 +162,7 @@ TEST_F(audit, layers)
 struct thread_data {
 	pid_t parent_pid;
 	int ruleset_fd, pipe_child, pipe_parent;
+	bool mute_subdomains;
 };
 
 static void *thread_audit_test(void *arg)
@@ -367,6 +368,238 @@ TEST_F(audit, log_subdomains_off_fork)
 	EXPECT_EQ(0, close(ruleset_fd));
 }
 
+/*
+ * Thread function: runs two rounds of (create domain, trigger denial, signal
+ * back), waiting for the main thread before each round.  When mute_subdomains
+ * is set, phase 1 also mutes subdomain logs via the fd=-1 path before creating
+ * the domain.  The ruleset_fd is kept open across both rounds so each
+ * restrict_self call stacks a new domain layer.
+ */
+static void *thread_sandbox_deny_twice(void *arg)
+{
+	const struct thread_data *data = (struct thread_data *)arg;
+	uintptr_t err = 0;
+	char buffer;
+
+	/* Phase 1: optionally mutes, creates a domain, and triggers a denial. */
+	if (read(data->pipe_parent, &buffer, 1) != 1) {
+		err = 1;
+		goto out;
+	}
+
+	if (data->mute_subdomains &&
+	    landlock_restrict_self(-1,
+				   LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF)) {
+		err = 2;
+		goto out;
+	}
+
+	if (landlock_restrict_self(data->ruleset_fd, 0)) {
+		err = 3;
+		goto out;
+	}
+
+	if (kill(data->parent_pid, 0) != -1 || errno != EPERM) {
+		err = 4;
+		goto out;
+	}
+
+	if (write(data->pipe_child, ".", 1) != 1) {
+		err = 5;
+		goto out;
+	}
+
+	/* Phase 2: stacks another domain and triggers a denial. */
+	if (read(data->pipe_parent, &buffer, 1) != 1) {
+		err = 6;
+		goto out;
+	}
+
+	if (landlock_restrict_self(data->ruleset_fd, 0)) {
+		err = 7;
+		goto out;
+	}
+
+	if (kill(data->parent_pid, 0) != -1 || errno != EPERM) {
+		err = 8;
+		goto out;
+	}
+
+	if (write(data->pipe_child, ".", 1) != 1) {
+		err = 9;
+		goto out;
+	}
+
+out:
+	close(data->ruleset_fd);
+	close(data->pipe_child);
+	close(data->pipe_parent);
+	return (void *)err;
+}
+
+/*
+ * Verifies that LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF with
+ * LANDLOCK_RESTRICT_SELF_TSYNC and ruleset_fd=-1 propagates log_subdomains_off
+ * to a sibling thread, suppressing audit logging on domains it subsequently
+ * creates.
+ *
+ * Phase 1 (before TSYNC) acts as an inline baseline: the sibling creates a
+ * domain and triggers a denial that IS logged.
+ *
+ * Phase 2 (after TSYNC) verifies suppression: the sibling stacks another domain
+ * and triggers a denial that is NOT logged.
+ */
+TEST_F(audit, log_subdomains_off_tsync)
+{
+	const struct landlock_ruleset_attr ruleset_attr = {
+		.scoped = LANDLOCK_SCOPE_SIGNAL,
+	};
+	struct audit_records records;
+	struct thread_data child_data = {};
+	int pipe_child[2], pipe_parent[2];
+	char buffer;
+	pthread_t thread;
+	void *thread_ret;
+
+	child_data.parent_pid = getppid();
+	ASSERT_EQ(0, pipe2(pipe_child, O_CLOEXEC));
+	child_data.pipe_child = pipe_child[1];
+	ASSERT_EQ(0, pipe2(pipe_parent, O_CLOEXEC));
+	child_data.pipe_parent = pipe_parent[0];
+	child_data.ruleset_fd =
+		landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0);
+	ASSERT_LE(0, child_data.ruleset_fd);
+
+	ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0));
+
+	/* Creates the sibling thread. */
+	ASSERT_EQ(0, pthread_create(&thread, NULL, thread_sandbox_deny_twice,
+				    &child_data));
+
+	/*
+	 * Phase 1: the sibling creates a domain and triggers a denial before
+	 * any log muting.  This proves the audit path works.
+	 */
+	ASSERT_EQ(1, write(pipe_parent[1], ".", 1));
+	ASSERT_EQ(1, read(pipe_child[0], &buffer, 1));
+
+	/* The denial must be logged. */
+	EXPECT_EQ(0, matches_log_signal(_metadata, self->audit_fd,
+					child_data.parent_pid, NULL));
+
+	/* Drains any remaining records (e.g. domain allocation). */
+	EXPECT_EQ(0, audit_count_records(self->audit_fd, &records));
+
+	/*
+	 * Mutes subdomain logs and propagates to the sibling thread via TSYNC,
+	 * without creating a domain.
+	 */
+	ASSERT_EQ(0, landlock_restrict_self(
+			     -1, LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF |
+					 LANDLOCK_RESTRICT_SELF_TSYNC));
+
+	/*
+	 * Phase 2: the sibling stacks another domain and triggers a denial.
+	 * Because log_subdomains_off was propagated via TSYNC, the new domain
+	 * has log_status=LANDLOCK_LOG_DISABLED.
+	 */
+	ASSERT_EQ(1, write(pipe_parent[1], ".", 1));
+	ASSERT_EQ(1, read(pipe_child[0], &buffer, 1));
+
+	/* No denial record should appear. */
+	EXPECT_EQ(-EAGAIN, matches_log_signal(_metadata, self->audit_fd,
+					      child_data.parent_pid, NULL));
+
+	EXPECT_EQ(0, audit_count_records(self->audit_fd, &records));
+	EXPECT_EQ(0, records.access);
+
+	EXPECT_EQ(0, close(pipe_child[0]));
+	EXPECT_EQ(0, close(pipe_parent[1]));
+	ASSERT_EQ(0, pthread_join(thread, &thread_ret));
+	EXPECT_EQ(NULL, thread_ret);
+}
+
+/*
+ * Verifies that LANDLOCK_RESTRICT_SELF_TSYNC without
+ * LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF overrides a sibling thread's
+ * log_subdomains_off, re-enabling audit logging on domains the sibling
+ * subsequently creates.
+ *
+ * Phase 1: the sibling sets log_subdomains_off, creates a muted domain, and
+ * triggers a denial that is NOT logged.
+ *
+ * Phase 2 (after TSYNC without LOG_SUBDOMAINS_OFF): the sibling stacks another
+ * domain and triggers a denial that IS logged, proving the muting was
+ * overridden.
+ */
+TEST_F(audit, tsync_override_log_subdomains_off)
+{
+	const struct landlock_ruleset_attr ruleset_attr = {
+		.scoped = LANDLOCK_SCOPE_SIGNAL,
+	};
+	struct audit_records records;
+	struct thread_data child_data = {};
+	int pipe_child[2], pipe_parent[2];
+	char buffer;
+	pthread_t thread;
+	void *thread_ret;
+
+	child_data.parent_pid = getppid();
+	ASSERT_EQ(0, pipe2(pipe_child, O_CLOEXEC));
+	child_data.pipe_child = pipe_child[1];
+	ASSERT_EQ(0, pipe2(pipe_parent, O_CLOEXEC));
+	child_data.pipe_parent = pipe_parent[0];
+	child_data.ruleset_fd =
+		landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0);
+	ASSERT_LE(0, child_data.ruleset_fd);
+
+	ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0));
+
+	child_data.mute_subdomains = true;
+
+	/* Creates the sibling thread. */
+	ASSERT_EQ(0, pthread_create(&thread, NULL, thread_sandbox_deny_twice,
+				    &child_data));
+
+	/*
+	 * Phase 1: the sibling mutes subdomain logs, creates a domain, and
+	 * triggers a denial.  The denial must not be logged.
+	 */
+	ASSERT_EQ(1, write(pipe_parent[1], ".", 1));
+	ASSERT_EQ(1, read(pipe_child[0], &buffer, 1));
+
+	EXPECT_EQ(-EAGAIN, matches_log_signal(_metadata, self->audit_fd,
+					      child_data.parent_pid, NULL));
+
+	/* Drains any remaining records. */
+	EXPECT_EQ(0, audit_count_records(self->audit_fd, &records));
+	EXPECT_EQ(0, records.access);
+
+	/*
+	 * Overrides the sibling's log_subdomains_off by calling TSYNC without
+	 * LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF.
+	 */
+	ASSERT_EQ(0, landlock_restrict_self(child_data.ruleset_fd,
+					    LANDLOCK_RESTRICT_SELF_TSYNC));
+
+	/*
+	 * Phase 2: the sibling stacks another domain and triggers a denial.
+	 * Because TSYNC replaced its log_subdomains_off with 0, the new domain
+	 * has log_status=LANDLOCK_LOG_PENDING.
+	 */
+	ASSERT_EQ(1, write(pipe_parent[1], ".", 1));
+	ASSERT_EQ(1, read(pipe_child[0], &buffer, 1));
+
+	/* The denial must be logged. */
+	EXPECT_EQ(0, matches_log_signal(_metadata, self->audit_fd,
+					child_data.parent_pid, NULL));
+
+	EXPECT_EQ(0, close(pipe_child[0]));
+	EXPECT_EQ(0, close(pipe_parent[1]));
+	ASSERT_EQ(0, pthread_join(thread, &thread_ret));
+	EXPECT_EQ(NULL, thread_ret);
+}
+
 FIXTURE(audit_flags)
 {
 	struct audit_filter audit_filter;
diff --git a/tools/testing/selftests/landlock/tsync_test.c b/tools/testing/selftests/landlock/tsync_test.c
index 2b9ad4f154f4..9cf1491bbaaf 100644
--- a/tools/testing/selftests/landlock/tsync_test.c
+++ b/tools/testing/selftests/landlock/tsync_test.c
@@ -247,4 +247,81 @@ TEST(tsync_interrupt)
 	EXPECT_EQ(0, close(ruleset_fd));
 }
 
+/* clang-format off */
+FIXTURE(tsync_without_ruleset) {};
+/* clang-format on */
+
+FIXTURE_VARIANT(tsync_without_ruleset)
+{
+	const __u32 flags;
+	const int expected_errno;
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(tsync_without_ruleset, tsync_only) {
+	/* clang-format on */
+	.flags = LANDLOCK_RESTRICT_SELF_TSYNC,
+	.expected_errno = EBADF,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(tsync_without_ruleset, subdomains_off_same_exec_off) {
+	/* clang-format on */
+	.flags = LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF |
+		 LANDLOCK_RESTRICT_SELF_LOG_SAME_EXEC_OFF |
+		 LANDLOCK_RESTRICT_SELF_TSYNC,
+	.expected_errno = EBADF,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(tsync_without_ruleset, subdomains_off_new_exec_on) {
+	/* clang-format on */
+	.flags = LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF |
+		 LANDLOCK_RESTRICT_SELF_LOG_NEW_EXEC_ON |
+		 LANDLOCK_RESTRICT_SELF_TSYNC,
+	.expected_errno = EBADF,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(tsync_without_ruleset, all_flags) {
+	/* clang-format on */
+	.flags = LANDLOCK_RESTRICT_SELF_LOG_SAME_EXEC_OFF |
+		 LANDLOCK_RESTRICT_SELF_LOG_NEW_EXEC_ON |
+		 LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF |
+		 LANDLOCK_RESTRICT_SELF_TSYNC,
+	.expected_errno = EBADF,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(tsync_without_ruleset, subdomains_off) {
+	/* clang-format on */
+	.flags = LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF |
+		 LANDLOCK_RESTRICT_SELF_TSYNC,
+	.expected_errno = 0,
+};
+
+FIXTURE_SETUP(tsync_without_ruleset)
+{
+	disable_caps(_metadata);
+}
+
+FIXTURE_TEARDOWN(tsync_without_ruleset)
+{
+}
+
+TEST_F(tsync_without_ruleset, check)
+{
+	int ret;
+
+	ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0));
+
+	ret = landlock_restrict_self(-1, variant->flags);
+	if (variant->expected_errno) {
+		EXPECT_EQ(-1, ret);
+		EXPECT_EQ(variant->expected_errno, errno);
+	} else {
+		EXPECT_EQ(0, ret);
+	}
+}
+
 TEST_HARNESS_MAIN
-- 
2.53.0


^ permalink raw reply related

* Re: [PATCH v5 2/3] ima: trim N IMA event log records
From: Roberto Sassu @ 2026-04-07 16:19 UTC (permalink / raw)
  To: steven chen, linux-integrity
  Cc: zohar, roberto.sassu, dmitry.kasatkin, eric.snowberg, corbet,
	serge, paul, jmorris, linux-security-module, anirudhve,
	gregorylumen, nramas, sushring, linux-doc
In-Reply-To: <20260401172956.4581-3-chenste@linux.microsoft.com>

On Wed, 2026-04-01 at 10:29 -0700, steven chen wrote:
> Trim N entries of the IMA event logs. Do not clean the hash table.

The very first change of this patch is the ima_flush_htable kernel
option that I introduced for my use case.

At the bottom of this patch you actually check the ima_flush_htable
boolean, and delete the measurements entries without disconnecting them
from the hash table, so the digest lookup is done on freed memory.

Next, you duplicated my changes regarding the measurements list
counter. But instead of removing the old counter from the hash table,
you keep incrementing both, but use the new one.

In ima_log_trim_open(), you again use my duplicated code to manage the
exclusive write/concurrent read scheme for the measurement interfaces.
However, for read, if the process does not have CAP_SYS_ADMIN it falls
back to calling _ima_measurements_open(). Not sure that was intended.

And, in ima_log_trim_release(), you check CAP_SYS_ADMIN again, which is
redundant: you would not reach this code if the same requirements were
not met at open time. You also return an error on close().

In ima_log_trim_write(), you do manual string to number conversion for
your first number and use kstrtoul() for the second.

The measurements lists and the associated counter are atomically
updated in ima_add_digest_entry(), but not atomically accessed in
ima_delete_event_log(). Also, the measurements list is traversed
without the _rcu variant or a lock.

While this trimming scheme aims at minimizing the kernel space and user
space delay, it also introduces the following problem. If two agents
perform a TPM quote that includes a different number of entries, there
is no guarantee that the one willing to trim fewer entries wins, which
means that one agent could end up not seeing the most recent entries,
as they were already trimmed by the other agent.

My solution is not affected by this problem, since there will be only
one process collecting all the measurements in user space and exposing
them to the agents.

Also, I didn't understand why T and ima_measure_users have to be
preserved on soft reboots. Especially ima_measure_users reflects the
state of open files for a particular kernel, but on soft reboot a new
kernel is booted.

I personally will not endorse a solution based on the ima_trim_log
interface. I could accept trimming N even more efficiently than we
currently do with a lockless walk to determine the cutting position in
ima_queue_stage(), so that we don't need to splice back entries to the
measurement list. This would be a replacement of patch 11 in my patch
set, but this would be as far as I would like to go.

Roberto

> The values saved in hash table were already used.
> 
> Provide a userspace interface ima_trim_log:
> When read this interface, it returns total number T of entries trimmed
> since system boot up.
> When write to this interface need to provide two numbers T:N to let
> kernel to trim N entries of IMA event logs.
> 
> Kernel measurement list lock time performance improvement by not
> clean the hash table.
> 
> when kernel get log trim request T:N
>  - Get the T, compare with the total trimmed number
>  - if equal, then do trim N and change T to T+N
>  - else return error
> 
> Signed-off-by: steven chen <chenste@linux.microsoft.com>
> ---
>  .../admin-guide/kernel-parameters.txt         |   4 +
>  security/integrity/ima/ima.h                  |   4 +-
>  security/integrity/ima/ima_fs.c               | 198 +++++++++++++++++-
>  security/integrity/ima/ima_kexec.c            |   2 +-
>  security/integrity/ima/ima_queue.c            |  96 +++++++++
>  5 files changed, 296 insertions(+), 8 deletions(-)
> 
> diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
> index e92c0056e4e0..cd1a1d0bf0e2 100644
> --- a/Documentation/admin-guide/kernel-parameters.txt
> +++ b/Documentation/admin-guide/kernel-parameters.txt
> @@ -2197,6 +2197,10 @@
>  			Use the canonical format for the binary runtime
>  			measurements, instead of host native format.
>  
> +	ima_flush_htable  [IMA]
> +			Flush the measurement list hash table when trim all
> +			or a part of it for deletion.
> +
>  	ima_hash=	[IMA]
>  			Format: { md5 | sha1 | rmd160 | sha256 | sha384
>  				   | sha512 | ... }
> diff --git a/security/integrity/ima/ima.h b/security/integrity/ima/ima.h
> index e3d71d8d56e3..5cbee3a295a0 100644
> --- a/security/integrity/ima/ima.h
> +++ b/security/integrity/ima/ima.h
> @@ -243,11 +243,13 @@ void ima_post_key_create_or_update(struct key *keyring, struct key *key,
>  				   const void *payload, size_t plen,
>  				   unsigned long flags, bool create);
>  #endif
> -
> +extern atomic_long_t ima_number_entries;
>  #ifdef CONFIG_IMA_KEXEC
>  void ima_measure_kexec_event(const char *event_name);
> +long ima_delete_event_log(long req_val);
>  #else
>  static inline void ima_measure_kexec_event(const char *event_name) {}
> +static inline long ima_delete_event_log(long req_val) { return 0; }
>  #endif
>  
>  /*
> diff --git a/security/integrity/ima/ima_fs.c b/security/integrity/ima/ima_fs.c
> index 87045b09f120..8e26e0f34311 100644
> --- a/security/integrity/ima/ima_fs.c
> +++ b/security/integrity/ima/ima_fs.c
> @@ -21,6 +21,9 @@
>  #include <linux/rcupdate.h>
>  #include <linux/parser.h>
>  #include <linux/vmalloc.h>
> +#include <linux/ktime.h>
> +#include <linux/timekeeping.h>
> +#include <linux/ima.h>
>  
>  #include "ima.h"
>  
> @@ -38,6 +41,17 @@ __setup("ima_canonical_fmt", default_canonical_fmt_setup);
>  
>  static int valid_policy = 1;
>  
> +#define IMA_LOG_TRIM_REQ_NUM_LENGTH 15
> +#define IMA_LOG_TRIM_REQ_TOTAL_LENGTH 32
> +atomic_long_t ima_number_entries = ATOMIC_LONG_INIT(0);
> +static long trimcount;
> +/* mutex protects atomicity of trimming measurement list
> + * and also protects atomicity the measurement list read
> + * write operation.
> + */
> +static DEFINE_MUTEX(ima_measure_lock);
> +static long ima_measure_users;
> +
>  static ssize_t ima_show_htable_value(char __user *buf, size_t count,
>  				     loff_t *ppos, atomic_long_t *val)
>  {
> @@ -64,8 +78,7 @@ static ssize_t ima_show_measurements_count(struct file *filp,
>  					   char __user *buf,
>  					   size_t count, loff_t *ppos)
>  {
> -	return ima_show_htable_value(buf, count, ppos, &ima_htable.len);
> -
> +	return ima_show_htable_value(buf, count, ppos, &ima_number_entries);
>  }
>  
>  static const struct file_operations ima_measurements_count_ops = {
> @@ -202,16 +215,77 @@ static const struct seq_operations ima_measurments_seqops = {
>  	.show = ima_measurements_show
>  };
>  
> +/*
> + * _ima_measurements_open - open the IMA measurements file
> + * @inode: inode of the file being opened
> + * @file: file being opened
> + * @seq_ops: sequence operations for the file
> + *
> + * Returns 0 on success, or negative error code.
> + * Implements mutual exclusion between readers and writer
> + * of the measurements file. Multiple readers are allowed,
> + * but writer get exclusive access only no other readers/writers.
> + * Readers is not allowed when there is a writer.
> + */
> +static int _ima_measurements_open(struct inode *inode, struct file *file,
> +				  const struct seq_operations *seq_ops)
> +{
> +	bool write = !!(file->f_mode & FMODE_WRITE);
> +	int ret;
> +
> +	if (write && !capable(CAP_SYS_ADMIN))
> +		return -EPERM;
> +
> +	mutex_lock(&ima_measure_lock);
> +	if ((write && ima_measure_users != 0) ||
> +	    (!write && ima_measure_users < 0)) {
> +		mutex_unlock(&ima_measure_lock);
> +		return -EBUSY;
> +	}
> +
> +	ret = seq_open(file, seq_ops);
> +	if (ret < 0) {
> +		mutex_unlock(&ima_measure_lock);
> +		return ret;
> +	}
> +
> +	if (write)
> +		ima_measure_users--;
> +	else
> +		ima_measure_users++;
> +
> +	mutex_unlock(&ima_measure_lock);
> +	return ret;
> +}
> +
>  static int ima_measurements_open(struct inode *inode, struct file *file)
>  {
> -	return seq_open(file, &ima_measurments_seqops);
> +	return _ima_measurements_open(inode, file, &ima_measurments_seqops);
> +}
> +
> +static int ima_measurements_release(struct inode *inode, struct file *file)
> +{
> +	bool write = !!(file->f_mode & FMODE_WRITE);
> +	int ret;
> +
> +	mutex_lock(&ima_measure_lock);
> +	ret = seq_release(inode, file);
> +	if (!ret) {
> +		if (!write)
> +			ima_measure_users--;
> +		else
> +			ima_measure_users++;
> +	}
> +
> +	mutex_unlock(&ima_measure_lock);
> +	return ret;
>  }
>  
>  static const struct file_operations ima_measurements_ops = {
>  	.open = ima_measurements_open,
>  	.read = seq_read,
>  	.llseek = seq_lseek,
> -	.release = seq_release,
> +	.release = ima_measurements_release,
>  };
>  
>  void ima_print_digest(struct seq_file *m, u8 *digest, u32 size)
> @@ -279,14 +353,114 @@ static const struct seq_operations ima_ascii_measurements_seqops = {
>  
>  static int ima_ascii_measurements_open(struct inode *inode, struct file *file)
>  {
> -	return seq_open(file, &ima_ascii_measurements_seqops);
> +	return _ima_measurements_open(inode, file, &ima_ascii_measurements_seqops);
>  }
>  
>  static const struct file_operations ima_ascii_measurements_ops = {
>  	.open = ima_ascii_measurements_open,
>  	.read = seq_read,
>  	.llseek = seq_lseek,
> -	.release = seq_release,
> +	.release = ima_measurements_release,
> +};
> +
> +static int ima_log_trim_open(struct inode *inode, struct file *file)
> +{
> +	bool write = !!(file->f_mode & FMODE_WRITE);
> +
> +	if (!write && capable(CAP_SYS_ADMIN))
> +		return 0;
> +	else if (!capable(CAP_SYS_ADMIN))
> +		return -EPERM;
> +
> +	return _ima_measurements_open(inode, file, &ima_measurments_seqops);
> +}
> +
> +static ssize_t ima_log_trim_read(struct file *file, char __user *buf, size_t size, loff_t *ppos)
> +{
> +	char tmpbuf[IMA_LOG_TRIM_REQ_NUM_LENGTH];
> +	ssize_t len;
> +
> +	len = scnprintf(tmpbuf, sizeof(tmpbuf), "%li\n", trimcount);
> +	return simple_read_from_buffer(buf, size, ppos, tmpbuf, len);
> +}
> +
> +static ssize_t ima_log_trim_write(struct file *file,
> +				  const char __user *buf, size_t datalen, loff_t *ppos)
> +{
> +	char tmpbuf[IMA_LOG_TRIM_REQ_TOTAL_LENGTH];
> +	char *p = tmpbuf;
> +	long count, ret, val = 0, max = LONG_MAX;
> +
> +	if (*ppos > 0 || datalen > IMA_LOG_TRIM_REQ_TOTAL_LENGTH || datalen < 2) {
> +		ret = -EINVAL;
> +		goto out;
> +	}
> +
> +	if (copy_from_user(tmpbuf, buf, datalen) != 0) {
> +		ret = -EFAULT;
> +		goto out;
> +	}
> +
> +	p = tmpbuf;
> +
> +	while (*p && *p != ':') {
> +		if (!isdigit((unsigned char)*p))
> +			return -EINVAL;
> +
> +		/* digit value */
> +		int d = *p - '0';
> +
> +		/* overflow check: val * 10 + d > max -> (val > (max - d) / 10) */
> +		if (val > (max - d) / 10)
> +			return -ERANGE;
> +
> +		val = val * 10 + d;
> +		p++;
> +	}
> +
> +	if (*p != ':')
> +		return -EINVAL;
> +
> +	/* verify trim count matches */
> +	if (val != trimcount)
> +		return -EINVAL;
> +
> +	p++; /* skip ':' */
> +	ret = kstrtoul(p, 0, &count);
> +
> +	if (ret < 0)
> +		goto out;
> +
> +	ret = ima_delete_event_log(count);
> +
> +	if (ret < 0)
> +		goto out;
> +
> +	trimcount += ret;
> +
> +	ret = datalen;
> +out:
> +	return ret;
> +}
> +
> +static int ima_log_trim_release(struct inode *inode, struct file *file)
> +{
> +	bool write = !!(file->f_mode & FMODE_WRITE);
> +
> +	if (!write && capable(CAP_SYS_ADMIN))
> +		return 0;
> +	else if (!capable(CAP_SYS_ADMIN))
> +		return -EPERM;
> +
> +	return ima_measurements_release(inode, file);
> +}
> +
> +static const struct file_operations ima_log_trim_ops = {
> +	.open = ima_log_trim_open,
> +	.read = ima_log_trim_read,
> +	.write = ima_log_trim_write,
> +	.llseek = generic_file_llseek,
> +	.release = ima_log_trim_release
>  };
>  
>  static ssize_t ima_read_policy(char *path)
> @@ -528,6 +702,18 @@ int __init ima_fs_init(void)
>  		goto out;
>  	}
>  
> +	if (IS_ENABLED(CONFIG_IMA_LOG_TRIMMING)) {
> +		dentry = securityfs_create_file("ima_trim_log",
> +						S_IRUSR | S_IRGRP | S_IWUSR | S_IWGRP,
> +						ima_dir, NULL, &ima_log_trim_ops);
> +		if (IS_ERR(dentry)) {
> +			ret = PTR_ERR(dentry);
> +			goto out;
> +		}
> +	}
> +
> +	trimcount = 0;
> +
>  	dentry = securityfs_create_file("runtime_measurements_count",
>  				   S_IRUSR | S_IRGRP, ima_dir, NULL,
>  				   &ima_measurements_count_ops);
> diff --git a/security/integrity/ima/ima_kexec.c b/security/integrity/ima/ima_kexec.c
> index 7362f68f2d8b..bee997683e03 100644
> --- a/security/integrity/ima/ima_kexec.c
> +++ b/security/integrity/ima/ima_kexec.c
> @@ -41,7 +41,7 @@ void ima_measure_kexec_event(const char *event_name)
>  	int n;
>  
>  	buf_size = ima_get_binary_runtime_size();
> -	len = atomic_long_read(&ima_htable.len);
> +	len = atomic_long_read(&ima_number_entries);
>  
>  	n = scnprintf(ima_kexec_event, IMA_KEXEC_EVENT_LEN,
>  		      "kexec_segment_size=%lu;ima_binary_runtime_size=%lu;"
> diff --git a/security/integrity/ima/ima_queue.c b/security/integrity/ima/ima_queue.c
> index 590637e81ad1..07225e19b9b5 100644
> --- a/security/integrity/ima/ima_queue.c
> +++ b/security/integrity/ima/ima_queue.c
> @@ -22,6 +22,14 @@
>  
>  #define AUDIT_CAUSE_LEN_MAX 32
>  
> +bool ima_flush_htable;
> +static int __init ima_flush_htable_setup(char *str)
> +{
> +	ima_flush_htable = true;
> +	return 1;
> +}
> +__setup("ima_flush_htable", ima_flush_htable_setup);
> +
>  /* pre-allocated array of tpm_digest structures to extend a PCR */
>  static struct tpm_digest *digests;
>  
> @@ -114,6 +122,7 @@ static int ima_add_digest_entry(struct ima_template_entry *entry,
>  	list_add_tail_rcu(&qe->later, &ima_measurements);
>  
>  	atomic_long_inc(&ima_htable.len);
> +	atomic_long_inc(&ima_number_entries);
>  	if (update_htable) {
>  		key = ima_hash_key(entry->digests[ima_hash_algo_idx].digest);
>  		hlist_add_head_rcu(&qe->hnext, &ima_htable.queue[key]);
> @@ -220,6 +229,93 @@ int ima_add_template_entry(struct ima_template_entry *entry, int violation,
>  	return result;
>  }
>  
> +/**
> + * ima_delete_event_log - delete IMA event entry
> + * @num_records: number of records to delete
> + *
> + * delete num_records entries off the measurement list.
> + * Returns num_records, or negative error code.
> + */
> +long ima_delete_event_log(long num_records)
> +{
> +	long len, cur = num_records, tmp_len = 0;
> +	struct ima_queue_entry *qe, *qe_tmp;
> +	LIST_HEAD(ima_measurements_to_delete);
> +	struct list_head *list_ptr;
> +
> +	if (!IS_ENABLED(CONFIG_IMA_LOG_TRIMMING))
> +		return -EOPNOTSUPP;
> +
> +	if (num_records <= 0)
> +		return num_records;
> +
> +	list_ptr = &ima_measurements;
> +
> +	len = atomic_long_read(&ima_number_entries);
> +
> +	if (num_records <= len) {
> +		list_for_each_entry(qe, list_ptr, later) {
> +			if (cur > 0) {
> +				tmp_len += get_binary_runtime_size(qe->entry);
> +				--cur;
> +			}
> +			if (cur == 0) {
> +				qe_tmp = qe;
> +				break;
> +			}
> +		}
> +	}
> +	else {
> +		return -ENOENT;
> +	}
> +
> +
> +	mutex_lock(&ima_extend_list_mutex);
> +	len = atomic_long_read(&ima_number_entries);
> +
> +	if (num_records == len) {
> +		list_replace(&ima_measurements, &ima_measurements_to_delete);
> +		INIT_LIST_HEAD(&ima_measurements);
> +		atomic_long_set(&ima_number_entries, 0);
> +		list_ptr = &ima_measurements_to_delete;
> +	}
> +	else {
> +		__list_cut_position(&ima_measurements_to_delete, &ima_measurements,
> +				    &qe_tmp->later);
> +		atomic_long_sub(num_records, &ima_number_entries);
> +		if (IS_ENABLED(CONFIG_IMA_KEXEC))
> +			binary_runtime_size -= tmp_len;
> +	}
> +
> +	mutex_unlock(&ima_extend_list_mutex);
> +
> +	if (ima_flush_htable)
> +		synchronize_rcu();
> +
> +	list_for_each_entry_safe(qe, qe_tmp, &ima_measurements_to_delete, later) {
> +		/*
> +		 * Ok because after list delete qe is only accessed by
> +		 * ima_lookup_digest_entry().
> +		 */
> +		for (int i = 0; i < qe->entry->template_desc->num_fields; i++) {
> +			kfree(qe->entry->template_data[i].data);
> +			qe->entry->template_data[i].data = NULL;
> +			qe->entry->template_data[i].len = 0;
> +		}
> +
> +		list_del(&qe->later);
> +
> +		/* No leak if !ima_flush_htable, referenced by ima_htable. */
> +		if (ima_flush_htable) {
> +			kfree(qe->entry->digests);
> +			kfree(qe->entry);
> +			kfree(qe);
> +		}
> +	}
> +
> +	return num_records;
> +}
> +
>  int ima_restore_measurement_entry(struct ima_template_entry *entry)
>  {
>  	int result = 0;


^ permalink raw reply

* Re: [PATCH v1 2/2] landlock: Allow TSYNC with LOG_SUBDOMAINS_OFF and fd=-1
From: Mickaël Salaün @ 2026-04-07 16:06 UTC (permalink / raw)
  To: Günther Noack; +Cc: Günther Noack, linux-security-module, stable
In-Reply-To: <20260407.7d922b20e863@gnoack.org>

On Tue, Apr 07, 2026 at 10:25:30AM +0200, Günther Noack wrote:
> Hello!
> 
> On Sat, Apr 04, 2026 at 10:49:58AM +0200, Mickaël Salaün wrote:
> > LANDLOCK_RESTRICT_SELF_TSYNC does not allow
> > LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF with ruleset_fd=-1, preventing
> > a multithreaded process from atomically propagating subdomain log muting
> > to all threads without creating a domain layer.  Relax the fd=-1
> > condition to accept TSYNC alongside LOG_SUBDOMAINS_OFF, and update the
> > documentation accordingly.
> > 
> > Add flag validation tests for all TSYNC combinations with ruleset_fd=-1,
> > and audit tests verifying both transition directions: muting via TSYNC
> > (logged to not logged) and override via TSYNC (not logged to logged).
> > 
> > Cc: Günther Noack <gnoack@google.com>
> > Cc: stable@vger.kernel.org
> > Fixes: 42fc7e6543f6 ("landlock: Multithreading support for landlock_restrict_self()")
> > Signed-off-by: Mickaël Salaün <mic@digikod.net>
> > ---
> >  include/uapi/linux/landlock.h                 |   4 +-
> >  security/landlock/syscalls.c                  |  14 +-
> >  tools/testing/selftests/landlock/audit_test.c | 233 ++++++++++++++++++
> >  tools/testing/selftests/landlock/tsync_test.c |  74 ++++++
> >  4 files changed, 319 insertions(+), 6 deletions(-)
> > 
> > diff --git a/include/uapi/linux/landlock.h b/include/uapi/linux/landlock.h
> > index f88fa1f68b77..d37603efc273 100644
> > --- a/include/uapi/linux/landlock.h
> > +++ b/include/uapi/linux/landlock.h
> > @@ -116,7 +116,9 @@ struct landlock_ruleset_attr {
> >   *     ``LANDLOCK_RESTRICT_SELF_LOG_SAME_EXEC_OFF``, this flag only affects
> >   *     future nested domains, not the one being created. It can also be used
> >   *     with a @ruleset_fd value of -1 to mute subdomain logs without creating a
> > - *     domain.
> > + *     domain.  When combined with %LANDLOCK_RESTRICT_SELF_TSYNC and a
> > + *     @ruleset_fd value of -1, this configuration is propagated to all threads
> > + *     of the current process.
> >   *
> >   * The following flag supports policy enforcement in multithreaded processes:
> >   *
> > diff --git a/security/landlock/syscalls.c b/security/landlock/syscalls.c
> > index 0d66a68677b7..a0bb664e0d31 100644
> > --- a/security/landlock/syscalls.c
> > +++ b/security/landlock/syscalls.c
> > @@ -512,10 +512,13 @@ SYSCALL_DEFINE2(landlock_restrict_self, const int, ruleset_fd, const __u32,
> >  
> >  	/*
> >  	 * It is allowed to set LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF with
> > -	 * -1 as ruleset_fd, but no other flag must be set.
> > +	 * -1 as ruleset_fd, optionally combined with
> > +	 * LANDLOCK_RESTRICT_SELF_TSYNC to propagate this configuration to all
> > +	 * threads.  No other flag must be set.
> >  	 */
> >  	if (!(ruleset_fd == -1 &&
> > -	      flags == LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF)) {
> > +	      (flags & ~LANDLOCK_RESTRICT_SELF_TSYNC) ==
> > +		      LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF)) {
> 
> Well spotted, thanks!
> 
> 
> >  		/* Gets and checks the ruleset. */
> >  		ruleset = get_ruleset_from_fd(ruleset_fd, FMODE_CAN_READ);
> >  		if (IS_ERR(ruleset))
> > @@ -537,9 +540,10 @@ SYSCALL_DEFINE2(landlock_restrict_self, const int, ruleset_fd, const __u32,
> >  
> >  	/*
> >  	 * The only case when a ruleset may not be set is if
> > -	 * LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF is set and ruleset_fd is -1.
> > -	 * We could optimize this case by not calling commit_creds() if this flag
> > -	 * was already set, but it is not worth the complexity.
> > +	 * LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF is set (optionally with
> > +	 * LANDLOCK_RESTRICT_SELF_TSYNC) and ruleset_fd is -1.  We could
> > +	 * optimize this case by not calling commit_creds() if this flag was
> > +	 * already set, but it is not worth the complexity.
> >  	 */
> >  	if (ruleset) {
> >  		/*
> > diff --git a/tools/testing/selftests/landlock/audit_test.c b/tools/testing/selftests/landlock/audit_test.c
> > index 20099b8667e7..a193d8a97560 100644
> > --- a/tools/testing/selftests/landlock/audit_test.c
> > +++ b/tools/testing/selftests/landlock/audit_test.c
> > @@ -162,6 +162,7 @@ TEST_F(audit, layers)
> >  struct thread_data {
> >  	pid_t parent_pid;
> >  	int ruleset_fd, pipe_child, pipe_parent;
> > +	bool mute_subdomains;
> >  };
> >  
> >  static void *thread_audit_test(void *arg)
> > @@ -367,6 +368,238 @@ TEST_F(audit, log_subdomains_off_fork)
> >  	EXPECT_EQ(0, close(ruleset_fd));
> >  }
> >  
> > +/*
> > + * Thread function: runs two rounds of (create domain, trigger denial, signal
> > + * back), waiting for the main thread before each round.  When mute_subdomains
> > + * is set, phase 1 also mutes subdomain logs via the fd=-1 path before creating
> > + * the domain.  The ruleset_fd is kept open across both rounds so each
> > + * restrict_self call stacks a new domain layer.
> > + */
> > +static void *thread_sandbox_deny_twice(void *arg)
> > +{
> > +	const struct thread_data *data = (struct thread_data *)arg;
> > +	uintptr_t err = 0;
> > +	char buffer;
> > +
> > +	/* Phase 1: optionally mutes, creates a domain, and triggers a denial. */
> > +	if (read(data->pipe_parent, &buffer, 1) != 1) {
> > +		err = 1;
> > +		goto out;
> > +	}
> > +
> > +	if (data->mute_subdomains &&
> > +	    landlock_restrict_self(-1,
> > +				   LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF)) {
> > +		err = 2;
> > +		goto out;
> > +	}
> > +
> > +	if (landlock_restrict_self(data->ruleset_fd, 0)) {
> > +		err = 3;
> > +		goto out;
> > +	}
> > +
> > +	if (kill(data->parent_pid, 0) != -1 || errno != EPERM) {
> > +		err = 4;
> > +		goto out;
> > +	}
> > +
> > +	if (write(data->pipe_child, ".", 1) != 1) {
> > +		err = 5;
> > +		goto out;
> > +	}
> > +
> > +	/* Phase 2: stacks another domain and triggers a denial. */
> > +	if (read(data->pipe_parent, &buffer, 1) != 1) {
> > +		err = 6;
> > +		goto out;
> > +	}
> > +
> > +	if (landlock_restrict_self(data->ruleset_fd, 0)) {
> > +		err = 7;
> > +		goto out;
> > +	}
> > +
> > +	if (kill(data->parent_pid, 0) != -1 || errno != EPERM) {
> > +		err = 8;
> > +		goto out;
> > +	}
> > +
> > +	if (write(data->pipe_child, ".", 1) != 1) {
> > +		err = 9;
> > +		goto out;
> > +	}
> > +
> > +out:
> > +	close(data->ruleset_fd);
> > +	close(data->pipe_child);
> > +	close(data->pipe_parent);
> > +	return (void *)err;
> > +}
> > +
> > +/*
> > + * Verifies that LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF with
> > + * LANDLOCK_RESTRICT_SELF_TSYNC and ruleset_fd=-1 propagates log_subdomains_off
> > + * to a sibling thread, suppressing audit logging on domains it subsequently
> > + * creates.
> > + *
> > + * Phase 1 (before TSYNC) acts as an inline baseline: the sibling creates a
> > + * domain and triggers a denial that IS logged.
> > + *
> > + * Phase 2 (after TSYNC) verifies suppression: the sibling stacks another domain
> > + * and triggers a denial that is NOT logged.
> > + */
> > +TEST_F(audit, log_subdomains_off_tsync)
> > +{
> > +	const struct landlock_ruleset_attr ruleset_attr = {
> > +		.scoped = LANDLOCK_SCOPE_SIGNAL,
> > +	};
> > +	struct audit_records records;
> > +	struct thread_data child_data;
> 
> The child_data.mute_subdomains field stays uninitialized in this
> function (and maybe others).  Please fix.
> 
>    struct thread_data child_data = {};

Well spotted!

> 
> 
> > +	int pipe_child[2], pipe_parent[2];
> > +	char buffer;
> > +	pthread_t thread;
> > +	void *thread_ret;
> > +
> > +	child_data.parent_pid = getppid();
> > +	ASSERT_EQ(0, pipe2(pipe_child, O_CLOEXEC));
> > +	child_data.pipe_child = pipe_child[1];
> > +	ASSERT_EQ(0, pipe2(pipe_parent, O_CLOEXEC));
> > +	child_data.pipe_parent = pipe_parent[0];
> > +	child_data.ruleset_fd =
> > +		landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0);
> > +	ASSERT_LE(0, child_data.ruleset_fd);
> > +
> > +	ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0));
> > +
> > +	/* Creates the sibling thread. */
> > +	ASSERT_EQ(0, pthread_create(&thread, NULL, thread_sandbox_deny_twice,
> > +				    &child_data));
> > +
> > +	/*
> > +	 * Phase 1: the sibling creates a domain and triggers a denial before
> > +	 * any log muting.  This proves the audit path works.
> > +	 */
> > +	ASSERT_EQ(1, write(pipe_parent[1], ".", 1));
> > +	ASSERT_EQ(1, read(pipe_child[0], &buffer, 1));
> > +
> > +	/* The denial must be logged. */
> > +	EXPECT_EQ(0, matches_log_signal(_metadata, self->audit_fd,
> > +					child_data.parent_pid, NULL));
> > +
> > +	/* Drains any remaining records (e.g. domain allocation). */
> > +	EXPECT_EQ(0, audit_count_records(self->audit_fd, &records));
> > +
> > +	/*
> > +	 * Mutes subdomain logs and propagates to the sibling thread via TSYNC,
> > +	 * without creating a domain.
> > +	 */
> > +	ASSERT_EQ(0, landlock_restrict_self(
> > +			     -1, LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF |
> > +					 LANDLOCK_RESTRICT_SELF_TSYNC));
> > +
> > +	/*
> > +	 * Phase 2: the sibling stacks another domain and triggers a denial.
> > +	 * Because log_subdomains_off was propagated via TSYNC, the new domain
> > +	 * has log_status=LANDLOCK_LOG_DISABLED.
> > +	 */
> > +	ASSERT_EQ(1, write(pipe_parent[1], ".", 1));
> > +	ASSERT_EQ(1, read(pipe_child[0], &buffer, 1));
> > +
> > +	/* No denial record should appear. */
> > +	EXPECT_EQ(-EAGAIN, matches_log_signal(_metadata, self->audit_fd,
> > +					      child_data.parent_pid, NULL));
> > +
> > +	EXPECT_EQ(0, audit_count_records(self->audit_fd, &records));
> > +	EXPECT_EQ(0, records.access);
> > +
> > +	EXPECT_EQ(0, close(pipe_child[0]));
> > +	EXPECT_EQ(0, close(pipe_parent[1]));
> > +	ASSERT_EQ(0, pthread_join(thread, &thread_ret));
> > +	EXPECT_EQ(NULL, thread_ret);
> > +}
> > +
> > +/*
> > + * Verifies that LANDLOCK_RESTRICT_SELF_TSYNC without
> > + * LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF overrides a sibling thread's
> > + * log_subdomains_off, re-enabling audit logging on domains the sibling
> > + * subsequently creates.
> > + *
> > + * Phase 1: the sibling sets log_subdomains_off, creates a muted domain, and
> > + * triggers a denial that is NOT logged.
> > + *
> > + * Phase 2 (after TSYNC without LOG_SUBDOMAINS_OFF): the sibling stacks another
> > + * domain and triggers a denial that IS logged, proving the muting was
> > + * overridden.
> > + */
> > +TEST_F(audit, tsync_override_log_subdomains_off)
> > +{
> > +	const struct landlock_ruleset_attr ruleset_attr = {
> > +		.scoped = LANDLOCK_SCOPE_SIGNAL,
> > +	};
> > +	struct audit_records records;
> > +	struct thread_data child_data;
> > +	int pipe_child[2], pipe_parent[2];
> > +	char buffer;
> > +	pthread_t thread;
> > +	void *thread_ret;
> > +
> > +	child_data.parent_pid = getppid();
> > +	ASSERT_EQ(0, pipe2(pipe_child, O_CLOEXEC));
> > +	child_data.pipe_child = pipe_child[1];
> > +	ASSERT_EQ(0, pipe2(pipe_parent, O_CLOEXEC));
> > +	child_data.pipe_parent = pipe_parent[0];
> > +	child_data.ruleset_fd =
> > +		landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0);
> > +	ASSERT_LE(0, child_data.ruleset_fd);
> > +
> > +	ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0));
> > +
> > +	child_data.mute_subdomains = true;
> > +
> > +	/* Creates the sibling thread. */
> > +	ASSERT_EQ(0, pthread_create(&thread, NULL, thread_sandbox_deny_twice,
> > +				    &child_data));
> > +
> > +	/*
> > +	 * Phase 1: the sibling mutes subdomain logs, creates a domain, and
> > +	 * triggers a denial.  The denial must not be logged.
> > +	 */
> > +	ASSERT_EQ(1, write(pipe_parent[1], ".", 1));
> > +	ASSERT_EQ(1, read(pipe_child[0], &buffer, 1));
> > +
> > +	EXPECT_EQ(-EAGAIN, matches_log_signal(_metadata, self->audit_fd,
> > +					      child_data.parent_pid, NULL));
> > +
> > +	/* Drains any remaining records. */
> > +	EXPECT_EQ(0, audit_count_records(self->audit_fd, &records));
> > +	EXPECT_EQ(0, records.access);
> > +
> > +	/*
> > +	 * Overrides the sibling's log_subdomains_off by calling TSYNC without
> > +	 * LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF.
> > +	 */
> > +	ASSERT_EQ(0, landlock_restrict_self(child_data.ruleset_fd,
> > +					    LANDLOCK_RESTRICT_SELF_TSYNC));
> > +
> > +	/*
> > +	 * Phase 2: the sibling stacks another domain and triggers a denial.
> > +	 * Because TSYNC replaced its log_subdomains_off with 0, the new domain
> > +	 * has log_status=LANDLOCK_LOG_PENDING.
> > +	 */
> > +	ASSERT_EQ(1, write(pipe_parent[1], ".", 1));
> > +	ASSERT_EQ(1, read(pipe_child[0], &buffer, 1));
> > +
> > +	/* The denial must be logged. */
> > +	EXPECT_EQ(0, matches_log_signal(_metadata, self->audit_fd,
> > +					child_data.parent_pid, NULL));
> > +
> > +	EXPECT_EQ(0, close(pipe_child[0]));
> > +	EXPECT_EQ(0, close(pipe_parent[1]));
> > +	ASSERT_EQ(0, pthread_join(thread, &thread_ret));
> > +	EXPECT_EQ(NULL, thread_ret);
> > +}
> > +
> >  FIXTURE(audit_flags)
> >  {
> >  	struct audit_filter audit_filter;
> > diff --git a/tools/testing/selftests/landlock/tsync_test.c b/tools/testing/selftests/landlock/tsync_test.c
> > index 2b9ad4f154f4..abc290271a1a 100644
> > --- a/tools/testing/selftests/landlock/tsync_test.c
> > +++ b/tools/testing/selftests/landlock/tsync_test.c
> > @@ -247,4 +247,78 @@ TEST(tsync_interrupt)
> >  	EXPECT_EQ(0, close(ruleset_fd));
> >  }
> >  
> > +/* clang-format off */
> > +FIXTURE(tsync_without_ruleset) {};
> > +/* clang-format on */
> > +
> > +FIXTURE_VARIANT(tsync_without_ruleset)
> > +{
> > +	const __u32 flags;
> > +	const int expected_errno;
> > +};
> > +
> > +/* clang-format off */
> > +FIXTURE_VARIANT_ADD(tsync_without_ruleset, tsync_only) {
> > +	/* clang-format on */
> > +	.flags = LANDLOCK_RESTRICT_SELF_TSYNC,
> > +	.expected_errno = EBADF,
> > +};
> > +
> > +/* clang-format off */
> > +FIXTURE_VARIANT_ADD(tsync_without_ruleset, subdomains_off_same_exec_off) {
> > +	/* clang-format on */
> > +	.flags = LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF |
> > +		 LANDLOCK_RESTRICT_SELF_LOG_SAME_EXEC_OFF |
> > +		 LANDLOCK_RESTRICT_SELF_TSYNC,
> > +	.expected_errno = EBADF,
> > +};
> > +
> > +/* clang-format off */
> > +FIXTURE_VARIANT_ADD(tsync_without_ruleset, subdomains_off_new_exec_on) {
> > +	/* clang-format on */
> > +	.flags = LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF |
> > +		 LANDLOCK_RESTRICT_SELF_LOG_NEW_EXEC_ON |
> > +		 LANDLOCK_RESTRICT_SELF_TSYNC,
> > +	.expected_errno = EBADF,
> > +};
> > +
> > +/* clang-format off */
> > +FIXTURE_VARIANT_ADD(tsync_without_ruleset, all_flags) {
> > +	/* clang-format on */
> > +	.flags = LANDLOCK_RESTRICT_SELF_LOG_SAME_EXEC_OFF |
> > +		 LANDLOCK_RESTRICT_SELF_LOG_NEW_EXEC_ON |
> > +		 LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF |
> > +		 LANDLOCK_RESTRICT_SELF_TSYNC,
> > +	.expected_errno = EBADF,
> > +};
> > +
> > +/* clang-format off */
> > +FIXTURE_VARIANT_ADD(tsync_without_ruleset, subdomains_off) {
> > +	/* clang-format on */
> > +	.flags = LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF |
> > +		 LANDLOCK_RESTRICT_SELF_TSYNC,
> > +	.expected_errno = 0,
> > +};
> > +
> > +FIXTURE_SETUP(tsync_without_ruleset)
> > +{
> > +}
> > +
> > +FIXTURE_TEARDOWN(tsync_without_ruleset)
> > +{
> > +}
> > +
> > +TEST_F(tsync_without_ruleset, check)
> > +{
> > +	int ret;
> > +

I'll set NNP here.

> > +	ret = landlock_restrict_self(-1, variant->flags);
> > +	if (variant->expected_errno) {
> > +		EXPECT_EQ(-1, ret);
> > +		EXPECT_EQ(variant->expected_errno, errno);
> > +	} else {
> > +		EXPECT_EQ(0, ret);
> > +	}
> > +}
> 
> We are not setting the no_new_privs flag in this test, as we do in the
> others.
> 
> no_new_privs or CAP_SYS_ADMIN are required in the implementation, even
> when ruleset_fd == -1 and passing
> LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF.

Sure.

> 
> > +
> >  TEST_HARNESS_MAIN
> > -- 
> > 2.53.0
> > 
> 
> Reviewed-by: Günther Noack <gnoack3000@gmail.com>
> 
> But please fix the flaky test.
> 
> –Günther
> 

^ permalink raw reply

* Re: [PATCH v1 1/2] landlock: Fix log_subdomains_off inheritance across fork()
From: Mickaël Salaün @ 2026-04-07 16:03 UTC (permalink / raw)
  To: Günther Noack, Jann Horn
  Cc: Günther Noack, linux-security-module, stable
In-Reply-To: <20260407.844e42deb531@gnoack.org>

On Tue, Apr 07, 2026 at 09:30:40AM +0200, Günther Noack wrote:
> Hello!
> 
> On Sat, Apr 04, 2026 at 10:49:57AM +0200, Mickaël Salaün wrote:
> > hook_cred_transfer() only copies the Landlock security blob when the
> > source credential has a domain.  This is inconsistent with
> > landlock_restrict_self() which can set log_subdomains_off on a
> > credential without creating a domain (via the ruleset_fd=-1 path): the
> > field is committed but not preserved across fork() because the child's
> > prepare_creds() calls hook_cred_transfer() which skips the copy when
> > domain is NULL.
> > 
> > This breaks the documented use case where a process mutes subdomain logs
> > before forking sandboxed children: the children lose the muting and
> > their domains produce unexpected audit records.
> > 
> > Fix this by unconditionally copying the Landlock credential blob.
> > landlock_get_ruleset(NULL) is already a safe no-op.
> > 
> > Cc: Günther Noack <gnoack@google.com>
> > Cc: stable@vger.kernel.org
> > Fixes: ead9079f7569 ("landlock: Add LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF")
> > Signed-off-by: Mickaël Salaün <mic@digikod.net>
> > ---
> >  security/landlock/cred.c                      |  6 +-
> >  tools/testing/selftests/landlock/audit_test.c | 88 +++++++++++++++++++
> >  2 files changed, 90 insertions(+), 4 deletions(-)
> > 
> > diff --git a/security/landlock/cred.c b/security/landlock/cred.c
> > index 0cb3edde4d18..cc419de75cd6 100644
> > --- a/security/landlock/cred.c
> > +++ b/security/landlock/cred.c
> > @@ -22,10 +22,8 @@ static void hook_cred_transfer(struct cred *const new,
> >  	const struct landlock_cred_security *const old_llcred =
> >  		landlock_cred(old);
> >  
> > -	if (old_llcred->domain) {
> > -		landlock_get_ruleset(old_llcred->domain);
> > -		*landlock_cred(new) = *old_llcred;
> > -	}
> > +	landlock_get_ruleset(old_llcred->domain);
> > +	*landlock_cred(new) = *old_llcred;
> 
> This fix looks correct for the hook_cred_prepare() case (and of
> course, hook_cred_prepare() calls hook_cred_transfer() in Landlock).
> 
> 
> But I'm afraid I might have spotted another issue here:
> 
> If I look at the code in security/keys/process_keys.c, where
> > security_transfer_creds() is called, the "old" object is actually
> already initialized, and if we are not checking for that, I think we
> are leaking memory.

old is only a partially initialized credential, and the Landlock
part is not set yet, which is the goal of hook_cred_transfer(), so
there is no leak.

> 
> I would suggest to use the helper landlock_cred_copy() from cred.h for

This is not required, and if we wanted to do it anyway, it would
not be backportable and would introduce a (minimal) performance penalty.

> that.  This one is anyway supposed to be the central place for this
> copying logic, and it is safe to use with zeroed-out target objects
> (because the put is safe for the NULL-pointer).
> 
> Maybe this is worth updating while we are at it?
> 
> 
> >  }
> >  
> >  static int hook_cred_prepare(struct cred *const new,
> > diff --git a/tools/testing/selftests/landlock/audit_test.c b/tools/testing/selftests/landlock/audit_test.c
> > index 46d02d49835a..20099b8667e7 100644
> > --- a/tools/testing/selftests/landlock/audit_test.c
> > +++ b/tools/testing/selftests/landlock/audit_test.c
> > @@ -279,6 +279,94 @@ TEST_F(audit, thread)
> >  				&audit_tv_default, sizeof(audit_tv_default)));
> >  }
> >  
> > +/*
> > + * Verifies that log_subdomains_off set via the ruleset_fd=-1 path (without
> > + * creating a domain) is inherited by children across fork().  This exercises
> > + * the hook_cred_transfer() fix: the Landlock credential blob must be copied
> > + * even when the source credential has no domain.
> > + *
> > + * Phase 1 (baseline): a child without muting creates a domain and triggers a
> > + * denial that IS logged.
> > + *
> > + * Phase 2 (after muting): the parent mutes subdomain logs, forks another child
> > + * who creates a domain and triggers a denial that is NOT logged.
> > + */
> > +TEST_F(audit, log_subdomains_off_fork)
> > +{
> > +	const struct landlock_ruleset_attr ruleset_attr = {
> > +		.scoped = LANDLOCK_SCOPE_SIGNAL,
> > +	};
> > +	struct audit_records records;
> > +	int ruleset_fd, status;
> > +	pid_t child;
> > +
> > +	ruleset_fd =
> > +		landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0);
> > +	ASSERT_LE(0, ruleset_fd);
> > +
> > +	ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0));
> > +
> > +	/*
> > +	 * Phase 1: forks a child that creates a domain and triggers a denial
> > +	 * before any muting.  This proves the audit path works.
> > +	 */
> > +	child = fork();
> > +	ASSERT_LE(0, child);
> > +	if (child == 0) {
> > +		ASSERT_EQ(0, landlock_restrict_self(ruleset_fd, 0));
> > +		ASSERT_EQ(-1, kill(getppid(), 0));
> > +		ASSERT_EQ(EPERM, errno);
> > +		_exit(0);
> > +		return;
> > +	}
> > +
> > +	ASSERT_EQ(child, waitpid(child, &status, 0));
> > +	ASSERT_EQ(true, WIFEXITED(status));
> > +	ASSERT_EQ(0, WEXITSTATUS(status));
> > +
> > +	/* The denial must be logged (baseline). */
> > +	EXPECT_EQ(0, matches_log_signal(_metadata, self->audit_fd, getpid(),
> > +					NULL));
> > +
> > +	/* Drains any remaining records (e.g. domain allocation). */
> > +	EXPECT_EQ(0, audit_count_records(self->audit_fd, &records));
> > +
> > +	/*
> > +	 * Mutes subdomain logs without creating a domain.  The parent's
> > +	 * credential has domain=NULL and log_subdomains_off=1.
> > +	 */
> > +	ASSERT_EQ(0, landlock_restrict_self(
> > +			     -1, LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF));
> > +
> > +	/*
> > +	 * Phase 2: forks a child that creates a domain and triggers a denial.
> > +	 * Because log_subdomains_off was inherited via fork(), the child's
> > +	 * domain has log_status=LANDLOCK_LOG_DISABLED.
> > +	 */
> > +	child = fork();
> > +	ASSERT_LE(0, child);
> > +	if (child == 0) {
> > +		ASSERT_EQ(0, landlock_restrict_self(ruleset_fd, 0));
> > +		ASSERT_EQ(-1, kill(getppid(), 0));
> > +		ASSERT_EQ(EPERM, errno);
> > +		_exit(0);
> > +		return;
> > +	}
> > +
> > +	ASSERT_EQ(child, waitpid(child, &status, 0));
> > +	ASSERT_EQ(true, WIFEXITED(status));
> > +	ASSERT_EQ(0, WEXITSTATUS(status));
> > +
> > +	/* No denial record should appear. */
> > +	EXPECT_EQ(-EAGAIN, matches_log_signal(_metadata, self->audit_fd,
> > +					      getpid(), NULL));
> > +
> > +	EXPECT_EQ(0, audit_count_records(self->audit_fd, &records));
> > +	EXPECT_EQ(0, records.access);
> > +
> > +	EXPECT_EQ(0, close(ruleset_fd));
> > +}
> > +
> >  FIXTURE(audit_flags)
> >  {
> >  	struct audit_filter audit_filter;
> > -- 
> > 2.53.0
> > 
> 
> Test looks fine.
> 
> While I do still think we should investigate the memory leak, this
> commit is, as it is, already a strict improvement over what we had
> before, so:
> 
> Reviewed-by: Günther Noack <gnoack3000@gmail.com>

I'll keep your tag if this patch is ok with you as-is.

> 
> –Günther
> 

^ permalink raw reply

* Re: [PATCH v4 3/3] selinux: fix overlayfs mmap() and mprotect() access checks
From: Paul Moore @ 2026-04-07 14:35 UTC (permalink / raw)
  To: Stephen Smalley
  Cc: Ondrej Mosnacek, linux-security-module, selinux, linux-fsdevel,
	linux-unionfs, linux-erofs, Amir Goldstein, Gao Xiang,
	Christian Brauner
In-Reply-To: <CAEjxPJ4nqeuPhve3Fe-tFuNW9R5grnWwfYJv7q2cRu+UPQ5c4A@mail.gmail.com>

On Tue, Apr 7, 2026 at 8:14 AM Stephen Smalley
<stephen.smalley.work@gmail.com> wrote:
> On Thu, Apr 2, 2026 at 11:09 PM Paul Moore <paul@paul-moore.com> wrote:
> >
> > The existing SELinux security model for overlayfs is to allow access if
> > the current task is able to access the top level file (the "user" file)
> > and the mounter's credentials are sufficient to access the lower
> > level file (the "backing" file).  Unfortunately, the current code does
> > not properly enforce these access controls for both mmap() and mprotect()
> > operations on overlayfs filesystems.
> >
> > This patch makes use of the newly created security_mmap_backing_file()
> > LSM hook to provide the missing backing file enforcement for mmap()
> > operations, and leverages the backing file API and new LSM blob to
> > provide the necessary information to properly enforce the mprotect()
> > access controls.
> >
> > Cc: stable@vger.kernel.org
> > Signed-off-by: Paul Moore <paul@paul-moore.com>
>
> Do you have tests for these changes showing the before and after (i.e.
> failing without your patches, passing with them)? I tried running an
> earlier set from Ondrej but they failed.

A few months ago I sent you and Ondrej some feedback on those early
tests from Ondrej, but yes, I also had problems with Ondrej's tests.
I've been using a hacked up combination of the existing tests, some of
Ondrej's additions, and an additional debug/test patch to ensure the
labeling is correct.  It's far from ideal, but I didn't invest time in
test development as I assumed Ondrej would continue his efforts there
(unfortunately it doesn't appear that he has?), and I wanted to focus
on getting a solution as soon as possible for obvious reasons.

-- 
paul-moore.com

^ permalink raw reply

* LSM: Whiteout chardev creation sidesteps mknod hook
From: Günther Noack @ 2026-04-07 13:05 UTC (permalink / raw)
  To: Christian Brauner, Mickaël Salaün, Paul Moore
  Cc: linux-security-module

Hello Christian, Paul, Mickaël and LSM maintainers!

I discovered the following bug in Landlock, which potentially also
affects other LSMs:

With renameat2(2)'s RENAME_WHITEOUT flag, it is possible to create a
"whiteout object" at the source of the rename.  Whiteout objects are
character devices with major/minor (0, 0) -- these devices are not
bound to any driver, so they are harmless, but still, the creation of
these files can sidestep the LANDLOCK_ACCESS_FS_MAKE_CHAR access right
in Landlock.


I am unsure which is the right fix here -- do you have an opinion
on this from the VFS/LSM side?


Option 1: Make filesystems call security_path_mknod() during RENAME_WHITEOUT?

Do it in the VFS rename hook.

* Pro: Fixes it for all LSMs
* Con: Call would have to be done in multiple filesystems


Option 2: Handle it in security_{path,inode}_rename()

Make Landlock handle it in security_inode_rename() by looking for the
RENAME_WHITEOUT flag.

* Con: Operation should only be denied if the file system even
  implements RENAME_WHITEOUT, and we would have to maintain a list of
  affected filesystems for that.  (That feels like solving it at the
  wrong layer of abstraction.)
* Con: Unclear whether other LSMs need a similar fix


Option 3: Declare that this is working as intended?

* Pro: (0, 0) is not a "real" character device


With options 1 and 2, we'd likely need to double check that we are not
breaking existing scenarios involving OverlayFS by suddenly requiring
a more lax policy for creating character devices on these directories.

Please let me know what you think.  I'm specifically interested in:

1. Christian: What is the appropriate way to do this VFS wise?
2. LSM maintainers: Is this a bug that affects other LSMs as well?

Thanks,
—Günther

P.S.: For full transparency, I found this bug by pointing Google
Gemini at the Landlock codebase.

^ permalink raw reply

* Re: [PATCH v2 12/17] landlock: Add tracepoints for ptrace and scope denials
From: Mickaël Salaün @ 2026-04-07 13:00 UTC (permalink / raw)
  To: Steven Rostedt
  Cc: Christian Brauner, Günther Noack, Jann Horn, Jeff Xu,
	Justin Suess, Kees Cook, Masami Hiramatsu, Mathieu Desnoyers,
	Matthieu Buffet, Mikhail Ivanov, Tingmao Wang, kernel-team,
	linux-fsdevel, linux-security-module, linux-trace-kernel
In-Reply-To: <20260406110123.4072a765@gandalf.local.home>

On Mon, Apr 06, 2026 at 11:01:23AM -0400, Steven Rostedt wrote:
> On Mon,  6 Apr 2026 16:37:10 +0200
> Mickaël Salaün <mic@digikod.net> wrote:
> 
> > ---
> >  include/trace/events/landlock.h | 135 ++++++++++++++++++++++++++++++++
> >  security/landlock/log.c         |  20 +++++
> >  2 files changed, 155 insertions(+)
> > 
> > diff --git a/include/trace/events/landlock.h b/include/trace/events/landlock.h
> > index 1afab091efba..9f96c9897f44 100644
> > --- a/include/trace/events/landlock.h
> > +++ b/include/trace/events/landlock.h
> > @@ -11,6 +11,7 @@
> >  #define _TRACE_LANDLOCK_H
> >  
> >  #include <linux/tracepoint.h>
> > +#include <net/af_unix.h>
> >  
> >  struct dentry;
> >  struct landlock_domain;
> > @@ -19,6 +20,7 @@ struct landlock_rule;
> >  struct landlock_ruleset;
> >  struct path;
> >  struct sock;
> > +struct task_struct;
> >  
> >  /**
> >   * DOC: Landlock trace events
> > @@ -433,6 +435,139 @@ TRACE_EVENT(
> >  		__entry->log_new_exec, __entry->blockers, __entry->sport,
> >  		__entry->dport));
> >  
> > +/**
> > + * landlock_deny_ptrace - ptrace access denied
> > + * @hierarchy: Hierarchy node that blocked the access (never NULL)
> > + * @same_exec: Whether the current task is the same executable that called
> > + *             landlock_restrict_self() for the denying hierarchy node
> > + * @tracee: Target task (never NULL); eBPF can read pid, comm, cred,
> > + *          namespaces, and cgroup via BTF
> > + */
> > +TRACE_EVENT(
> > +	landlock_deny_ptrace,
> > +
> > +	TP_PROTO(const struct landlock_hierarchy *hierarchy, bool same_exec,
> > +		 const struct task_struct *tracee),
> > +
> > +	TP_ARGS(hierarchy, same_exec, tracee),
> > +
> > +	TP_STRUCT__entry(
> > +		__field(__u64, domain_id) __field(bool, same_exec)
> > +			__field(u32, log_same_exec) __field(u32, log_new_exec)
> > +				__field(pid_t, tracee_pid)
> > +					__string(tracee_comm, tracee->comm)),
> 
> Event formats are different than normal macro formatting. Please use the
> event formatting. The above is a defined structure that is being created
> for use. Keep it looking like a structure:
> 
> 	TP_STRUCT__entry(
> 		__field(	__u64,		domain_id)
> 		__field(	bool,		same_exec)
> 		__field(	u32,		log_same_exec)
> 		__field(	u32,		log_new_exec)
> 		__field(	pid_t,		tracee_pid)
> 		__string(	tracee_comm,	tracee->comm)
> 	),

I was using clang-format, but it doesn't make sense here, I'll fix it.

> 
> See how the above resembles:
> 
> struct entry {
> 	__u64		domain_id;
> 	bool		same_exec;
> 	u32		log_same_exec;
> 	u32		log_new_exec;
> 	pid_t		tracee_pid;
> 	string		tracee_comm;
> };
> 
> Because that's pretty much what the trace event TP_STRUCT__entry() is going
> to do with it. (The string will obviously be something else).
> 
> This way it's also easy to spot holes in the structure that is written
> into the ring buffer. The "same_exec" being a bool followed by two u32
> types, is going to cause a hole. Move it to between tracee_pid and
> tracee_comm.

Actually, the log_* fields should be bool too.  Anyway, is the concern
that the ring buffer leaks kernel memory (from a previous event), or is
it mostly about avoiding wasted space and making holes easy to spot
even if they are harmless?

> 
> Please fix the other events too.

Sure. Thanks!

> 
> -- Steve
> 
> 
> > +
> > +	TP_fast_assign(__entry->domain_id = hierarchy->id;
> > +		       __entry->same_exec = same_exec;
> > +		       __entry->log_same_exec = hierarchy->log_same_exec;
> > +		       __entry->log_new_exec = hierarchy->log_new_exec;
> > +		       __entry->tracee_pid =
> > +			       task_tgid_nr((struct task_struct *)tracee);
> > +		       __assign_str(tracee_comm);),
> > +
> > +	TP_printk(
> > +		"domain=%llx same_exec=%d log_same_exec=%u log_new_exec=%u tracee_pid=%d comm=%s",
> > +		__entry->domain_id, __entry->same_exec, __entry->log_same_exec,
> > +		__entry->log_new_exec, __entry->tracee_pid,
> > +		__print_untrusted_str(tracee_comm)));

Are you OK with this new helper?

> > +
> >
> 

^ permalink raw reply

* Re: [PATCH v4 3/3] selinux: fix overlayfs mmap() and mprotect() access checks
From: Stephen Smalley @ 2026-04-07 12:14 UTC (permalink / raw)
  To: Paul Moore, Ondrej Mosnacek
  Cc: linux-security-module, selinux, linux-fsdevel, linux-unionfs,
	linux-erofs, Amir Goldstein, Gao Xiang, Christian Brauner
In-Reply-To: <20260403030848.731867-8-paul@paul-moore.com>

On Thu, Apr 2, 2026 at 11:09 PM Paul Moore <paul@paul-moore.com> wrote:
>
> The existing SELinux security model for overlayfs is to allow access if
> the current task is able to access the top level file (the "user" file)
> and the mounter's credentials are sufficient to access the lower
> level file (the "backing" file).  Unfortunately, the current code does
> not properly enforce these access controls for both mmap() and mprotect()
> operations on overlayfs filesystems.
>
> This patch makes use of the newly created security_mmap_backing_file()
> LSM hook to provide the missing backing file enforcement for mmap()
> operations, and leverages the backing file API and new LSM blob to
> provide the necessary information to properly enforce the mprotect()
> access controls.
>
> Cc: stable@vger.kernel.org
> Signed-off-by: Paul Moore <paul@paul-moore.com>

Do you have tests for these changes showing the before and after (i.e.
failing without your patches, passing with them)? I tried running an
earlier set from Ondrej but they failed.

^ permalink raw reply

* Re: [PATCH v1 2/2] landlock: Allow TSYNC with LOG_SUBDOMAINS_OFF and fd=-1
From: Günther Noack @ 2026-04-07  8:25 UTC (permalink / raw)
  To: Mickaël Salaün
  Cc: Günther Noack, linux-security-module, stable
In-Reply-To: <20260404085001.1604405-2-mic@digikod.net>

Hello!

On Sat, Apr 04, 2026 at 10:49:58AM +0200, Mickaël Salaün wrote:
> LANDLOCK_RESTRICT_SELF_TSYNC does not allow
> LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF with ruleset_fd=-1, preventing
> a multithreaded process from atomically propagating subdomain log muting
> to all threads without creating a domain layer.  Relax the fd=-1
> condition to accept TSYNC alongside LOG_SUBDOMAINS_OFF, and update the
> documentation accordingly.
> 
> Add flag validation tests for all TSYNC combinations with ruleset_fd=-1,
> and audit tests verifying both transition directions: muting via TSYNC
> (logged to not logged) and override via TSYNC (not logged to logged).
> 
> Cc: Günther Noack <gnoack@google.com>
> Cc: stable@vger.kernel.org
> Fixes: 42fc7e6543f6 ("landlock: Multithreading support for landlock_restrict_self()")
> Signed-off-by: Mickaël Salaün <mic@digikod.net>
> ---
>  include/uapi/linux/landlock.h                 |   4 +-
>  security/landlock/syscalls.c                  |  14 +-
>  tools/testing/selftests/landlock/audit_test.c | 233 ++++++++++++++++++
>  tools/testing/selftests/landlock/tsync_test.c |  74 ++++++
>  4 files changed, 319 insertions(+), 6 deletions(-)
> 
> diff --git a/include/uapi/linux/landlock.h b/include/uapi/linux/landlock.h
> index f88fa1f68b77..d37603efc273 100644
> --- a/include/uapi/linux/landlock.h
> +++ b/include/uapi/linux/landlock.h
> @@ -116,7 +116,9 @@ struct landlock_ruleset_attr {
>   *     ``LANDLOCK_RESTRICT_SELF_LOG_SAME_EXEC_OFF``, this flag only affects
>   *     future nested domains, not the one being created. It can also be used
>   *     with a @ruleset_fd value of -1 to mute subdomain logs without creating a
> - *     domain.
> + *     domain.  When combined with %LANDLOCK_RESTRICT_SELF_TSYNC and a
> + *     @ruleset_fd value of -1, this configuration is propagated to all threads
> + *     of the current process.
>   *
>   * The following flag supports policy enforcement in multithreaded processes:
>   *
> diff --git a/security/landlock/syscalls.c b/security/landlock/syscalls.c
> index 0d66a68677b7..a0bb664e0d31 100644
> --- a/security/landlock/syscalls.c
> +++ b/security/landlock/syscalls.c
> @@ -512,10 +512,13 @@ SYSCALL_DEFINE2(landlock_restrict_self, const int, ruleset_fd, const __u32,
>  
>  	/*
>  	 * It is allowed to set LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF with
> -	 * -1 as ruleset_fd, but no other flag must be set.
> +	 * -1 as ruleset_fd, optionally combined with
> +	 * LANDLOCK_RESTRICT_SELF_TSYNC to propagate this configuration to all
> +	 * threads.  No other flag must be set.
>  	 */
>  	if (!(ruleset_fd == -1 &&
> -	      flags == LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF)) {
> +	      (flags & ~LANDLOCK_RESTRICT_SELF_TSYNC) ==
> +		      LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF)) {

Well spotted, thanks!


>  		/* Gets and checks the ruleset. */
>  		ruleset = get_ruleset_from_fd(ruleset_fd, FMODE_CAN_READ);
>  		if (IS_ERR(ruleset))
> @@ -537,9 +540,10 @@ SYSCALL_DEFINE2(landlock_restrict_self, const int, ruleset_fd, const __u32,
>  
>  	/*
>  	 * The only case when a ruleset may not be set is if
> -	 * LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF is set and ruleset_fd is -1.
> -	 * We could optimize this case by not calling commit_creds() if this flag
> -	 * was already set, but it is not worth the complexity.
> +	 * LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF is set (optionally with
> +	 * LANDLOCK_RESTRICT_SELF_TSYNC) and ruleset_fd is -1.  We could
> +	 * optimize this case by not calling commit_creds() if this flag was
> +	 * already set, but it is not worth the complexity.
>  	 */
>  	if (ruleset) {
>  		/*
> diff --git a/tools/testing/selftests/landlock/audit_test.c b/tools/testing/selftests/landlock/audit_test.c
> index 20099b8667e7..a193d8a97560 100644
> --- a/tools/testing/selftests/landlock/audit_test.c
> +++ b/tools/testing/selftests/landlock/audit_test.c
> @@ -162,6 +162,7 @@ TEST_F(audit, layers)
>  struct thread_data {
>  	pid_t parent_pid;
>  	int ruleset_fd, pipe_child, pipe_parent;
> +	bool mute_subdomains;
>  };
>  
>  static void *thread_audit_test(void *arg)
> @@ -367,6 +368,238 @@ TEST_F(audit, log_subdomains_off_fork)
>  	EXPECT_EQ(0, close(ruleset_fd));
>  }
>  
> +/*
> + * Thread function: runs two rounds of (create domain, trigger denial, signal
> + * back), waiting for the main thread before each round.  When mute_subdomains
> + * is set, phase 1 also mutes subdomain logs via the fd=-1 path before creating
> + * the domain.  The ruleset_fd is kept open across both rounds so each
> + * restrict_self call stacks a new domain layer.
> + */
> +static void *thread_sandbox_deny_twice(void *arg)
> +{
> +	const struct thread_data *data = (struct thread_data *)arg;
> +	uintptr_t err = 0;
> +	char buffer;
> +
> +	/* Phase 1: optionally mutes, creates a domain, and triggers a denial. */
> +	if (read(data->pipe_parent, &buffer, 1) != 1) {
> +		err = 1;
> +		goto out;
> +	}
> +
> +	if (data->mute_subdomains &&
> +	    landlock_restrict_self(-1,
> +				   LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF)) {
> +		err = 2;
> +		goto out;
> +	}
> +
> +	if (landlock_restrict_self(data->ruleset_fd, 0)) {
> +		err = 3;
> +		goto out;
> +	}
> +
> +	if (kill(data->parent_pid, 0) != -1 || errno != EPERM) {
> +		err = 4;
> +		goto out;
> +	}
> +
> +	if (write(data->pipe_child, ".", 1) != 1) {
> +		err = 5;
> +		goto out;
> +	}
> +
> +	/* Phase 2: stacks another domain and triggers a denial. */
> +	if (read(data->pipe_parent, &buffer, 1) != 1) {
> +		err = 6;
> +		goto out;
> +	}
> +
> +	if (landlock_restrict_self(data->ruleset_fd, 0)) {
> +		err = 7;
> +		goto out;
> +	}
> +
> +	if (kill(data->parent_pid, 0) != -1 || errno != EPERM) {
> +		err = 8;
> +		goto out;
> +	}
> +
> +	if (write(data->pipe_child, ".", 1) != 1) {
> +		err = 9;
> +		goto out;
> +	}
> +
> +out:
> +	close(data->ruleset_fd);
> +	close(data->pipe_child);
> +	close(data->pipe_parent);
> +	return (void *)err;
> +}
> +
> +/*
> + * Verifies that LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF with
> + * LANDLOCK_RESTRICT_SELF_TSYNC and ruleset_fd=-1 propagates log_subdomains_off
> + * to a sibling thread, suppressing audit logging on domains it subsequently
> + * creates.
> + *
> + * Phase 1 (before TSYNC) acts as an inline baseline: the sibling creates a
> + * domain and triggers a denial that IS logged.
> + *
> + * Phase 2 (after TSYNC) verifies suppression: the sibling stacks another domain
> + * and triggers a denial that is NOT logged.
> + */
> +TEST_F(audit, log_subdomains_off_tsync)
> +{
> +	const struct landlock_ruleset_attr ruleset_attr = {
> +		.scoped = LANDLOCK_SCOPE_SIGNAL,
> +	};
> +	struct audit_records records;
> +	struct thread_data child_data;

The child_data.mute_subdomains field stays uninitialized in this
function (and maybe others).  Please fix.

   struct thread_data child_data = {};


> +	int pipe_child[2], pipe_parent[2];
> +	char buffer;
> +	pthread_t thread;
> +	void *thread_ret;
> +
> +	child_data.parent_pid = getppid();
> +	ASSERT_EQ(0, pipe2(pipe_child, O_CLOEXEC));
> +	child_data.pipe_child = pipe_child[1];
> +	ASSERT_EQ(0, pipe2(pipe_parent, O_CLOEXEC));
> +	child_data.pipe_parent = pipe_parent[0];
> +	child_data.ruleset_fd =
> +		landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0);
> +	ASSERT_LE(0, child_data.ruleset_fd);
> +
> +	ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0));
> +
> +	/* Creates the sibling thread. */
> +	ASSERT_EQ(0, pthread_create(&thread, NULL, thread_sandbox_deny_twice,
> +				    &child_data));
> +
> +	/*
> +	 * Phase 1: the sibling creates a domain and triggers a denial before
> +	 * any log muting.  This proves the audit path works.
> +	 */
> +	ASSERT_EQ(1, write(pipe_parent[1], ".", 1));
> +	ASSERT_EQ(1, read(pipe_child[0], &buffer, 1));
> +
> +	/* The denial must be logged. */
> +	EXPECT_EQ(0, matches_log_signal(_metadata, self->audit_fd,
> +					child_data.parent_pid, NULL));
> +
> +	/* Drains any remaining records (e.g. domain allocation). */
> +	EXPECT_EQ(0, audit_count_records(self->audit_fd, &records));
> +
> +	/*
> +	 * Mutes subdomain logs and propagates to the sibling thread via TSYNC,
> +	 * without creating a domain.
> +	 */
> +	ASSERT_EQ(0, landlock_restrict_self(
> +			     -1, LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF |
> +					 LANDLOCK_RESTRICT_SELF_TSYNC));
> +
> +	/*
> +	 * Phase 2: the sibling stacks another domain and triggers a denial.
> +	 * Because log_subdomains_off was propagated via TSYNC, the new domain
> +	 * has log_status=LANDLOCK_LOG_DISABLED.
> +	 */
> +	ASSERT_EQ(1, write(pipe_parent[1], ".", 1));
> +	ASSERT_EQ(1, read(pipe_child[0], &buffer, 1));
> +
> +	/* No denial record should appear. */
> +	EXPECT_EQ(-EAGAIN, matches_log_signal(_metadata, self->audit_fd,
> +					      child_data.parent_pid, NULL));
> +
> +	EXPECT_EQ(0, audit_count_records(self->audit_fd, &records));
> +	EXPECT_EQ(0, records.access);
> +
> +	EXPECT_EQ(0, close(pipe_child[0]));
> +	EXPECT_EQ(0, close(pipe_parent[1]));
> +	ASSERT_EQ(0, pthread_join(thread, &thread_ret));
> +	EXPECT_EQ(NULL, thread_ret);
> +}
> +
> +/*
> + * Verifies that LANDLOCK_RESTRICT_SELF_TSYNC without
> + * LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF overrides a sibling thread's
> + * log_subdomains_off, re-enabling audit logging on domains the sibling
> + * subsequently creates.
> + *
> + * Phase 1: the sibling sets log_subdomains_off, creates a muted domain, and
> + * triggers a denial that is NOT logged.
> + *
> + * Phase 2 (after TSYNC without LOG_SUBDOMAINS_OFF): the sibling stacks another
> + * domain and triggers a denial that IS logged, proving the muting was
> + * overridden.
> + */
> +TEST_F(audit, tsync_override_log_subdomains_off)
> +{
> +	const struct landlock_ruleset_attr ruleset_attr = {
> +		.scoped = LANDLOCK_SCOPE_SIGNAL,
> +	};
> +	struct audit_records records;
> +	struct thread_data child_data;
> +	int pipe_child[2], pipe_parent[2];
> +	char buffer;
> +	pthread_t thread;
> +	void *thread_ret;
> +
> +	child_data.parent_pid = getppid();
> +	ASSERT_EQ(0, pipe2(pipe_child, O_CLOEXEC));
> +	child_data.pipe_child = pipe_child[1];
> +	ASSERT_EQ(0, pipe2(pipe_parent, O_CLOEXEC));
> +	child_data.pipe_parent = pipe_parent[0];
> +	child_data.ruleset_fd =
> +		landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0);
> +	ASSERT_LE(0, child_data.ruleset_fd);
> +
> +	ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0));
> +
> +	child_data.mute_subdomains = true;
> +
> +	/* Creates the sibling thread. */
> +	ASSERT_EQ(0, pthread_create(&thread, NULL, thread_sandbox_deny_twice,
> +				    &child_data));
> +
> +	/*
> +	 * Phase 1: the sibling mutes subdomain logs, creates a domain, and
> +	 * triggers a denial.  The denial must not be logged.
> +	 */
> +	ASSERT_EQ(1, write(pipe_parent[1], ".", 1));
> +	ASSERT_EQ(1, read(pipe_child[0], &buffer, 1));
> +
> +	EXPECT_EQ(-EAGAIN, matches_log_signal(_metadata, self->audit_fd,
> +					      child_data.parent_pid, NULL));
> +
> +	/* Drains any remaining records. */
> +	EXPECT_EQ(0, audit_count_records(self->audit_fd, &records));
> +	EXPECT_EQ(0, records.access);
> +
> +	/*
> +	 * Overrides the sibling's log_subdomains_off by calling TSYNC without
> +	 * LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF.
> +	 */
> +	ASSERT_EQ(0, landlock_restrict_self(child_data.ruleset_fd,
> +					    LANDLOCK_RESTRICT_SELF_TSYNC));
> +
> +	/*
> +	 * Phase 2: the sibling stacks another domain and triggers a denial.
> +	 * Because TSYNC replaced its log_subdomains_off with 0, the new domain
> +	 * has log_status=LANDLOCK_LOG_PENDING.
> +	 */
> +	ASSERT_EQ(1, write(pipe_parent[1], ".", 1));
> +	ASSERT_EQ(1, read(pipe_child[0], &buffer, 1));
> +
> +	/* The denial must be logged. */
> +	EXPECT_EQ(0, matches_log_signal(_metadata, self->audit_fd,
> +					child_data.parent_pid, NULL));
> +
> +	EXPECT_EQ(0, close(pipe_child[0]));
> +	EXPECT_EQ(0, close(pipe_parent[1]));
> +	ASSERT_EQ(0, pthread_join(thread, &thread_ret));
> +	EXPECT_EQ(NULL, thread_ret);
> +}
> +
>  FIXTURE(audit_flags)
>  {
>  	struct audit_filter audit_filter;
> diff --git a/tools/testing/selftests/landlock/tsync_test.c b/tools/testing/selftests/landlock/tsync_test.c
> index 2b9ad4f154f4..abc290271a1a 100644
> --- a/tools/testing/selftests/landlock/tsync_test.c
> +++ b/tools/testing/selftests/landlock/tsync_test.c
> @@ -247,4 +247,78 @@ TEST(tsync_interrupt)
>  	EXPECT_EQ(0, close(ruleset_fd));
>  }
>  
> +/* clang-format off */
> +FIXTURE(tsync_without_ruleset) {};
> +/* clang-format on */
> +
> +FIXTURE_VARIANT(tsync_without_ruleset)
> +{
> +	const __u32 flags;
> +	const int expected_errno;
> +};
> +
> +/* clang-format off */
> +FIXTURE_VARIANT_ADD(tsync_without_ruleset, tsync_only) {
> +	/* clang-format on */
> +	.flags = LANDLOCK_RESTRICT_SELF_TSYNC,
> +	.expected_errno = EBADF,
> +};
> +
> +/* clang-format off */
> +FIXTURE_VARIANT_ADD(tsync_without_ruleset, subdomains_off_same_exec_off) {
> +	/* clang-format on */
> +	.flags = LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF |
> +		 LANDLOCK_RESTRICT_SELF_LOG_SAME_EXEC_OFF |
> +		 LANDLOCK_RESTRICT_SELF_TSYNC,
> +	.expected_errno = EBADF,
> +};
> +
> +/* clang-format off */
> +FIXTURE_VARIANT_ADD(tsync_without_ruleset, subdomains_off_new_exec_on) {
> +	/* clang-format on */
> +	.flags = LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF |
> +		 LANDLOCK_RESTRICT_SELF_LOG_NEW_EXEC_ON |
> +		 LANDLOCK_RESTRICT_SELF_TSYNC,
> +	.expected_errno = EBADF,
> +};
> +
> +/* clang-format off */
> +FIXTURE_VARIANT_ADD(tsync_without_ruleset, all_flags) {
> +	/* clang-format on */
> +	.flags = LANDLOCK_RESTRICT_SELF_LOG_SAME_EXEC_OFF |
> +		 LANDLOCK_RESTRICT_SELF_LOG_NEW_EXEC_ON |
> +		 LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF |
> +		 LANDLOCK_RESTRICT_SELF_TSYNC,
> +	.expected_errno = EBADF,
> +};
> +
> +/* clang-format off */
> +FIXTURE_VARIANT_ADD(tsync_without_ruleset, subdomains_off) {
> +	/* clang-format on */
> +	.flags = LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF |
> +		 LANDLOCK_RESTRICT_SELF_TSYNC,
> +	.expected_errno = 0,
> +};
> +
> +FIXTURE_SETUP(tsync_without_ruleset)
> +{
> +}
> +
> +FIXTURE_TEARDOWN(tsync_without_ruleset)
> +{
> +}
> +
> +TEST_F(tsync_without_ruleset, check)
> +{
> +	int ret;
> +
> +	ret = landlock_restrict_self(-1, variant->flags);
> +	if (variant->expected_errno) {
> +		EXPECT_EQ(-1, ret);
> +		EXPECT_EQ(variant->expected_errno, errno);
> +	} else {
> +		EXPECT_EQ(0, ret);
> +	}
> +}

We are not setting the no_new_privs flag in this test, as we do in the
others.

Either no_new_privs or CAP_SYS_ADMIN is required by the implementation,
even when ruleset_fd == -1 and passing
LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF.

> +
>  TEST_HARNESS_MAIN
> -- 
> 2.53.0
> 

Reviewed-by: Günther Noack <gnoack3000@gmail.com>

But please fix the flaky test.

–Günther

^ permalink raw reply

* Re: [PATCH v1 1/2] landlock: Fix log_subdomains_off inheritance across fork()
From: Günther Noack @ 2026-04-07  7:30 UTC (permalink / raw)
  To: Mickaël Salaün
  Cc: Günther Noack, linux-security-module, stable
In-Reply-To: <20260404085001.1604405-1-mic@digikod.net>

Hello!

On Sat, Apr 04, 2026 at 10:49:57AM +0200, Mickaël Salaün wrote:
> hook_cred_transfer() only copies the Landlock security blob when the
> source credential has a domain.  This is inconsistent with
> landlock_restrict_self() which can set log_subdomains_off on a
> credential without creating a domain (via the ruleset_fd=-1 path): the
> field is committed but not preserved across fork() because the child's
> prepare_creds() calls hook_cred_transfer() which skips the copy when
> domain is NULL.
> 
> This breaks the documented use case where a process mutes subdomain logs
> before forking sandboxed children: the children lose the muting and
> their domains produce unexpected audit records.
> 
> Fix this by unconditionally copying the Landlock credential blob.
> landlock_get_ruleset(NULL) is already a safe no-op.
> 
> Cc: Günther Noack <gnoack@google.com>
> Cc: stable@vger.kernel.org
> Fixes: ead9079f7569 ("landlock: Add LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF")
> Signed-off-by: Mickaël Salaün <mic@digikod.net>
> ---
>  security/landlock/cred.c                      |  6 +-
>  tools/testing/selftests/landlock/audit_test.c | 88 +++++++++++++++++++
>  2 files changed, 90 insertions(+), 4 deletions(-)
> 
> diff --git a/security/landlock/cred.c b/security/landlock/cred.c
> index 0cb3edde4d18..cc419de75cd6 100644
> --- a/security/landlock/cred.c
> +++ b/security/landlock/cred.c
> @@ -22,10 +22,8 @@ static void hook_cred_transfer(struct cred *const new,
>  	const struct landlock_cred_security *const old_llcred =
>  		landlock_cred(old);
>  
> -	if (old_llcred->domain) {
> -		landlock_get_ruleset(old_llcred->domain);
> -		*landlock_cred(new) = *old_llcred;
> -	}
> +	landlock_get_ruleset(old_llcred->domain);
> +	*landlock_cred(new) = *old_llcred;

This fix looks correct for the hook_cred_prepare() case (and of
course, hook_cred_prepare() calls hook_cred_transfer() in Landlock).


But I'm afraid I might have spotted another issue here:

If I look at the code in security/keys/process_keys.c, where
security_transfer_creds() is called, the "old" object is actually
already initialized, and if we are not checking for that, I think we
are leaking memory.

I would suggest using the helper landlock_cred_copy() from cred.h for
that.  This one is anyway supposed to be the central place for this
copying logic, and it is safe to use with zeroed-out target objects
(because the put is safe for the NULL-pointer).

Maybe this is worth updating while we are at it?


>  }
>  
>  static int hook_cred_prepare(struct cred *const new,
> diff --git a/tools/testing/selftests/landlock/audit_test.c b/tools/testing/selftests/landlock/audit_test.c
> index 46d02d49835a..20099b8667e7 100644
> --- a/tools/testing/selftests/landlock/audit_test.c
> +++ b/tools/testing/selftests/landlock/audit_test.c
> @@ -279,6 +279,94 @@ TEST_F(audit, thread)
>  				&audit_tv_default, sizeof(audit_tv_default)));
>  }
>  
> +/*
> + * Verifies that log_subdomains_off set via the ruleset_fd=-1 path (without
> + * creating a domain) is inherited by children across fork().  This exercises
> + * the hook_cred_transfer() fix: the Landlock credential blob must be copied
> + * even when the source credential has no domain.
> + *
> + * Phase 1 (baseline): a child without muting creates a domain and triggers a
> + * denial that IS logged.
> + *
> + * Phase 2 (after muting): the parent mutes subdomain logs, forks another child
> + * who creates a domain and triggers a denial that is NOT logged.
> + */
> +TEST_F(audit, log_subdomains_off_fork)
> +{
> +	const struct landlock_ruleset_attr ruleset_attr = {
> +		.scoped = LANDLOCK_SCOPE_SIGNAL,
> +	};
> +	struct audit_records records;
> +	int ruleset_fd, status;
> +	pid_t child;
> +
> +	ruleset_fd =
> +		landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0);
> +	ASSERT_LE(0, ruleset_fd);
> +
> +	ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0));
> +
> +	/*
> +	 * Phase 1: forks a child that creates a domain and triggers a denial
> +	 * before any muting.  This proves the audit path works.
> +	 */
> +	child = fork();
> +	ASSERT_LE(0, child);
> +	if (child == 0) {
> +		ASSERT_EQ(0, landlock_restrict_self(ruleset_fd, 0));
> +		ASSERT_EQ(-1, kill(getppid(), 0));
> +		ASSERT_EQ(EPERM, errno);
> +		_exit(0);
> +		return;
> +	}
> +
> +	ASSERT_EQ(child, waitpid(child, &status, 0));
> +	ASSERT_EQ(true, WIFEXITED(status));
> +	ASSERT_EQ(0, WEXITSTATUS(status));
> +
> +	/* The denial must be logged (baseline). */
> +	EXPECT_EQ(0, matches_log_signal(_metadata, self->audit_fd, getpid(),
> +					NULL));
> +
> +	/* Drains any remaining records (e.g. domain allocation). */
> +	EXPECT_EQ(0, audit_count_records(self->audit_fd, &records));
> +
> +	/*
> +	 * Mutes subdomain logs without creating a domain.  The parent's
> +	 * credential has domain=NULL and log_subdomains_off=1.
> +	 */
> +	ASSERT_EQ(0, landlock_restrict_self(
> +			     -1, LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF));
> +
> +	/*
> +	 * Phase 2: forks a child that creates a domain and triggers a denial.
> +	 * Because log_subdomains_off was inherited via fork(), the child's
> +	 * domain has log_status=LANDLOCK_LOG_DISABLED.
> +	 */
> +	child = fork();
> +	ASSERT_LE(0, child);
> +	if (child == 0) {
> +		ASSERT_EQ(0, landlock_restrict_self(ruleset_fd, 0));
> +		ASSERT_EQ(-1, kill(getppid(), 0));
> +		ASSERT_EQ(EPERM, errno);
> +		_exit(0);
> +		return;
> +	}
> +
> +	ASSERT_EQ(child, waitpid(child, &status, 0));
> +	ASSERT_EQ(true, WIFEXITED(status));
> +	ASSERT_EQ(0, WEXITSTATUS(status));
> +
> +	/* No denial record should appear. */
> +	EXPECT_EQ(-EAGAIN, matches_log_signal(_metadata, self->audit_fd,
> +					      getpid(), NULL));
> +
> +	EXPECT_EQ(0, audit_count_records(self->audit_fd, &records));
> +	EXPECT_EQ(0, records.access);
> +
> +	EXPECT_EQ(0, close(ruleset_fd));
> +}
> +
>  FIXTURE(audit_flags)
>  {
>  	struct audit_filter audit_filter;
> -- 
> 2.53.0
> 

Test looks fine.

While I do still think we should investigate the memory leak, this
commit is, as it is, already a strict improvement over what we had
before, so:

Reviewed-by: Günther Noack <gnoack3000@gmail.com>

–Günther

^ permalink raw reply

* [PATCH] evm: zero-initialize the evm_xattrs read buffer
From: Pengpeng Hou @ 2026-04-07  6:09 UTC (permalink / raw)
  To: Mimi Zohar, Roberto Sassu
  Cc: Dmitry Kasatkin, Eric Snowberg, Paul Moore, James Morris,
	Serge Hallyn, linux-integrity, linux-security-module,
	linux-kernel, pengpeng

evm_read_xattrs() allocates size + 1 bytes, fills them from the list of
enabled xattrs and then passes strlen(temp) to simple_read_from_buffer().
When no configured xattrs are enabled, the fill loop stores nothing and
temp[0] remains uninitialized, so strlen() reads beyond initialized
memory.

Use kzalloc() so the empty-list case stays a valid empty C string.

Signed-off-by: Pengpeng Hou <pengpeng@iscas.ac.cn>
---
 security/integrity/evm/evm_secfs.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/security/integrity/evm/evm_secfs.c b/security/integrity/evm/evm_secfs.c
index acd840461902..03d376fa36c2 100644
--- a/security/integrity/evm/evm_secfs.c
+++ b/security/integrity/evm/evm_secfs.c
@@ -145,7 +145,7 @@ static ssize_t evm_read_xattrs(struct file *filp, char __user *buf,
 		size += strlen(xattr->name) + 1;
 	}
 
-	temp = kmalloc(size + 1, GFP_KERNEL);
+	temp = kzalloc(size + 1, GFP_KERNEL);
 	if (!temp) {
 		mutex_unlock(&xattr_list_mutex);
 		return -ENOMEM;
-- 
2.50.1 (Apple Git-155)



^ permalink raw reply related

* Re: [PATCH v2] KEYS: trusted: Debugging as a feature
From: Nayna Jain @ 2026-04-07  2:42 UTC (permalink / raw)
  To: Jarkko Sakkinen, linux-integrity
  Cc: keyrings, Srish Srinivasan, James Bottomley, Mimi Zohar,
	David Howells, Paul Moore, James Morris, Serge E. Hallyn,
	Ahmad Fatoum, Pengutronix Kernel Team, open list,
	open list:SECURITY SUBSYSTEM
In-Reply-To: <20260324110043.67248-1-jarkko@kernel.org>


On 3/24/26 7:00 AM, Jarkko Sakkinen wrote:
> TPM_DEBUG, and other similar flags, are a non-standard way to specify a
> feature in Linux kernel.  Introduce CONFIG_TRUSTED_KEYS_DEBUG for
> trusted keys, and use it to replace these ad-hoc feature flags.
>
> Given that trusted keys debug dumps can contain sensitive data, harden
> the feature as follows:
>
> 1. In the Kconfig description postulate that pr_debug() statements must be
>     used.
> 2. Use pr_debug() statements in TPM 1.x driver to print the protocol dump.
>
> Traces, when actually needed, can be easily enabled by providing
> trusted.dyndbg='+p' in the kernel command-line.
>
> Cc: Srish Srinivasan <ssrish@linux.ibm.com>
> Reported-by: Nayna Jain <nayna@linux.ibm.com>
> Closes: https://lore.kernel.org/all/7f8b8478-5cd8-4d97-bfd0-341fd5cf10f9@linux.ibm.com/
> Signed-off-by: Jarkko Sakkinen <jarkko@kernel.org>
> ---
> v2:
> - Implement for all trusted keys backends.
> - Add HAVE_TRUSTED_KEYS_DEBUG as it is a good practice despite full
>    coverage.
> ---
>   include/keys/trusted-type.h               | 18 +++++-------
>   security/keys/trusted-keys/Kconfig        | 19 ++++++++++++
>   security/keys/trusted-keys/trusted_caam.c |  4 +--
>   security/keys/trusted-keys/trusted_tpm1.c | 36 +++++++++++------------
>   4 files changed, 46 insertions(+), 31 deletions(-)
>
> diff --git a/include/keys/trusted-type.h b/include/keys/trusted-type.h
> index 03527162613f..620a1f890b6b 100644
> --- a/include/keys/trusted-type.h
> +++ b/include/keys/trusted-type.h
> @@ -83,18 +83,16 @@ struct trusted_key_source {
>   
>   extern struct key_type key_type_trusted;
>   
> -#define TRUSTED_DEBUG 0
> -
> -#if TRUSTED_DEBUG
> +#ifdef CONFIG_TRUSTED_KEYS_DEBUG
>   static inline void dump_payload(struct trusted_key_payload *p)
>   {
> -	pr_info("key_len %d\n", p->key_len);
> -	print_hex_dump(KERN_INFO, "key ", DUMP_PREFIX_NONE,
> -		       16, 1, p->key, p->key_len, 0);
> -	pr_info("bloblen %d\n", p->blob_len);
> -	print_hex_dump(KERN_INFO, "blob ", DUMP_PREFIX_NONE,
> -		       16, 1, p->blob, p->blob_len, 0);
> -	pr_info("migratable %d\n", p->migratable);
> +	pr_debug("key_len %d\n", p->key_len);
> +	print_hex_dump_debug("key ", DUMP_PREFIX_NONE,
> +			     16, 1, p->key, p->key_len, 0);
> +	pr_debug("bloblen %d\n", p->blob_len);
> +	print_hex_dump_debug("blob ", DUMP_PREFIX_NONE,
> +			     16, 1, p->blob, p->blob_len, 0);
> +	pr_debug("migratable %d\n", p->migratable);
>   }
>   #else
>   static inline void dump_payload(struct trusted_key_payload *p)
> diff --git a/security/keys/trusted-keys/Kconfig b/security/keys/trusted-keys/Kconfig
> index 9e00482d886a..2ad9ba0e03f1 100644
> --- a/security/keys/trusted-keys/Kconfig
> +++ b/security/keys/trusted-keys/Kconfig
> @@ -1,10 +1,25 @@
>   config HAVE_TRUSTED_KEYS
>   	bool
>   
> +config HAVE_TRUSTED_KEYS_DEBUG
> +	bool
> +
> +config TRUSTED_KEYS_DEBUG
> +	bool "Debug trusted keys"
> +	depends on HAVE_TRUSTED_KEYS_DEBUG
> +	default n
> +	help
> +	  Trusted keys backends and core code that support debug dumps
> +	  can opt-in that feature here. Dumps must only use DEBUG
> +	  level output, as sensitive data may pass by. In the
> +	  kernel-command line traces can be enabled via
> +	  trusted.dyndbg='+p'.

Would it be a good idea to add an explicit note/warning:


NOTE: This option is intended for debugging purposes only. Do not enable 
on production systems as debug output may expose sensitive cryptographic 
material.
If you are unsure, say N.

Apart from this, looks good to me.

Reviewed-by: Nayna Jain <nayna@linux.ibm.com>


^ permalink raw reply

* Re: [PATCH 1/3] crypto: public_key: Remove check for valid hash_algo for ML-DSA keys
From: Eric Biggers @ 2026-04-06 16:53 UTC (permalink / raw)
  To: Stefan Berger
  Cc: linux-integrity, linux-security-module, linux-kernel, zohar,
	roberto.sassu, David Howells, Lukas Wunner, Ignat Korchagin,
	keyrings, linux-crypto
In-Reply-To: <20260405231224.4008298-2-stefanb@linux.ibm.com>

On Sun, Apr 05, 2026 at 07:12:22PM -0400, Stefan Berger wrote:
> Remove the check for the hash_algo since ML-DSA is only used in pure mode
> and there is no relevance of a hash_algo for the input data.
> 
> Cc: David Howells <dhowells@redhat.com>
> Cc: Lukas Wunner <lukas@wunner.de>
> Cc: Ignat Korchagin <ignat@linux.win>
> Cc: keyrings@vger.kernel.org
> Cc: linux-crypto@vger.kernel.org
> Signed-off-by: Stefan Berger <stefanb@linux.ibm.com>
> ---
>  crypto/asymmetric_keys/public_key.c | 5 -----
>  1 file changed, 5 deletions(-)
> 
> diff --git a/crypto/asymmetric_keys/public_key.c b/crypto/asymmetric_keys/public_key.c
> index 09a0b83d5d77..df6918a77ab8 100644
> --- a/crypto/asymmetric_keys/public_key.c
> +++ b/crypto/asymmetric_keys/public_key.c
> @@ -147,11 +147,6 @@ software_key_determine_akcipher(const struct public_key *pkey,
>  		   strcmp(pkey->pkey_algo, "mldsa87") == 0) {
>  		if (strcmp(encoding, "raw") != 0)
>  			return -EINVAL;
> -		if (!hash_algo)
> -			return -EINVAL;
> -		if (strcmp(hash_algo, "none") != 0 &&
> -		    strcmp(hash_algo, "sha512") != 0)
> -			return -EINVAL;

Does this broaden which hash algorithms are accepted for CMS signatures
that use ML-DSA and contain signed attributes?

- Eric

^ permalink raw reply

* [PATCH v8 9/9] selftests/landlock: Add tests for invalid use of quiet flag
From: Tingmao Wang @ 2026-04-06 15:52 UTC (permalink / raw)
  To: Mickaël Salaün
  Cc: Tingmao Wang, Günther Noack, Justin Suess, Jan Kara,
	Abhinav Saxena, linux-security-module
In-Reply-To: <cover.1775490344.git.m@maowtm.org>

Signed-off-by: Tingmao Wang <m@maowtm.org>
---

Changes in v4:
- New patch

 tools/testing/selftests/landlock/base_test.c | 57 ++++++++++++++++++++
 1 file changed, 57 insertions(+)

diff --git a/tools/testing/selftests/landlock/base_test.c b/tools/testing/selftests/landlock/base_test.c
index 84e91fcaa1b2..af9ad822a444 100644
--- a/tools/testing/selftests/landlock/base_test.c
+++ b/tools/testing/selftests/landlock/base_test.c
@@ -526,4 +526,61 @@ TEST(cred_transfer)
 	EXPECT_EQ(EACCES, errno);
 }
 
+TEST(useless_quiet_rule)
+{
+	struct landlock_ruleset_attr ruleset_attr = {
+		.handled_access_fs = LANDLOCK_ACCESS_FS_READ_DIR,
+		.quiet_access_fs = 0,
+	};
+	struct landlock_path_beneath_attr path_beneath_attr = {
+		.allowed_access = LANDLOCK_ACCESS_FS_READ_DIR,
+	};
+	int ruleset_fd, root_fd;
+
+	drop_caps(_metadata);
+	ruleset_fd =
+		landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0);
+	ASSERT_LE(0, ruleset_fd);
+
+	root_fd = open("/", O_PATH | O_CLOEXEC);
+	ASSERT_LE(0, root_fd);
+	path_beneath_attr.parent_fd = root_fd;
+	ASSERT_EQ(-1, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH,
+					&path_beneath_attr,
+					LANDLOCK_ADD_RULE_QUIET));
+	ASSERT_EQ(EINVAL, errno);
+
+	/* Check that the rule had not been added. */
+	ASSERT_EQ(0, close(root_fd));
+	enforce_ruleset(_metadata, ruleset_fd);
+	ASSERT_EQ(0, close(ruleset_fd));
+
+	ASSERT_EQ(-1, open("/", O_RDONLY | O_DIRECTORY | O_CLOEXEC));
+	ASSERT_EQ(EACCES, errno);
+}
+
+TEST(invalid_quiet_bits_1)
+{
+	struct landlock_ruleset_attr ruleset_attr = {
+		.handled_access_fs = LANDLOCK_ACCESS_FS_READ_DIR,
+		.quiet_access_fs = LANDLOCK_ACCESS_FS_WRITE_FILE,
+	};
+
+	ASSERT_EQ(-1, landlock_create_ruleset(&ruleset_attr,
+					      sizeof(ruleset_attr), 0));
+	ASSERT_EQ(EINVAL, errno);
+}
+
+TEST(invalid_quiet_bits_2)
+{
+	struct landlock_ruleset_attr ruleset_attr = {
+		.handled_access_fs = LANDLOCK_ACCESS_FS_READ_DIR,
+		.quiet_access_fs = 1ULL << 63,
+	};
+
+	ASSERT_EQ(-1, landlock_create_ruleset(&ruleset_attr,
+					      sizeof(ruleset_attr), 0));
+	ASSERT_EQ(EINVAL, errno);
+}
+
 TEST_HARNESS_MAIN
-- 
2.53.0

^ permalink raw reply related

* [PATCH v8 8/9] selftests/landlock: Add tests for quiet flag with scope
From: Tingmao Wang @ 2026-04-06 15:52 UTC (permalink / raw)
  To: Mickaël Salaün
  Cc: Tingmao Wang, Günther Noack, Justin Suess, Jan Kara,
	Abhinav Saxena, linux-security-module
In-Reply-To: <cover.1775490344.git.m@maowtm.org>

Enhance scoped_audit.connect_to_child and audit_flags.signal to test
interaction with various quiet flag settings.

Signed-off-by: Tingmao Wang <m@maowtm.org>
---

Changes in v4:
- New patch

 tools/testing/selftests/landlock/audit_test.c | 25 ++++--
 .../landlock/scoped_abstract_unix_test.c      | 77 ++++++++++++++++---
 2 files changed, 87 insertions(+), 15 deletions(-)

diff --git a/tools/testing/selftests/landlock/audit_test.c b/tools/testing/selftests/landlock/audit_test.c
index 36b0e750e889..a0f51e3e93b1 100644
--- a/tools/testing/selftests/landlock/audit_test.c
+++ b/tools/testing/selftests/landlock/audit_test.c
@@ -607,30 +607,42 @@ FIXTURE(audit_flags)
 FIXTURE_VARIANT(audit_flags)
 {
 	const int restrict_flags;
+	const __u64 quiet_scoped;
 };
 
 /* clang-format off */
 FIXTURE_VARIANT_ADD(audit_flags, default) {
 	/* clang-format on */
 	.restrict_flags = 0,
+	.quiet_scoped = 0,
 };
 
 /* clang-format off */
 FIXTURE_VARIANT_ADD(audit_flags, same_exec_off) {
 	/* clang-format on */
 	.restrict_flags = LANDLOCK_RESTRICT_SELF_LOG_SAME_EXEC_OFF,
+	.quiet_scoped = 0,
 };
 
 /* clang-format off */
 FIXTURE_VARIANT_ADD(audit_flags, subdomains_off) {
 	/* clang-format on */
 	.restrict_flags = LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF,
+	.quiet_scoped = 0,
 };
 
 /* clang-format off */
 FIXTURE_VARIANT_ADD(audit_flags, cross_exec_on) {
 	/* clang-format on */
 	.restrict_flags = LANDLOCK_RESTRICT_SELF_LOG_NEW_EXEC_ON,
+	.quiet_scoped = 0,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(audit_flags, signal_quieted) {
+	/* clang-format on */
+	.restrict_flags = 0,
+	.quiet_scoped = LANDLOCK_SCOPE_SIGNAL,
 };
 
 FIXTURE_SETUP(audit_flags)
@@ -674,12 +686,16 @@ TEST_F(audit_flags, signal)
 	pid_t child;
 	struct audit_records records;
 	__u64 deallocated_dom = 2;
+	bool expect_audit = !(variant->restrict_flags &
+			      LANDLOCK_RESTRICT_SELF_LOG_SAME_EXEC_OFF) &&
+			    !(variant->quiet_scoped & LANDLOCK_SCOPE_SIGNAL);
 
 	child = fork();
 	ASSERT_LE(0, child);
 	if (child == 0) {
 		const struct landlock_ruleset_attr ruleset_attr = {
 			.scoped = LANDLOCK_SCOPE_SIGNAL,
+			.quiet_scoped = variant->quiet_scoped,
 		};
 		int ruleset_fd;
 
@@ -696,8 +712,7 @@ TEST_F(audit_flags, signal)
 		EXPECT_EQ(-1, kill(getppid(), 0));
 		EXPECT_EQ(EPERM, errno);
 
-		if (variant->restrict_flags &
-		    LANDLOCK_RESTRICT_SELF_LOG_SAME_EXEC_OFF) {
+		if (!expect_audit) {
 			EXPECT_EQ(-EAGAIN, matches_log_signal(
 						   _metadata, self->audit_fd,
 						   getppid(), self->domain_id));
@@ -724,8 +739,7 @@ TEST_F(audit_flags, signal)
 
 		/* Makes sure there is no superfluous logged records. */
 		EXPECT_EQ(0, audit_count_records(self->audit_fd, &records));
-		if (variant->restrict_flags &
-		    LANDLOCK_RESTRICT_SELF_LOG_SAME_EXEC_OFF) {
+		if (!expect_audit) {
 			EXPECT_EQ(0, records.access);
 		} else {
 			EXPECT_EQ(1, records.access);
@@ -748,8 +762,7 @@ TEST_F(audit_flags, signal)
 	    WEXITSTATUS(status) != EXIT_SUCCESS)
 		_metadata->exit_code = KSFT_FAIL;
 
-	if (variant->restrict_flags &
-	    LANDLOCK_RESTRICT_SELF_LOG_SAME_EXEC_OFF) {
+	if (!expect_audit) {
 		/*
 		 * No deallocation record: denials=0 never matches a real
 		 * record.
diff --git a/tools/testing/selftests/landlock/scoped_abstract_unix_test.c b/tools/testing/selftests/landlock/scoped_abstract_unix_test.c
index c47491d2d1c1..ac456185b835 100644
--- a/tools/testing/selftests/landlock/scoped_abstract_unix_test.c
+++ b/tools/testing/selftests/landlock/scoped_abstract_unix_test.c
@@ -293,6 +293,45 @@ FIXTURE_TEARDOWN_PARENT(scoped_audit)
 	EXPECT_EQ(0, audit_cleanup(-1, NULL));
 }
 
+FIXTURE_VARIANT(scoped_audit)
+{
+	const __u64 scoped; /* Copied into landlock_ruleset_attr.scoped. */
+	const __u64 quiet_scoped; /* Copied into ruleset_attr.quiet_scoped. */
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(scoped_audit, no_quiet)
+{
+	/* clang-format on */
+	.scoped = LANDLOCK_SCOPE_ABSTRACT_UNIX_SOCKET,
+	.quiet_scoped = 0,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(scoped_audit, quiet_abstract_socket)
+{
+	/* clang-format on */
+	.scoped = LANDLOCK_SCOPE_ABSTRACT_UNIX_SOCKET,
+	.quiet_scoped = LANDLOCK_SCOPE_ABSTRACT_UNIX_SOCKET,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(scoped_audit, quiet_abstract_socket_2)
+{
+	/* clang-format on */
+	.scoped = LANDLOCK_SCOPE_ABSTRACT_UNIX_SOCKET | LANDLOCK_SCOPE_SIGNAL,
+	.quiet_scoped = LANDLOCK_SCOPE_ABSTRACT_UNIX_SOCKET |
+			LANDLOCK_SCOPE_SIGNAL,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(scoped_audit, quiet_unrelated)
+{
+	/* clang-format on */
+	.scoped = LANDLOCK_SCOPE_ABSTRACT_UNIX_SOCKET | LANDLOCK_SCOPE_SIGNAL,
+	.quiet_scoped = LANDLOCK_SCOPE_SIGNAL,
+};
+
 /* python -c 'print(b"\0selftests-landlock-abstract-unix-".hex().upper())' */
 #define ABSTRACT_SOCKET_PATH_PREFIX \
 	"0073656C6674657374732D6C616E646C6F636B2D61627374726163742D756E69782D"
@@ -308,6 +347,13 @@ TEST_F(scoped_audit, connect_to_child)
 	char buf;
 	int dgram_client;
 	struct audit_records records;
+	int ruleset_fd;
+	const struct landlock_ruleset_attr ruleset_attr = {
+		.scoped = variant->scoped,
+		.quiet_scoped = variant->quiet_scoped,
+	};
+	bool should_audit =
+		!(variant->quiet_scoped & LANDLOCK_SCOPE_ABSTRACT_UNIX_SOCKET);
 
 	/* Makes sure there is no superfluous logged records. */
 	EXPECT_EQ(0, audit_count_records(self->audit_fd, &records));
@@ -344,7 +390,14 @@ TEST_F(scoped_audit, connect_to_child)
 	EXPECT_EQ(0, close(pipe_child[1]));
 	EXPECT_EQ(0, close(pipe_parent[0]));
 
-	create_scoped_domain(_metadata, LANDLOCK_SCOPE_ABSTRACT_UNIX_SOCKET);
+	ruleset_fd =
+		landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0);
+	ASSERT_LE(0, ruleset_fd)
+	{
+		TH_LOG("Failed to create a ruleset: %s", strerror(errno));
+	}
+	enforce_ruleset(_metadata, ruleset_fd);
+	EXPECT_EQ(0, close(ruleset_fd));
 
 	/* Signals that the parent is in a domain, if any. */
 	ASSERT_EQ(1, write(pipe_parent[1], ".", 1));
@@ -359,14 +412,20 @@ TEST_F(scoped_audit, connect_to_child)
 	EXPECT_EQ(-1, err_dgram);
 	EXPECT_EQ(EPERM, errno);
 
-	EXPECT_EQ(
-		0,
-		audit_match_record(
-			self->audit_fd, AUDIT_LANDLOCK_ACCESS,
-			REGEX_LANDLOCK_PREFIX
-			" blockers=scope\\.abstract_unix_socket path=" ABSTRACT_SOCKET_PATH_PREFIX
-			"[0-9A-F]\\+$",
-			NULL));
+	if (should_audit) {
+		EXPECT_EQ(
+			0,
+			audit_match_record(
+				self->audit_fd, AUDIT_LANDLOCK_ACCESS,
+				REGEX_LANDLOCK_PREFIX
+				" blockers=scope\\.abstract_unix_socket path=" ABSTRACT_SOCKET_PATH_PREFIX
+				"[0-9A-F]\\+$",
+				NULL));
+	}
+
+	/* Makes sure there are no other logged access records. */
+	EXPECT_EQ(0, audit_count_records(self->audit_fd, &records));
+	EXPECT_EQ(0, records.access);
 
 	ASSERT_EQ(1, write(pipe_parent[1], ".", 1));
 	EXPECT_EQ(0, close(dgram_client));
-- 
2.53.0

^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox