* [PATCH v11 3/9] landlock: Suppress logging when quiet flag is present
From: Tingmao Wang @ 2026-06-12 1:48 UTC (permalink / raw)
To: Mickaël Salaün
Cc: Tingmao Wang, Günther Noack, Justin Suess, Jan Kara,
Abhinav Saxena, linux-security-module
In-Reply-To: <cover.1781228815.git.m@maowtm.org>
The quietness behaviour is as documented in the previous patch.
For optional accesses, since the existing deny_masks can only store 2x4bit
of layer index, with no way to represent "no layer", we need to either
expand it or have another field to correctly handle quieting of those.
This commit uses the latter approach - we add another field to store which
optional accesses (of the 2) are covered by quiet rules in their respective
layers as stored in deny_masks.
Assisted-by: GitHub-Copilot:claude-opus-4.8 copilot-review
Signed-off-by: Tingmao Wang <m@maowtm.org>
---
Changes in v11:
- Add quiet_optional_accesses invariant check in is_valid_request
- Enhance docstring on landlock_get_quiet_optional_accesses
- Don't use bitfields for fown_layer and quiet_optional_accesses
- Also remove the newly added build_check_file_security() and use
bits_per_field based check for quiet_optional_accesses since we now
don't have a bitfield
Changes in v10:
- clang-format header file changes too
- Fix grammar in some comments
Changes in v9:
- Fix conflict
- Applied struct layer_masks changes to this as well.
- Replace 4 with HWEIGHT(LANDLOCK_MAX_NUM_LAYERS - 1) in
landlock_get_quiet_optional_accesses()
- Replace 4 with HWEIGHT in (existing) get_layer_from_deny_masks as
well.
- Use optional_access_t typedef for all quiet_optional_accesses values
instead of u8
Changes in v8:
- Rebase on top of mic/next
- Populate request.rule_flags in hook_unix_find()
Changes in v7:
- Following change in commit 1, now we need to copy rule_flags into
landlock_request before calling landlock_log_denial for relevant fs
denials
- Remove left over param comment
Changes in v5:
- Update code style and comment in get_layer_from_deny_masks() and
landlock_log_denial()
- Now that rule_flags is moved into landlock_request, this version removes
the extra parameter for landlock_log_denial and gets rid of
no_rule_flags, simplifying some code.
- Fix build failure without CONFIG_AUDIT (reported by Justin Suess)
Changes in v3:
- Renamed patch title from "Check for quiet flag in landlock_log_denial"
to this given the growth.
- Moved quiet bit check after domain_exec check
- Rename, style and comment fixes suggested by Mickaël.
- Squashed patch 6/6 from v2 "Implement quiet for optional accesses" into
this one. Changes to that below:
- Refactor the quiet flag setting in get_layer_from_deny_masks() to be
more clear.
- Add KUnit tests
- Fix comments, add WARN_ON_ONCE, use __const_hweight64() as suggested by
review
- Move build_check_file_security to fs.c
- Use a typedef for quiet_optional_accesses, add static_assert, and
improve docs on landlock_get_quiet_optional_accesses.
Changes in v2:
- Supports the new quiet access masks.
- Support quieting scope requests (but not ptrace and attempted mounting
for now)
security/landlock/access.h | 5 +
security/landlock/audit.c | 268 ++++++++++++++++++++++++++++++++++---
security/landlock/audit.h | 1 +
security/landlock/domain.c | 38 ++++++
security/landlock/domain.h | 4 +
security/landlock/fs.c | 6 +
security/landlock/fs.h | 19 ++-
security/landlock/net.c | 15 +--
8 files changed, 330 insertions(+), 26 deletions(-)
diff --git a/security/landlock/access.h b/security/landlock/access.h
index 94f4b9fb7238..e81164876c7d 100644
--- a/security/landlock/access.h
+++ b/security/landlock/access.h
@@ -143,4 +143,9 @@ static inline bool access_mask_subset(access_mask_t subset,
return (subset | superset) == superset;
}
+/* A bitmask that is large enough to hold the set of optional accesses. */
+typedef u8 optional_access_t;
+static_assert(BITS_PER_TYPE(optional_access_t) >=
+ HWEIGHT(_LANDLOCK_ACCESS_FS_OPTIONAL));
+
#endif /* _SECURITY_LANDLOCK_ACCESS_H */
diff --git a/security/landlock/audit.c b/security/landlock/audit.c
index 8c56f7f6467a..738d8c810b2c 100644
--- a/security/landlock/audit.c
+++ b/security/landlock/audit.c
@@ -249,7 +249,9 @@ static void test_get_denied_layer(struct kunit *const test)
static size_t
get_layer_from_deny_masks(access_mask_t *const access_request,
const access_mask_t all_existing_optional_access,
- const deny_masks_t deny_masks)
+ const deny_masks_t deny_masks,
+ optional_access_t quiet_optional_accesses,
+ bool *quiet)
{
const unsigned long access_opt = all_existing_optional_access;
const unsigned long access_req = *access_request;
@@ -257,6 +259,7 @@ get_layer_from_deny_masks(access_mask_t *const access_request,
size_t youngest_layer = 0;
size_t access_index = 0;
unsigned long access_bit;
+ bool should_quiet = false;
/* This will require change with new object types. */
WARN_ON_ONCE(access_opt != _LANDLOCK_ACCESS_FS_OPTIONAL);
@@ -265,20 +268,33 @@ get_layer_from_deny_masks(access_mask_t *const access_request,
BITS_PER_TYPE(access_mask_t)) {
if (access_req & BIT(access_bit)) {
const size_t layer =
- (deny_masks >> (access_index * 4)) &
+ (deny_masks >>
+ (access_index *
+ HWEIGHT(LANDLOCK_MAX_NUM_LAYERS - 1))) &
(LANDLOCK_MAX_NUM_LAYERS - 1);
+ const bool layer_has_quiet =
+ !!(quiet_optional_accesses & BIT(access_index));
if (layer > youngest_layer) {
youngest_layer = layer;
missing = BIT(access_bit);
+ should_quiet = layer_has_quiet;
} else if (layer == youngest_layer) {
missing |= BIT(access_bit);
+ /*
+ * Whether the layer has rules with quiet flag covering
+ * the file accessed does not depend on the access, and so
+ * the following WARN_ON_ONCE() should not fail.
+ */
+ WARN_ON_ONCE(should_quiet && !layer_has_quiet);
+ should_quiet = layer_has_quiet;
}
}
access_index++;
}
*access_request = missing;
+ *quiet = should_quiet;
return youngest_layer;
}
@@ -288,42 +304,188 @@ static void test_get_layer_from_deny_masks(struct kunit *const test)
{
deny_masks_t deny_mask;
access_mask_t access;
+ optional_access_t quiet_optional_accesses;
+ bool quiet;
/* truncate:0 ioctl_dev:2 */
deny_mask = 0x20;
+ quiet_optional_accesses = 0;
access = LANDLOCK_ACCESS_FS_TRUNCATE;
KUNIT_EXPECT_EQ(test, 0,
- get_layer_from_deny_masks(&access,
- _LANDLOCK_ACCESS_FS_OPTIONAL,
- deny_mask));
+ get_layer_from_deny_masks(
+ &access, _LANDLOCK_ACCESS_FS_OPTIONAL,
+ deny_mask, quiet_optional_accesses, &quiet));
KUNIT_EXPECT_EQ(test, access, LANDLOCK_ACCESS_FS_TRUNCATE);
+ KUNIT_EXPECT_EQ(test, quiet, false);
+
+ access = LANDLOCK_ACCESS_FS_IOCTL_DEV;
+ KUNIT_EXPECT_EQ(test, 2,
+ get_layer_from_deny_masks(
+ &access, _LANDLOCK_ACCESS_FS_OPTIONAL,
+ deny_mask, quiet_optional_accesses, &quiet));
+ KUNIT_EXPECT_EQ(test, access, LANDLOCK_ACCESS_FS_IOCTL_DEV);
+ KUNIT_EXPECT_EQ(test, quiet, false);
access = LANDLOCK_ACCESS_FS_TRUNCATE | LANDLOCK_ACCESS_FS_IOCTL_DEV;
KUNIT_EXPECT_EQ(test, 2,
- get_layer_from_deny_masks(&access,
- _LANDLOCK_ACCESS_FS_OPTIONAL,
- deny_mask));
+ get_layer_from_deny_masks(
+ &access, _LANDLOCK_ACCESS_FS_OPTIONAL,
+ deny_mask, quiet_optional_accesses, &quiet));
+ KUNIT_EXPECT_EQ(test, access, LANDLOCK_ACCESS_FS_IOCTL_DEV);
+ KUNIT_EXPECT_EQ(test, quiet, false);
+
+ /* layer denying truncate: quiet, ioctl: not quiet */
+ quiet_optional_accesses = 0b01;
+
+ access = LANDLOCK_ACCESS_FS_TRUNCATE;
+ KUNIT_EXPECT_EQ(test, 0,
+ get_layer_from_deny_masks(
+ &access, _LANDLOCK_ACCESS_FS_OPTIONAL,
+ deny_mask, quiet_optional_accesses, &quiet));
+ KUNIT_EXPECT_EQ(test, access, LANDLOCK_ACCESS_FS_TRUNCATE);
+ KUNIT_EXPECT_EQ(test, quiet, true);
+
+ access = LANDLOCK_ACCESS_FS_IOCTL_DEV;
+ KUNIT_EXPECT_EQ(test, 2,
+ get_layer_from_deny_masks(
+ &access, _LANDLOCK_ACCESS_FS_OPTIONAL,
+ deny_mask, quiet_optional_accesses, &quiet));
KUNIT_EXPECT_EQ(test, access, LANDLOCK_ACCESS_FS_IOCTL_DEV);
+ KUNIT_EXPECT_EQ(test, quiet, false);
+
+ access = LANDLOCK_ACCESS_FS_TRUNCATE | LANDLOCK_ACCESS_FS_IOCTL_DEV;
+ KUNIT_EXPECT_EQ(test, 2,
+ get_layer_from_deny_masks(
+ &access, _LANDLOCK_ACCESS_FS_OPTIONAL,
+ deny_mask, quiet_optional_accesses, &quiet));
+ KUNIT_EXPECT_EQ(test, access, LANDLOCK_ACCESS_FS_IOCTL_DEV);
+ KUNIT_EXPECT_EQ(test, quiet, false);
+
+ /* Reverse order - truncate:2 ioctl_dev:0 */
+ deny_mask = 0x02;
+ quiet_optional_accesses = 0;
+
+ access = LANDLOCK_ACCESS_FS_TRUNCATE;
+ KUNIT_EXPECT_EQ(test, 2,
+ get_layer_from_deny_masks(
+ &access, _LANDLOCK_ACCESS_FS_OPTIONAL,
+ deny_mask, quiet_optional_accesses, &quiet));
+ KUNIT_EXPECT_EQ(test, access, LANDLOCK_ACCESS_FS_TRUNCATE);
+ KUNIT_EXPECT_EQ(test, quiet, false);
+
+ access = LANDLOCK_ACCESS_FS_IOCTL_DEV;
+ KUNIT_EXPECT_EQ(test, 0,
+ get_layer_from_deny_masks(
+ &access, _LANDLOCK_ACCESS_FS_OPTIONAL,
+ deny_mask, quiet_optional_accesses, &quiet));
+ KUNIT_EXPECT_EQ(test, access, LANDLOCK_ACCESS_FS_IOCTL_DEV);
+ KUNIT_EXPECT_EQ(test, quiet, false);
+
+ access = LANDLOCK_ACCESS_FS_TRUNCATE | LANDLOCK_ACCESS_FS_IOCTL_DEV;
+ KUNIT_EXPECT_EQ(test, 2,
+ get_layer_from_deny_masks(
+ &access, _LANDLOCK_ACCESS_FS_OPTIONAL,
+ deny_mask, quiet_optional_accesses, &quiet));
+ KUNIT_EXPECT_EQ(test, access, LANDLOCK_ACCESS_FS_TRUNCATE);
+ KUNIT_EXPECT_EQ(test, quiet, false);
+
+ /* layer denying truncate: quiet, ioctl: not quiet */
+ quiet_optional_accesses = 0b01;
+
+ access = LANDLOCK_ACCESS_FS_TRUNCATE;
+ KUNIT_EXPECT_EQ(test, 2,
+ get_layer_from_deny_masks(
+ &access, _LANDLOCK_ACCESS_FS_OPTIONAL,
+ deny_mask, quiet_optional_accesses, &quiet));
+ KUNIT_EXPECT_EQ(test, access, LANDLOCK_ACCESS_FS_TRUNCATE);
+ KUNIT_EXPECT_EQ(test, quiet, true);
+
+ access = LANDLOCK_ACCESS_FS_IOCTL_DEV;
+ KUNIT_EXPECT_EQ(test, 0,
+ get_layer_from_deny_masks(
+ &access, _LANDLOCK_ACCESS_FS_OPTIONAL,
+ deny_mask, quiet_optional_accesses, &quiet));
+ KUNIT_EXPECT_EQ(test, access, LANDLOCK_ACCESS_FS_IOCTL_DEV);
+ KUNIT_EXPECT_EQ(test, quiet, false);
+
+ access = LANDLOCK_ACCESS_FS_TRUNCATE | LANDLOCK_ACCESS_FS_IOCTL_DEV;
+ KUNIT_EXPECT_EQ(test, 2,
+ get_layer_from_deny_masks(
+ &access, _LANDLOCK_ACCESS_FS_OPTIONAL,
+ deny_mask, quiet_optional_accesses, &quiet));
+ KUNIT_EXPECT_EQ(test, access, LANDLOCK_ACCESS_FS_TRUNCATE);
+ KUNIT_EXPECT_EQ(test, quiet, true);
+
+ /* layer denying truncate: not quiet, ioctl: quiet */
+ quiet_optional_accesses = 0b10;
+
+ access = LANDLOCK_ACCESS_FS_TRUNCATE;
+ KUNIT_EXPECT_EQ(test, 2,
+ get_layer_from_deny_masks(
+ &access, _LANDLOCK_ACCESS_FS_OPTIONAL,
+ deny_mask, quiet_optional_accesses, &quiet));
+ KUNIT_EXPECT_EQ(test, access, LANDLOCK_ACCESS_FS_TRUNCATE);
+ KUNIT_EXPECT_EQ(test, quiet, false);
+
+ access = LANDLOCK_ACCESS_FS_IOCTL_DEV;
+ KUNIT_EXPECT_EQ(test, 0,
+ get_layer_from_deny_masks(
+ &access, _LANDLOCK_ACCESS_FS_OPTIONAL,
+ deny_mask, quiet_optional_accesses, &quiet));
+ KUNIT_EXPECT_EQ(test, access, LANDLOCK_ACCESS_FS_IOCTL_DEV);
+ KUNIT_EXPECT_EQ(test, quiet, true);
+
+ access = LANDLOCK_ACCESS_FS_TRUNCATE | LANDLOCK_ACCESS_FS_IOCTL_DEV;
+ KUNIT_EXPECT_EQ(test, 2,
+ get_layer_from_deny_masks(
+ &access, _LANDLOCK_ACCESS_FS_OPTIONAL,
+ deny_mask, quiet_optional_accesses, &quiet));
+ KUNIT_EXPECT_EQ(test, access, LANDLOCK_ACCESS_FS_TRUNCATE);
+ KUNIT_EXPECT_EQ(test, quiet, false);
/* truncate:15 ioctl_dev:15 */
deny_mask = 0xff;
+ quiet_optional_accesses = 0;
+
+ access = LANDLOCK_ACCESS_FS_TRUNCATE;
+ KUNIT_EXPECT_EQ(test, 15,
+ get_layer_from_deny_masks(
+ &access, _LANDLOCK_ACCESS_FS_OPTIONAL,
+ deny_mask, quiet_optional_accesses, &quiet));
+ KUNIT_EXPECT_EQ(test, access, LANDLOCK_ACCESS_FS_TRUNCATE);
+ KUNIT_EXPECT_EQ(test, quiet, false);
+
+ access = LANDLOCK_ACCESS_FS_TRUNCATE | LANDLOCK_ACCESS_FS_IOCTL_DEV;
+ KUNIT_EXPECT_EQ(test, 15,
+ get_layer_from_deny_masks(
+ &access, _LANDLOCK_ACCESS_FS_OPTIONAL,
+ deny_mask, quiet_optional_accesses, &quiet));
+ KUNIT_EXPECT_EQ(test, access,
+ LANDLOCK_ACCESS_FS_TRUNCATE |
+ LANDLOCK_ACCESS_FS_IOCTL_DEV);
+ KUNIT_EXPECT_EQ(test, quiet, false);
+
+ /* Both quiet (same layer so quietness must be the same) */
+ quiet_optional_accesses = 0b11;
access = LANDLOCK_ACCESS_FS_TRUNCATE;
KUNIT_EXPECT_EQ(test, 15,
- get_layer_from_deny_masks(&access,
- _LANDLOCK_ACCESS_FS_OPTIONAL,
- deny_mask));
+ get_layer_from_deny_masks(
+ &access, _LANDLOCK_ACCESS_FS_OPTIONAL,
+ deny_mask, quiet_optional_accesses, &quiet));
KUNIT_EXPECT_EQ(test, access, LANDLOCK_ACCESS_FS_TRUNCATE);
+ KUNIT_EXPECT_EQ(test, quiet, true);
access = LANDLOCK_ACCESS_FS_TRUNCATE | LANDLOCK_ACCESS_FS_IOCTL_DEV;
KUNIT_EXPECT_EQ(test, 15,
- get_layer_from_deny_masks(&access,
- _LANDLOCK_ACCESS_FS_OPTIONAL,
- deny_mask));
+ get_layer_from_deny_masks(
+ &access, _LANDLOCK_ACCESS_FS_OPTIONAL,
+ deny_mask, quiet_optional_accesses, &quiet));
KUNIT_EXPECT_EQ(test, access,
LANDLOCK_ACCESS_FS_TRUNCATE |
LANDLOCK_ACCESS_FS_IOCTL_DEV);
+ KUNIT_EXPECT_EQ(test, quiet, true);
}
#endif /* CONFIG_SECURITY_LANDLOCK_KUNIT_TEST */
@@ -349,11 +511,34 @@ static bool is_valid_request(const struct landlock_request *const request)
if (request->deny_masks) {
if (WARN_ON_ONCE(!request->all_existing_optional_access))
return false;
+ static_assert(sizeof(request->all_existing_optional_access) ==
+ sizeof(u32));
+ if (WARN_ON_ONCE(
+ request->quiet_optional_accesses >=
+ BIT(hweight32(
+ request->all_existing_optional_access))))
+ return false;
}
return true;
}
+static access_mask_t
+pick_access_mask_for_request_type(const enum landlock_request_type type,
+ const struct access_masks access_masks)
+{
+ switch (type) {
+ case LANDLOCK_REQUEST_FS_ACCESS:
+ return access_masks.fs;
+ case LANDLOCK_REQUEST_NET_ACCESS:
+ return access_masks.net;
+ default:
+ WARN_ONCE(1, "Invalid request type %d passed to %s", type,
+ __func__);
+ return 0;
+ }
+}
+
/**
* landlock_log_denial - Create audit records related to a denial
*
@@ -367,6 +552,7 @@ void landlock_log_denial(const struct landlock_cred_security *const subject,
struct landlock_hierarchy *youngest_denied;
size_t youngest_layer;
access_mask_t missing;
+ bool object_quiet_flag = false, quiet_applicable_to_access = false;
if (WARN_ON_ONCE(!subject || !subject->domain ||
!subject->domain->hierarchy || !request))
@@ -382,10 +568,15 @@ void landlock_log_denial(const struct landlock_cred_security *const subject,
youngest_layer = get_denied_layer(subject->domain,
&missing,
request->layer_masks);
+ object_quiet_flag =
+ request->layer_masks->layers[youngest_layer]
+ .quiet;
} else {
youngest_layer = get_layer_from_deny_masks(
&missing, _LANDLOCK_ACCESS_FS_OPTIONAL,
- request->deny_masks);
+ request->deny_masks,
+ request->quiet_optional_accesses,
+ &object_quiet_flag);
}
youngest_denied =
get_hierarchy(subject->domain, youngest_layer);
@@ -420,6 +611,53 @@ void landlock_log_denial(const struct landlock_cred_security *const subject,
return;
}
+ /*
+ * Checks if the object is marked quiet by the layer that denied the
+ * request. If it's a different layer that marked it as quiet, but
+ * that layer is not the one that denied the request, we should still
+ * audit log the denial.
+ */
+ if (object_quiet_flag) {
+ /*
+ * We now check if the denied requests are all covered by the
+ * layer's quiet access bits.
+ */
+ const access_mask_t quiet_mask =
+ pick_access_mask_for_request_type(
+ request->type, youngest_denied->quiet_masks);
+
+ quiet_applicable_to_access = (quiet_mask & missing) == missing;
+ } else {
+ /*
+ * Either the object is not quiet, or this is a scope request. We
+ * check request->type to distinguish between the two cases.
+ */
+ const access_mask_t quiet_mask =
+ youngest_denied->quiet_masks.scope;
+
+ switch (request->type) {
+ case LANDLOCK_REQUEST_SCOPE_SIGNAL:
+ quiet_applicable_to_access =
+ !!(quiet_mask & LANDLOCK_SCOPE_SIGNAL);
+ break;
+ case LANDLOCK_REQUEST_SCOPE_ABSTRACT_UNIX_SOCKET:
+ quiet_applicable_to_access =
+ !!(quiet_mask &
+ LANDLOCK_SCOPE_ABSTRACT_UNIX_SOCKET);
+ break;
+ /*
+ * Leave LANDLOCK_REQUEST_PTRACE and
+ * LANDLOCK_REQUEST_FS_CHANGE_TOPOLOGY unhandled for now - they are
+ * never quiet.
+ */
+ default:
+ break;
+ }
+ }
+
+ if (quiet_applicable_to_access)
+ return;
+
/* Uses consistent allocation flags wrt common_lsm_audit(). */
ab = audit_log_start(audit_context(), GFP_ATOMIC | __GFP_NOWARN,
AUDIT_LANDLOCK_ACCESS);
diff --git a/security/landlock/audit.h b/security/landlock/audit.h
index b85d752273ac..620f8a24291d 100644
--- a/security/landlock/audit.h
+++ b/security/landlock/audit.h
@@ -48,6 +48,7 @@ struct landlock_request {
/* Required fields for requests with deny masks. */
const access_mask_t all_existing_optional_access;
deny_masks_t deny_masks;
+ optional_access_t quiet_optional_accesses;
};
#ifdef CONFIG_AUDIT
diff --git a/security/landlock/domain.c b/security/landlock/domain.c
index d1a4d8b33ee1..6f1cff739ae8 100644
--- a/security/landlock/domain.c
+++ b/security/landlock/domain.c
@@ -157,6 +157,44 @@ get_layer_deny_mask(const access_mask_t all_existing_optional_access,
<< ((access_weight - 1) * HWEIGHT(LANDLOCK_MAX_NUM_LAYERS - 1));
}
+/**
+ * landlock_get_quiet_optional_accesses - Get optional accesses which are
+ * covered by quiet rule flags.
+ *
+ * @all_existing_optional_access: Bitmask of valid optional accesses.
+ * @deny_masks: Domain layer levels that denied each optional access (the
+ * deny_masks field on struct landlock_file_security).
+ * @masks: The struct layer_masks collected during the path walk.
+ *
+ * Return: a bitmask of which optional accesses are denied by layers for
+ * which the quiet flag was collected during the path walk.
+ */
+optional_access_t landlock_get_quiet_optional_accesses(
+ const access_mask_t all_existing_optional_access,
+ const deny_masks_t deny_masks, const struct layer_masks *const masks)
+{
+ const unsigned long access_opt = all_existing_optional_access;
+ size_t access_index = 0;
+ unsigned long access_bit;
+ optional_access_t quiet_optional_accesses = 0;
+
+ /* This will require change with new object types. */
+ WARN_ON_ONCE(access_opt != _LANDLOCK_ACCESS_FS_OPTIONAL);
+
+ for_each_set_bit(access_bit, &access_opt,
+ BITS_PER_TYPE(access_mask_t)) {
+ const u8 layer =
+ (deny_masks >> (access_index *
+ HWEIGHT(LANDLOCK_MAX_NUM_LAYERS - 1))) &
+ (LANDLOCK_MAX_NUM_LAYERS - 1);
+
+ if (masks->layers[layer].quiet)
+ quiet_optional_accesses |= BIT(access_index);
+ access_index++;
+ }
+ return quiet_optional_accesses;
+}
+
#ifdef CONFIG_SECURITY_LANDLOCK_KUNIT_TEST
static void test_get_layer_deny_mask(struct kunit *const test)
diff --git a/security/landlock/domain.h b/security/landlock/domain.h
index 9f560f3c3bd1..2a1660e3dea7 100644
--- a/security/landlock/domain.h
+++ b/security/landlock/domain.h
@@ -126,6 +126,10 @@ landlock_get_deny_masks(const access_mask_t all_existing_optional_access,
const access_mask_t optional_access,
const struct layer_masks *const masks);
+optional_access_t landlock_get_quiet_optional_accesses(
+ const access_mask_t all_existing_optional_access,
+ const deny_masks_t deny_masks, const struct layer_masks *const masks);
+
int landlock_init_hierarchy_log(struct landlock_hierarchy *const hierarchy);
static inline void
diff --git a/security/landlock/fs.c b/security/landlock/fs.c
index a096e4aa7fcd..ccb2fe4fa056 100644
--- a/security/landlock/fs.c
+++ b/security/landlock/fs.c
@@ -1805,6 +1805,10 @@ static int hook_file_open(struct file *const file)
#ifdef CONFIG_AUDIT
landlock_file(file)->deny_masks = landlock_get_deny_masks(
_LANDLOCK_ACCESS_FS_OPTIONAL, optional_access, &layer_masks);
+ landlock_file(file)->quiet_optional_accesses =
+ landlock_get_quiet_optional_accesses(
+ _LANDLOCK_ACCESS_FS_OPTIONAL,
+ landlock_file(file)->deny_masks, &layer_masks);
#endif /* CONFIG_AUDIT */
if (access_mask_subset(open_access_request, allowed_access))
@@ -1841,6 +1845,7 @@ static int hook_file_truncate(struct file *const file)
.access = LANDLOCK_ACCESS_FS_TRUNCATE,
#ifdef CONFIG_AUDIT
.deny_masks = landlock_file(file)->deny_masks,
+ .quiet_optional_accesses = landlock_file(file)->quiet_optional_accesses,
#endif /* CONFIG_AUDIT */
});
return -EACCES;
@@ -1880,6 +1885,7 @@ static int hook_file_ioctl_common(const struct file *const file,
.access = LANDLOCK_ACCESS_FS_IOCTL_DEV,
#ifdef CONFIG_AUDIT
.deny_masks = landlock_file(file)->deny_masks,
+ .quiet_optional_accesses = landlock_file(file)->quiet_optional_accesses,
#endif /* CONFIG_AUDIT */
});
return -EACCES;
diff --git a/security/landlock/fs.h b/security/landlock/fs.h
index e4c530511360..7efe9b172acf 100644
--- a/security/landlock/fs.h
+++ b/security/landlock/fs.h
@@ -63,6 +63,14 @@ struct landlock_file_security {
* _LANDLOCK_ACCESS_FS_OPTIONAL).
*/
deny_masks_t deny_masks;
+ /**
+ * @quiet_optional_accesses: Stores which optional accesses are
+ * covered by quiet rules within the layer referred to in deny_masks,
+ * one access per bit. Does not take into account whether the quiet
+ * access bits are actually set in the layer's corresponding
+ * landlock_hierarchy.
+ */
+ optional_access_t quiet_optional_accesses;
/**
* @fown_layer: Layer level of @fown_subject->domain with
* LANDLOCK_SCOPE_SIGNAL.
@@ -91,13 +99,18 @@ struct landlock_file_security {
};
#ifdef CONFIG_AUDIT
-
/* Makes sure all layers can be identified. */
/* clang-format off */
static_assert((typeof_member(struct landlock_file_security, fown_layer))~0 >=
LANDLOCK_MAX_NUM_LAYERS);
-/* clang-format off */
-
+/* clang-format on */
+/*
+ * Make sure quiet_optional_accesses has enough bits to cover all optional
+ * accesses.
+ */
+static_assert(BITS_PER_TYPE(typeof_member(struct landlock_file_security,
+ quiet_optional_accesses)) >=
+ HWEIGHT(_LANDLOCK_ACCESS_FS_OPTIONAL));
#endif /* CONFIG_AUDIT */
/**
diff --git a/security/landlock/net.c b/security/landlock/net.c
index 4b4f974dc877..51050dd39a3a 100644
--- a/security/landlock/net.c
+++ b/security/landlock/net.c
@@ -250,14 +250,13 @@ static int current_check_access_socket(struct socket *const sock,
audit_net.family = address->sa_family;
audit_net.sk = sock->sk;
- landlock_log_denial(subject,
- &(struct landlock_request){
- .type = LANDLOCK_REQUEST_NET_ACCESS,
- .audit.type = LSM_AUDIT_DATA_NET,
- .audit.u.net = &audit_net,
- .access = access_request,
- .layer_masks = &layer_masks,
- });
+ landlock_log_denial(
+ subject,
+ &(struct landlock_request){ .type = LANDLOCK_REQUEST_NET_ACCESS,
+ .audit.type = LSM_AUDIT_DATA_NET,
+ .audit.u.net = &audit_net,
+ .access = access_request,
+ .layer_masks = &layer_masks });
return -EACCES;
}
--
2.54.0
^ permalink raw reply related
* [PATCH v11 2/9] landlock: Add API support and docs for the quiet flags
From: Tingmao Wang @ 2026-06-12 1:48 UTC (permalink / raw)
To: Mickaël Salaün
Cc: Tingmao Wang, Günther Noack, Justin Suess, Jan Kara,
Abhinav Saxena, linux-security-module
In-Reply-To: <cover.1781228815.git.m@maowtm.org>
Adds the UAPI for the quiet flags feature (but not the implementation
yet).
Even though currently LANDLOCK_ADD_RULE_QUIET only affects audit
logging, in the future this can also be used as part of a supervisor
mechanism, where it will also suppress denial notifications on a
per-object basis. Thus the name is deliberately generic, as opposed to
e.g. LANDLOCK_ADD_RULE_LOG_QUIET.
According to pahole, even after adding the struct access_masks quiet_masks
in struct landlock_hierarchy, the u32 log_* bitfield still only has a size
of 2 bytes, so there's minimal wasted space.
Assisted-by: GitHub-Copilot:claude-opus-4.8
Signed-off-by: Tingmao Wang <m@maowtm.org>
---
Changes in v11:
- Fix doc: clarify that one landlock_add_rule() call with the quiet flag
is enough to mark the object as quiet.
- audit log -> log
- Change all newly added flags argument to u32 to match syscall argument.
Changes in v9:
- Move a mistakenly included hunk into patch 1
- Doc change for sys_landlock_create_ruleset to add missing
"quiet_scoped | scoped == scoped" requirement.
- Doc changes for struct landlock_ruleset_attr, and re-wrap added bits
wider to stay consistent with the existing block.
- Other style changes from suggestions
- Added mention of this flag in the audit section of
Documentation/admin-guide/LSM/landlock.rst
- Added a block for this new flag to the "Previous limitations" section
in Documentation/userspace-api/landlock.rst
Changes in v8:
- The new Landlock ABI version is now v10 as a result of rebase.
- Allocate a rule_flags in hook_unix_find() and pass to
is_access_to_paths_allowed().
Changes in v6:
- Fix typo in doc
Changes in v5:
- Doc fixes.
- Fix build failure without CONFIG_AUDIT / CONFIG_INET (reported by Justin
Suess)
Changes in v4:
- Minor update to this commit message.
- Fix minor formatting
Changes in v3:
- Updated docs from Mickaël's suggestions.
Changes in v2:
- Per suggestion, added support for quieting only certain access bits,
controlled by extra quiet_access_* fields in the ruleset_attr.
- Added docs for the extra fields and made updates to doc changes in v1.
In particular, call out that the effect of LANDLOCK_ADD_RULE_QUIET is
independent from the access bits passed in rule_attr
- landlock_add_rule will return -EINVAL when LANDLOCK_ADD_RULE_QUIET is
used but the ruleset does not have any quiet access bits set for the
given rule type.
- ABI version bump to v8
- Syntactic and comment changes per suggestion.
Documentation/admin-guide/LSM/landlock.rst | 9 ++-
Documentation/userspace-api/landlock.rst | 14 ++++
include/uapi/linux/landlock.h | 61 +++++++++++++++++
security/landlock/domain.h | 5 ++
security/landlock/fs.c | 4 +-
security/landlock/fs.h | 2 +-
security/landlock/net.c | 5 +-
security/landlock/net.h | 5 +-
security/landlock/ruleset.c | 12 +++-
security/landlock/ruleset.h | 12 +++-
security/landlock/syscalls.c | 71 +++++++++++++++-----
tools/testing/selftests/landlock/base_test.c | 2 +-
12 files changed, 171 insertions(+), 31 deletions(-)
diff --git a/Documentation/admin-guide/LSM/landlock.rst b/Documentation/admin-guide/LSM/landlock.rst
index 9923874e2156..ccc32dad1d1c 100644
--- a/Documentation/admin-guide/LSM/landlock.rst
+++ b/Documentation/admin-guide/LSM/landlock.rst
@@ -19,8 +19,10 @@ Audit
Denied access requests are logged by default for a sandboxed program if `audit`
is enabled. This default behavior can be changed with the
sys_landlock_restrict_self() flags (cf.
-Documentation/userspace-api/landlock.rst). Landlock logs can also be masked
-thanks to audit rules. Landlock can generate 2 audit record types.
+Documentation/userspace-api/landlock.rst), or suppressed on a per-object
+basis by using ``LANDLOCK_ADD_RULE_QUIET`` (ABI 10+). Landlock logs can
+also be masked thanks to audit rules. Landlock can generate 2 audit
+record types.
Record types
------------
@@ -172,7 +174,8 @@ If you get spammed with audit logs related to Landlock, this is either an
attack attempt or a bug in the security policy. We can put in place some
filters to limit noise with two complementary ways:
-- with sys_landlock_restrict_self()'s flags if we can fix the sandboxed
+- with sys_landlock_restrict_self()'s flags, or
+ ``LANDLOCK_ADD_RULE_QUIET`` (ABI 10+) if we can fix the sandboxed
programs,
- or with audit rules (see :manpage:`auditctl(8)`).
diff --git a/Documentation/userspace-api/landlock.rst b/Documentation/userspace-api/landlock.rst
index 0ea55c2c732c..ce63ec564229 100644
--- a/Documentation/userspace-api/landlock.rst
+++ b/Documentation/userspace-api/landlock.rst
@@ -775,6 +775,20 @@ remote port of UDP sockets (via :manpage:`connect(2)), and sending
datagrams to an explicit remote port (ignoring any destination set on
UDP sockets, via e.g. :manpage:`sendto(2)).
+Quiet rule flag (ABI < 10)
+--------------------------
+
+Starting with the Landlock ABI version 10, it is possible to selectively
+suppress logs for specific denied accesses on a per-object basis with
+the ``LANDLOCK_ADD_RULE_QUIET`` flag of sys_landlock_add_rule(), in
+combination with the ``quiet_access_fs`` and ``quiet_access_net`` fields
+of struct landlock_ruleset_attr. It is also now possible to suppress
+logs for scope accesses via the ``quiet_scoped`` field of struct
+landlock_ruleset_attr. The object is marked as quiet within a ruleset
+when at least one sys_landlock_add_rule() call is made for it with the
+``LANDLOCK_ADD_RULE_QUIET`` flag; additional add-rule calls for the same
+object without this flag do not clear it.
+
.. _kernel_support:
Kernel support
diff --git a/include/uapi/linux/landlock.h b/include/uapi/linux/landlock.h
index b147223efc97..1bdd9444335f 100644
--- a/include/uapi/linux/landlock.h
+++ b/include/uapi/linux/landlock.h
@@ -32,6 +32,19 @@
* *handle* a wide range or all access rights that they know about at build time
* (and that they have tested with a kernel that supported them all).
*
+ * @quiet_access_fs and @quiet_access_net are bitmasks of actions for which a
+ * denial by this layer will not trigger a log if the corresponding object (or
+ * its children, for filesystem rules) is marked with the "quiet" bit via
+ * %LANDLOCK_ADD_RULE_QUIET, even if logging would normally take place per
+ * landlock_restrict_self() flags. @quiet_scoped is similar, except that it
+ * does not require marking any objects as quiet - if the ruleset is created
+ * with any bits set in @quiet_scoped, then denial of such scoped resources will
+ * not trigger any log. These 3 fields are available since Landlock ABI version
+ * 10.
+ *
+ * @quiet_access_fs, @quiet_access_net and @quiet_scoped must be a subset of
+ * @handled_access_fs, @handled_access_net and @scoped respectively.
+ *
* This structure can grow in future Landlock versions.
*/
struct landlock_ruleset_attr {
@@ -51,6 +64,21 @@ struct landlock_ruleset_attr {
* resources (e.g. IPCs).
*/
__u64 scoped;
+ /**
+ * @quiet_access_fs: Bitmask of filesystem actions which should not be
+ * logged if the per-object quiet flag is set.
+ */
+ __u64 quiet_access_fs;
+ /**
+ * @quiet_access_net: Bitmask of network actions which should not be
+ * logged if the per-object quiet flag is set.
+ */
+ __u64 quiet_access_net;
+ /**
+ * @quiet_scoped: Bitmask of scoped actions which should not be
+ * logged.
+ */
+ __u64 quiet_scoped;
};
/**
@@ -69,6 +97,39 @@ struct landlock_ruleset_attr {
#define LANDLOCK_CREATE_RULESET_ERRATA (1U << 1)
/* clang-format on */
+/**
+ * DOC: landlock_add_rule_flags
+ *
+ * **Flags**
+ *
+ * %LANDLOCK_ADD_RULE_QUIET
+ * Together with the quiet_* fields in struct landlock_ruleset_attr,
+ * this flag controls whether Landlock will log audit messages when
+ * access to the objects covered by this rule is denied by this layer.
+ *
+ * If logging is enabled, when Landlock denies an access, it will
+ * suppress the log if all of the following are true:
+ *
+ * - this layer is the innermost layer that denied the access;
+ * - all accesses denied by this layer are part of the quiet_* fields
+ * in the related struct landlock_ruleset_attr;
+ * - the object (or one of its parents, for filesystem rules) is
+ * marked as "quiet" via %LANDLOCK_ADD_RULE_QUIET.
+ *
+ * Because logging is only suppressed by a layer if the layer denies
+ * access, a sandboxed program cannot use this flag to "hide" access
+ * denials, without denying itself the access in the first place.
+ *
+ * The effect of this flag does not depend on the value of
+ * allowed_access in the passed in rule_attr. When this flag is
+ * present, the caller is also allowed to pass in an empty
+ * allowed_access.
+ */
+
+/* clang-format off */
+#define LANDLOCK_ADD_RULE_QUIET (1U << 0)
+/* clang-format on */
+
/**
* DOC: landlock_restrict_self_flags
*
diff --git a/security/landlock/domain.h b/security/landlock/domain.h
index af100a8cd939..9f560f3c3bd1 100644
--- a/security/landlock/domain.h
+++ b/security/landlock/domain.h
@@ -111,6 +111,11 @@ struct landlock_hierarchy {
* %LANDLOCK_RESTRICT_SELF_LOG_NEW_EXEC_ON. Set to false by default.
*/
log_new_exec : 1;
+ /**
+ * @quiet_masks: Bitmasks of access that should be quieted (i.e. not
+ * logged) if the related object is marked as quiet.
+ */
+ struct access_masks quiet_masks;
#endif /* CONFIG_AUDIT */
};
diff --git a/security/landlock/fs.c b/security/landlock/fs.c
index c724692bb990..a096e4aa7fcd 100644
--- a/security/landlock/fs.c
+++ b/security/landlock/fs.c
@@ -325,7 +325,7 @@ static struct landlock_object *get_inode_object(struct inode *const inode)
*/
int landlock_append_fs_rule(struct landlock_ruleset *const ruleset,
const struct path *const path,
- access_mask_t access_rights)
+ access_mask_t access_rights, const u32 flags)
{
int err;
struct landlock_id id = {
@@ -346,7 +346,7 @@ int landlock_append_fs_rule(struct landlock_ruleset *const ruleset,
if (IS_ERR(id.key.object))
return PTR_ERR(id.key.object);
mutex_lock(&ruleset->lock);
- err = landlock_insert_rule(ruleset, id, access_rights);
+ err = landlock_insert_rule(ruleset, id, access_rights, flags);
mutex_unlock(&ruleset->lock);
/*
* No need to check for an error because landlock_insert_rule()
diff --git a/security/landlock/fs.h b/security/landlock/fs.h
index 911b83669e20..e4c530511360 100644
--- a/security/landlock/fs.h
+++ b/security/landlock/fs.h
@@ -136,6 +136,6 @@ __init void landlock_add_fs_hooks(void);
int landlock_append_fs_rule(struct landlock_ruleset *const ruleset,
const struct path *const path,
- access_mask_t access_hierarchy);
+ access_mask_t access_hierarchy, const u32 flags);
#endif /* _SECURITY_LANDLOCK_FS_H */
diff --git a/security/landlock/net.c b/security/landlock/net.c
index d472e6cab12f..4b4f974dc877 100644
--- a/security/landlock/net.c
+++ b/security/landlock/net.c
@@ -20,7 +20,8 @@
#include "ruleset.h"
int landlock_append_net_rule(struct landlock_ruleset *const ruleset,
- const u16 port, access_mask_t access_rights)
+ const u16 port, access_mask_t access_rights,
+ const u32 flags)
{
int err;
const struct landlock_id id = {
@@ -35,7 +36,7 @@ int landlock_append_net_rule(struct landlock_ruleset *const ruleset,
~landlock_get_net_access_mask(ruleset, 0);
mutex_lock(&ruleset->lock);
- err = landlock_insert_rule(ruleset, id, access_rights);
+ err = landlock_insert_rule(ruleset, id, access_rights, flags);
mutex_unlock(&ruleset->lock);
return err;
diff --git a/security/landlock/net.h b/security/landlock/net.h
index 09960c237a13..5c0e3b4090cb 100644
--- a/security/landlock/net.h
+++ b/security/landlock/net.h
@@ -16,7 +16,8 @@
__init void landlock_add_net_hooks(void);
int landlock_append_net_rule(struct landlock_ruleset *const ruleset,
- const u16 port, access_mask_t access_rights);
+ const u16 port, access_mask_t access_rights,
+ const u32 flags);
#else /* IS_ENABLED(CONFIG_INET) */
static inline void landlock_add_net_hooks(void)
{
@@ -24,7 +25,7 @@ static inline void landlock_add_net_hooks(void)
static inline int
landlock_append_net_rule(struct landlock_ruleset *const ruleset, const u16 port,
- access_mask_t access_rights)
+ access_mask_t access_rights, const u32 flags)
{
return -EAFNOSUPPORT;
}
diff --git a/security/landlock/ruleset.c b/security/landlock/ruleset.c
index 91948e406e69..46cda04d9670 100644
--- a/security/landlock/ruleset.c
+++ b/security/landlock/ruleset.c
@@ -21,6 +21,7 @@
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/workqueue.h>
+#include <uapi/linux/landlock.h>
#include "access.h"
#include "domain.h"
@@ -255,6 +256,7 @@ static int insert_rule(struct landlock_ruleset *const ruleset,
if (WARN_ON_ONCE(this->layers[0].level != 0))
return -EINVAL;
this->layers[0].access |= (*layers)[0].access;
+ this->layers[0].flags.quiet |= (*layers)[0].flags.quiet;
return 0;
}
@@ -305,12 +307,15 @@ static void build_check_layer(void)
/* @ruleset must be locked by the caller. */
int landlock_insert_rule(struct landlock_ruleset *const ruleset,
const struct landlock_id id,
- const access_mask_t access)
+ const access_mask_t access, const u32 flags)
{
struct landlock_layer layers[] = { {
.access = access,
/* When @level is zero, insert_rule() extends @ruleset. */
.level = 0,
+ .flags = {
+ .quiet = !!(flags & LANDLOCK_ADD_RULE_QUIET),
+ },
} };
build_check_layer();
@@ -351,6 +356,7 @@ static int merge_tree(struct landlock_ruleset *const dst,
return -EINVAL;
layers[0].access = walker_rule->layers[0].access;
+ layers[0].flags = walker_rule->layers[0].flags;
err = insert_rule(dst, id, &layers, ARRAY_SIZE(layers));
if (err)
@@ -581,6 +587,10 @@ landlock_merge_ruleset(struct landlock_ruleset *const parent,
if (err)
return ERR_PTR(err);
+#ifdef CONFIG_AUDIT
+ new_dom->hierarchy->quiet_masks = ruleset->quiet_masks;
+#endif /* CONFIG_AUDIT */
+
return no_free_ptr(new_dom);
}
diff --git a/security/landlock/ruleset.h b/security/landlock/ruleset.h
index d0fa9af46a2c..c819d0c40796 100644
--- a/security/landlock/ruleset.h
+++ b/security/landlock/ruleset.h
@@ -156,8 +156,8 @@ struct landlock_ruleset {
* @work_free: Enables to free a ruleset within a lockless
* section. This is only used by
* landlock_put_ruleset_deferred() when @usage reaches zero.
- * The fields @lock, @usage, @num_rules, @num_layers and
- * @access_masks are then unused.
+ * The fields @lock, @usage, @num_rules, @num_layers, @quiet_masks
+ * and @access_masks are then unused.
*/
struct work_struct work_free;
struct {
@@ -183,6 +183,12 @@ struct landlock_ruleset {
* non-merged ruleset (i.e. not a domain).
*/
u32 num_layers;
+ /**
+ * @quiet_masks: Stores the quiet access masks for an unmerged
+ * ruleset. For a merged domain, these are stored in each
+ * layer's struct landlock_hierarchy instead.
+ */
+ struct access_masks quiet_masks;
/**
* @access_masks: Contains the subset of filesystem and
* network actions that are restricted by a ruleset.
@@ -213,7 +219,7 @@ DEFINE_FREE(landlock_put_ruleset, struct landlock_ruleset *,
int landlock_insert_rule(struct landlock_ruleset *const ruleset,
const struct landlock_id id,
- const access_mask_t access);
+ const access_mask_t access, const u32 flags);
struct landlock_ruleset *
landlock_merge_ruleset(struct landlock_ruleset *const parent,
diff --git a/security/landlock/syscalls.c b/security/landlock/syscalls.c
index d45469d5d464..425f093e2407 100644
--- a/security/landlock/syscalls.c
+++ b/security/landlock/syscalls.c
@@ -105,8 +105,11 @@ static void build_check_abi(void)
ruleset_size = sizeof(ruleset_attr.handled_access_fs);
ruleset_size += sizeof(ruleset_attr.handled_access_net);
ruleset_size += sizeof(ruleset_attr.scoped);
+ ruleset_size += sizeof(ruleset_attr.quiet_access_fs);
+ ruleset_size += sizeof(ruleset_attr.quiet_access_net);
+ ruleset_size += sizeof(ruleset_attr.quiet_scoped);
BUILD_BUG_ON(sizeof(ruleset_attr) != ruleset_size);
- BUILD_BUG_ON(sizeof(ruleset_attr) != 24);
+ BUILD_BUG_ON(sizeof(ruleset_attr) != 48);
path_beneath_size = sizeof(path_beneath_attr.allowed_access);
path_beneath_size += sizeof(path_beneath_attr.parent_fd);
@@ -193,6 +196,9 @@ const int landlock_abi_version = 10;
* - %EOPNOTSUPP: Landlock is supported by the kernel but disabled at boot time;
* - %EINVAL: unknown @flags, or unknown access, or unknown scope, or too small
* @size;
+ * - %EINVAL: quiet_access_fs, quiet_access_net, or quiet_scoped is not a
+ * subset of the corresponding handled_access_fs, handled_access_net, or
+ * scoped;
* - %E2BIG: @attr or @size inconsistencies;
* - %EFAULT: @attr or @size inconsistencies;
* - %ENOMSG: empty &landlock_ruleset_attr.handled_access_fs.
@@ -249,6 +255,21 @@ SYSCALL_DEFINE3(landlock_create_ruleset,
if ((ruleset_attr.scoped | LANDLOCK_MASK_SCOPE) != LANDLOCK_MASK_SCOPE)
return -EINVAL;
+ /*
+ * Check that quiet masks are subsets of the respective handled masks.
+ * Because of the checks above this is sufficient to also ensure that
+ * the quiet masks are valid access masks.
+ */
+ if ((ruleset_attr.quiet_access_fs | ruleset_attr.handled_access_fs) !=
+ ruleset_attr.handled_access_fs)
+ return -EINVAL;
+ if ((ruleset_attr.quiet_access_net | ruleset_attr.handled_access_net) !=
+ ruleset_attr.handled_access_net)
+ return -EINVAL;
+ if ((ruleset_attr.quiet_scoped | ruleset_attr.scoped) !=
+ ruleset_attr.scoped)
+ return -EINVAL;
+
/* Checks arguments and transforms to kernel struct. */
ruleset = landlock_create_ruleset(ruleset_attr.handled_access_fs,
ruleset_attr.handled_access_net,
@@ -256,6 +277,10 @@ SYSCALL_DEFINE3(landlock_create_ruleset,
if (IS_ERR(ruleset))
return PTR_ERR(ruleset);
+ ruleset->quiet_masks.fs = ruleset_attr.quiet_access_fs;
+ ruleset->quiet_masks.net = ruleset_attr.quiet_access_net;
+ ruleset->quiet_masks.scope = ruleset_attr.quiet_scoped;
+
/* Creates anonymous FD referring to the ruleset. */
ruleset_fd = anon_inode_getfd("[landlock-ruleset]", &ruleset_fops,
ruleset, O_RDWR | O_CLOEXEC);
@@ -320,7 +345,7 @@ static int get_path_from_fd(const s32 fd, struct path *const path)
}
static int add_rule_path_beneath(struct landlock_ruleset *const ruleset,
- const void __user *const rule_attr)
+ const void __user *const rule_attr, u32 flags)
{
struct landlock_path_beneath_attr path_beneath_attr;
struct path path;
@@ -335,9 +360,10 @@ static int add_rule_path_beneath(struct landlock_ruleset *const ruleset,
/*
* Informs about useless rule: empty allowed_access (i.e. deny rules)
- * are ignored in path walks.
+ * are ignored in path walks. However, the rule is not useless if it
+ * is there to hold a quiet flag.
*/
- if (!path_beneath_attr.allowed_access)
+ if (!flags && !path_beneath_attr.allowed_access)
return -ENOMSG;
/* Checks that allowed_access matches the @ruleset constraints. */
@@ -345,6 +371,10 @@ static int add_rule_path_beneath(struct landlock_ruleset *const ruleset,
if ((path_beneath_attr.allowed_access | mask) != mask)
return -EINVAL;
+ /* Checks for useless quiet flag. */
+ if (flags & LANDLOCK_ADD_RULE_QUIET && !ruleset->quiet_masks.fs)
+ return -EINVAL;
+
/* Gets and checks the new rule. */
err = get_path_from_fd(path_beneath_attr.parent_fd, &path);
if (err)
@@ -352,13 +382,13 @@ static int add_rule_path_beneath(struct landlock_ruleset *const ruleset,
/* Imports the new rule. */
err = landlock_append_fs_rule(ruleset, &path,
- path_beneath_attr.allowed_access);
+ path_beneath_attr.allowed_access, flags);
path_put(&path);
return err;
}
static int add_rule_net_port(struct landlock_ruleset *ruleset,
- const void __user *const rule_attr)
+ const void __user *const rule_attr, u32 flags)
{
struct landlock_net_port_attr net_port_attr;
int res;
@@ -371,9 +401,10 @@ static int add_rule_net_port(struct landlock_ruleset *ruleset,
/*
* Informs about useless rule: empty allowed_access (i.e. deny rules)
- * are ignored by network actions.
+ * are ignored by network actions. However, the rule is not useless
+ * if it is there to hold a quiet flag.
*/
- if (!net_port_attr.allowed_access)
+ if (!flags && !net_port_attr.allowed_access)
return -ENOMSG;
/* Checks that allowed_access matches the @ruleset constraints. */
@@ -381,13 +412,17 @@ static int add_rule_net_port(struct landlock_ruleset *ruleset,
if ((net_port_attr.allowed_access | mask) != mask)
return -EINVAL;
+ /* Checks for useless quiet flag. */
+ if (flags & LANDLOCK_ADD_RULE_QUIET && !ruleset->quiet_masks.net)
+ return -EINVAL;
+
/* Denies inserting a rule with port greater than 65535. */
if (net_port_attr.port > U16_MAX)
return -EINVAL;
/* Imports the new rule. */
return landlock_append_net_rule(ruleset, net_port_attr.port,
- net_port_attr.allowed_access);
+ net_port_attr.allowed_access, flags);
}
/**
@@ -398,7 +433,7 @@ static int add_rule_net_port(struct landlock_ruleset *ruleset,
* @rule_type: Identify the structure type pointed to by @rule_attr:
* %LANDLOCK_RULE_PATH_BENEATH or %LANDLOCK_RULE_NET_PORT.
* @rule_attr: Pointer to a rule (matching the @rule_type).
- * @flags: Must be 0.
+ * @flags: Must be 0 or %LANDLOCK_ADD_RULE_QUIET.
*
* This system call enables to define a new rule and add it to an existing
* ruleset.
@@ -408,20 +443,25 @@ static int add_rule_net_port(struct landlock_ruleset *ruleset,
* - %EOPNOTSUPP: Landlock is supported by the kernel but disabled at boot time;
* - %EAFNOSUPPORT: @rule_type is %LANDLOCK_RULE_NET_PORT but TCP/IP is not
* supported by the running kernel;
- * - %EINVAL: @flags is not 0;
+ * - %EINVAL: @flags is not valid;
* - %EINVAL: The rule accesses are inconsistent (i.e.
* &landlock_path_beneath_attr.allowed_access or
* &landlock_net_port_attr.allowed_access is not a subset of the ruleset
* handled accesses)
* - %EINVAL: &landlock_net_port_attr.port is greater than 65535;
+ * - %EINVAL: %LANDLOCK_ADD_RULE_QUIET is passed but the ruleset has no
+ * quiet access bits set for the corresponding rule type;
* - %ENOMSG: Empty accesses (e.g. &landlock_path_beneath_attr.allowed_access is
- * 0);
+ * 0) and no flags;
* - %EBADF: @ruleset_fd is not a file descriptor for the current thread, or a
* member of @rule_attr is not a file descriptor as expected;
* - %EBADFD: @ruleset_fd is not a ruleset file descriptor, or a member of
* @rule_attr is not the expected file descriptor type;
* - %EPERM: @ruleset_fd has no write access to the underlying ruleset;
* - %EFAULT: @rule_attr was not a valid address.
+ *
+ * .. kernel-doc:: include/uapi/linux/landlock.h
+ * :identifiers: landlock_add_rule_flags
*/
SYSCALL_DEFINE4(landlock_add_rule, const int, ruleset_fd,
const enum landlock_rule_type, rule_type,
@@ -432,8 +472,7 @@ SYSCALL_DEFINE4(landlock_add_rule, const int, ruleset_fd,
if (!is_initialized())
return -EOPNOTSUPP;
- /* No flag for now. */
- if (flags)
+ if (flags && flags != LANDLOCK_ADD_RULE_QUIET)
return -EINVAL;
/* Gets and checks the ruleset. */
@@ -443,9 +482,9 @@ SYSCALL_DEFINE4(landlock_add_rule, const int, ruleset_fd,
switch (rule_type) {
case LANDLOCK_RULE_PATH_BENEATH:
- return add_rule_path_beneath(ruleset, rule_attr);
+ return add_rule_path_beneath(ruleset, rule_attr, flags);
case LANDLOCK_RULE_NET_PORT:
- return add_rule_net_port(ruleset, rule_attr);
+ return add_rule_net_port(ruleset, rule_attr, flags);
default:
return -EINVAL;
}
diff --git a/tools/testing/selftests/landlock/base_test.c b/tools/testing/selftests/landlock/base_test.c
index 6c8113c2ded1..84e91fcaa1b2 100644
--- a/tools/testing/selftests/landlock/base_test.c
+++ b/tools/testing/selftests/landlock/base_test.c
@@ -201,7 +201,7 @@ TEST(add_rule_checks_ordering)
ASSERT_LE(0, ruleset_fd);
/* Checks invalid flags. */
- ASSERT_EQ(-1, landlock_add_rule(-1, 0, NULL, 1));
+ ASSERT_EQ(-1, landlock_add_rule(-1, 0, NULL, 100));
ASSERT_EQ(EINVAL, errno);
/* Checks invalid ruleset FD. */
--
2.54.0
^ permalink raw reply related
* [PATCH v11 1/9] landlock: Add a place for flags to layer rules
From: Tingmao Wang @ 2026-06-12 1:48 UTC (permalink / raw)
To: Mickaël Salaün
Cc: Tingmao Wang, Günther Noack, Justin Suess, Jan Kara,
Abhinav Saxena, linux-security-module
In-Reply-To: <cover.1781228815.git.m@maowtm.org>
To avoid unnecessarily increasing the size of struct landlock_layer, we
make the layer level a u8 and use the space to store the flags struct.
struct layer_access_masks is renamed to struct layer_masks, and a new
field is added to track whether a quiet flag rule is seen for each
layer. Through use of bitfields, this does not increase the size of the
struct.
Cc: Justin Suess <utilityemal77@gmail.com>
Assisted-by: GitHub-Copilot:claude-opus-4.8 copilot-review
Signed-off-by: Tingmao Wang <m@maowtm.org>
Co-developed-by: Justin Suess <utilityemal77@gmail.com>
Signed-off-by: Justin Suess <utilityemal77@gmail.com>
Tested-by: Justin Suess <utilityemal77@gmail.com>
---
Changes in v11:
- doc changes from feedback
- access_mask_t quiet : 1 instead of bool quiet : 1
- __packed __aligned(sizeof(access_mask_t))
- u8 quiet:1 instead of bool quiet:1 in struct landlock_layer
- Turn all "audit log" mentions into "log"
Changes in v10:
- Doc for struct layer_mask members
- clang-format header file changes
- Add Tested-by for Justin Suess
Changes in v9:
- Move a hunk from patch 2 to here
- Fix comment and format
- Renamed struct layer_access_masks to struct layer_masks, and moved the
content of struct collected_rule_flags into this struct, getting rid
of the extra struct collected_rule_flags and function parameters.
This is following a discussion in [3]. The flag is now initialized in
landlock_init_layer_masks as false.
- Thus also removed now unnecessary layer_mask_t
Changes in v8:
- Rebase on top of mic/next
- Add Co-developed-by: Justin Suess for handling this rebase initially
- layer_mask_t was removed in [1] but we still need it for the
collected_rule_flags. Rather than using raw u16, I've chosen to
re-define it back in ruleset.h (it was in access.h).
Changes in v7:
- Take rule_flags separately from landlock_request in
is_access_to_paths_allowed to avoid writing to the landlock_request
variable if CONFIG_AUDIT is disabled (to enable compiler elision).
- Due to the above change, we don't need rule_flags in landlock_request in
this commit anymore (will be added later).
Changes in v6:
- Rebased to include the revised disconnected directory handling changes
(without the "reverting" behaviour)
Changes in v5:
- Move rule_flags into landlock_request. This lets us get rid of the
extra parameters to is_access_to_paths_allowed (and later on,
landlock_log_denial), and thus less code changes.
Changes in v3:
- Comment changes, move local variables, simplify if branch
Changes in v2:
- Comment changes
- Rebased to include disconnected directory handling changes on mic/next
and add backing up of collected_rule_flags.
[1]: https://lore.kernel.org/all/20260125195853.109967-1-gnoack3000@gmail.com/
[2]: https://lore.kernel.org/all/20251221194301.247484-1-utilityemal77@gmail.com/
[3]: https://lore.kernel.org/all/20260524.eFiz4hahrami@digikod.net/
security/landlock/access.h | 39 ++++++++--
security/landlock/audit.c | 20 ++---
security/landlock/audit.h | 2 +-
security/landlock/domain.c | 19 ++---
security/landlock/domain.h | 2 +-
security/landlock/fs.c | 147 +++++++++++++++++++-----------------
security/landlock/limits.h | 3 +
security/landlock/net.c | 2 +-
security/landlock/ruleset.c | 33 +++++---
security/landlock/ruleset.h | 17 ++++-
10 files changed, 174 insertions(+), 110 deletions(-)
diff --git a/security/landlock/access.h b/security/landlock/access.h
index c19d5bc13944..94f4b9fb7238 100644
--- a/security/landlock/access.h
+++ b/security/landlock/access.h
@@ -62,18 +62,41 @@ static_assert(sizeof(typeof_member(union access_masks_all, masks)) ==
sizeof(typeof_member(union access_masks_all, all)));
/**
- * struct layer_access_masks - A boolean matrix of layers and access rights
+ * struct layer_mask - The access rights and rule flags for a layer.
*
- * This has a bit for each combination of layer numbers and access rights.
- * During access checks, it is used to represent the access rights for each
- * layer which still need to be fulfilled. When all bits are 0, the access
- * request is considered to be fulfilled.
+ * This has a bit for each access right, plus the rule flags. During access
+ * checks, it is used to represent the access rights which still need to be
+ * fulfilled for this layer. When all access bits are 0, the access request
+ * is considered to be fulfilled for this layer.
*/
-struct layer_access_masks {
+struct layer_mask {
/**
- * @access: The unfulfilled access rights for each layer.
+ * @access: The unfulfilled access rights for this layer.
*/
- access_mask_t access[LANDLOCK_MAX_NUM_LAYERS];
+ access_mask_t access : LANDLOCK_NUM_ACCESS_MAX;
+#ifdef CONFIG_AUDIT
+ /**
+ * @quiet: Whether we have encountered a rule with the quiet flag for
+ * this layer. Used to control logging.
+ */
+ access_mask_t quiet : 1;
+#endif /* CONFIG_AUDIT */
+} __packed __aligned(sizeof(access_mask_t));
+
+/*
+ * Make sure that we don't increase the size of struct layer_mask when
+ * storing rule flags.
+ */
+static_assert(sizeof(struct layer_mask) == sizeof(access_mask_t));
+
+/**
+ * struct layer_masks - An array of struct layer_mask, one per layer.
+ */
+struct layer_masks {
+ /**
+ * @layers: The unfulfilled access rights and rule flags for each layer.
+ */
+ struct layer_mask layers[LANDLOCK_MAX_NUM_LAYERS];
};
/*
diff --git a/security/landlock/audit.c b/security/landlock/audit.c
index 851647197a01..8c56f7f6467a 100644
--- a/security/landlock/audit.c
+++ b/security/landlock/audit.c
@@ -187,11 +187,11 @@ static void test_get_hierarchy(struct kunit *const test)
/* Get the youngest layer that denied the access_request. */
static size_t get_denied_layer(const struct landlock_ruleset *const domain,
access_mask_t *const access_request,
- const struct layer_access_masks *masks)
+ const struct layer_masks *masks)
{
- for (ssize_t i = ARRAY_SIZE(masks->access) - 1; i >= 0; i--) {
- if (masks->access[i] & *access_request) {
- *access_request &= masks->access[i];
+ for (ssize_t i = ARRAY_SIZE(masks->layers) - 1; i >= 0; i--) {
+ if (masks->layers[i].access & *access_request) {
+ *access_request &= masks->layers[i].access;
return i;
}
}
@@ -208,12 +208,12 @@ static void test_get_denied_layer(struct kunit *const test)
const struct landlock_ruleset dom = {
.num_layers = 5,
};
- const struct layer_access_masks masks = {
- .access[0] = LANDLOCK_ACCESS_FS_EXECUTE |
- LANDLOCK_ACCESS_FS_READ_DIR,
- .access[1] = LANDLOCK_ACCESS_FS_READ_FILE |
- LANDLOCK_ACCESS_FS_READ_DIR,
- .access[2] = LANDLOCK_ACCESS_FS_REMOVE_DIR,
+ const struct layer_masks masks = {
+ .layers[0].access = LANDLOCK_ACCESS_FS_EXECUTE |
+ LANDLOCK_ACCESS_FS_READ_DIR,
+ .layers[1].access = LANDLOCK_ACCESS_FS_READ_FILE |
+ LANDLOCK_ACCESS_FS_READ_DIR,
+ .layers[2].access = LANDLOCK_ACCESS_FS_REMOVE_DIR,
};
access_mask_t access;
diff --git a/security/landlock/audit.h b/security/landlock/audit.h
index 56778331b58c..b85d752273ac 100644
--- a/security/landlock/audit.h
+++ b/security/landlock/audit.h
@@ -43,7 +43,7 @@ struct landlock_request {
access_mask_t access;
/* Required fields for requests with layer masks. */
- const struct layer_access_masks *layer_masks;
+ const struct layer_masks *layer_masks;
/* Required fields for requests with deny masks. */
const access_mask_t all_existing_optional_access;
diff --git a/security/landlock/domain.c b/security/landlock/domain.c
index 5dd06f7c2312..d1a4d8b33ee1 100644
--- a/security/landlock/domain.c
+++ b/security/landlock/domain.c
@@ -184,7 +184,7 @@ static void test_get_layer_deny_mask(struct kunit *const test)
deny_masks_t
landlock_get_deny_masks(const access_mask_t all_existing_optional_access,
const access_mask_t optional_access,
- const struct layer_access_masks *const masks)
+ const struct layer_masks *const masks)
{
const unsigned long access_opt = optional_access;
unsigned long access_bit;
@@ -201,8 +201,9 @@ landlock_get_deny_masks(const access_mask_t all_existing_optional_access,
if (WARN_ON_ONCE(!access_opt))
return 0;
- for (ssize_t i = ARRAY_SIZE(masks->access) - 1; i >= 0; i--) {
- const access_mask_t denied = masks->access[i] & optional_access;
+ for (ssize_t i = ARRAY_SIZE(masks->layers) - 1; i >= 0; i--) {
+ const access_mask_t denied = masks->layers[i].access &
+ optional_access;
const unsigned long newly_denied = denied & ~all_denied;
if (!newly_denied)
@@ -222,12 +223,12 @@ landlock_get_deny_masks(const access_mask_t all_existing_optional_access,
static void test_landlock_get_deny_masks(struct kunit *const test)
{
- const struct layer_access_masks layers1 = {
- .access[0] = LANDLOCK_ACCESS_FS_EXECUTE |
- LANDLOCK_ACCESS_FS_IOCTL_DEV,
- .access[1] = LANDLOCK_ACCESS_FS_TRUNCATE,
- .access[2] = LANDLOCK_ACCESS_FS_IOCTL_DEV,
- .access[9] = LANDLOCK_ACCESS_FS_EXECUTE,
+ const struct layer_masks layers1 = {
+ .layers[0].access = LANDLOCK_ACCESS_FS_EXECUTE |
+ LANDLOCK_ACCESS_FS_IOCTL_DEV,
+ .layers[1].access = LANDLOCK_ACCESS_FS_TRUNCATE,
+ .layers[2].access = LANDLOCK_ACCESS_FS_IOCTL_DEV,
+ .layers[9].access = LANDLOCK_ACCESS_FS_EXECUTE,
};
KUNIT_EXPECT_EQ(test, 0x1,
diff --git a/security/landlock/domain.h b/security/landlock/domain.h
index 35cac8f6daee..af100a8cd939 100644
--- a/security/landlock/domain.h
+++ b/security/landlock/domain.h
@@ -119,7 +119,7 @@ struct landlock_hierarchy {
deny_masks_t
landlock_get_deny_masks(const access_mask_t all_existing_optional_access,
const access_mask_t optional_access,
- const struct layer_access_masks *const masks);
+ const struct layer_masks *const masks);
int landlock_init_hierarchy_log(struct landlock_hierarchy *const hierarchy);
diff --git a/security/landlock/fs.c b/security/landlock/fs.c
index ff2c12e38bfc..c724692bb990 100644
--- a/security/landlock/fs.c
+++ b/security/landlock/fs.c
@@ -406,15 +406,15 @@ static const struct access_masks any_fs = {
* src_parent would result in having the same or fewer access rights if it were
* moved under new_parent.
*/
-static bool may_refer(const struct layer_access_masks *const src_parent,
- const struct layer_access_masks *const src_child,
- const struct layer_access_masks *const new_parent,
+static bool may_refer(const struct layer_masks *const src_parent,
+ const struct layer_masks *const src_child,
+ const struct layer_masks *const new_parent,
const bool child_is_dir)
{
- for (size_t i = 0; i < ARRAY_SIZE(new_parent->access); i++) {
- access_mask_t child_access = src_parent->access[i] &
- src_child->access[i];
- access_mask_t parent_access = new_parent->access[i];
+ for (size_t i = 0; i < ARRAY_SIZE(new_parent->layers); i++) {
+ access_mask_t child_access = src_parent->layers[i].access &
+ src_child->layers[i].access;
+ access_mask_t parent_access = new_parent->layers[i].access;
if (!child_is_dir) {
child_access &= ACCESS_FILE;
@@ -436,11 +436,11 @@ static bool may_refer(const struct layer_access_masks *const src_parent,
* that child2 may be used from parent2 to parent1 without increasing its access
* rights), false otherwise.
*/
-static bool no_more_access(const struct layer_access_masks *const parent1,
- const struct layer_access_masks *const child1,
+static bool no_more_access(const struct layer_masks *const parent1,
+ const struct layer_masks *const child1,
const bool child1_is_dir,
- const struct layer_access_masks *const parent2,
- const struct layer_access_masks *const child2,
+ const struct layer_masks *const parent2,
+ const struct layer_masks *const child2,
const bool child2_is_dir)
{
if (!may_refer(parent1, child1, parent2, child1_is_dir))
@@ -459,25 +459,25 @@ static bool no_more_access(const struct layer_access_masks *const parent1,
static void test_no_more_access(struct kunit *const test)
{
- const struct layer_access_masks rx0 = {
- .access[0] = LANDLOCK_ACCESS_FS_EXECUTE |
- LANDLOCK_ACCESS_FS_READ_FILE,
+ const struct layer_masks rx0 = {
+ .layers[0].access = LANDLOCK_ACCESS_FS_EXECUTE |
+ LANDLOCK_ACCESS_FS_READ_FILE,
};
- const struct layer_access_masks mx0 = {
- .access[0] = LANDLOCK_ACCESS_FS_EXECUTE |
- LANDLOCK_ACCESS_FS_MAKE_REG,
+ const struct layer_masks mx0 = {
+ .layers[0].access = LANDLOCK_ACCESS_FS_EXECUTE |
+ LANDLOCK_ACCESS_FS_MAKE_REG,
};
- const struct layer_access_masks x0 = {
- .access[0] = LANDLOCK_ACCESS_FS_EXECUTE,
+ const struct layer_masks x0 = {
+ .layers[0].access = LANDLOCK_ACCESS_FS_EXECUTE,
};
- const struct layer_access_masks x1 = {
- .access[1] = LANDLOCK_ACCESS_FS_EXECUTE,
+ const struct layer_masks x1 = {
+ .layers[1].access = LANDLOCK_ACCESS_FS_EXECUTE,
};
- const struct layer_access_masks x01 = {
- .access[0] = LANDLOCK_ACCESS_FS_EXECUTE,
- .access[1] = LANDLOCK_ACCESS_FS_EXECUTE,
+ const struct layer_masks x01 = {
+ .layers[0].access = LANDLOCK_ACCESS_FS_EXECUTE,
+ .layers[1].access = LANDLOCK_ACCESS_FS_EXECUTE,
};
- const struct layer_access_masks allows_all = {};
+ const struct layer_masks allows_all = {};
/* Checks without restriction. */
NMA_TRUE(&x0, &allows_all, false, &allows_all, NULL, false);
@@ -565,9 +565,13 @@ static void test_no_more_access(struct kunit *const test)
#undef NMA_TRUE
#undef NMA_FALSE
-static bool is_layer_masks_allowed(const struct layer_access_masks *masks)
+static bool is_layer_masks_allowed(const struct layer_masks *masks)
{
- return mem_is_zero(&masks->access, sizeof(masks->access));
+ for (size_t i = 0; i < ARRAY_SIZE(masks->layers); i++) {
+ if (masks->layers[i].access)
+ return false;
+ }
+ return true;
}
/*
@@ -576,16 +580,16 @@ static bool is_layer_masks_allowed(const struct layer_access_masks *masks)
* Returns true if the request is allowed, false otherwise.
*/
static bool scope_to_request(const access_mask_t access_request,
- struct layer_access_masks *masks)
+ struct layer_masks *masks)
{
bool saw_unfulfilled_access = false;
if (WARN_ON_ONCE(!masks))
return true;
- for (size_t i = 0; i < ARRAY_SIZE(masks->access); i++) {
- masks->access[i] &= access_request;
- if (masks->access[i])
+ for (size_t i = 0; i < ARRAY_SIZE(masks->layers); i++) {
+ masks->layers[i].access &= access_request;
+ if (masks->layers[i].access)
saw_unfulfilled_access = true;
}
return !saw_unfulfilled_access;
@@ -596,41 +600,46 @@ static bool scope_to_request(const access_mask_t access_request,
static void test_scope_to_request_with_exec_none(struct kunit *const test)
{
/* Allows everything. */
- struct layer_access_masks masks = {};
+ struct layer_masks masks = {};
/* Checks and scopes with execute. */
KUNIT_EXPECT_TRUE(test,
scope_to_request(LANDLOCK_ACCESS_FS_EXECUTE, &masks));
- KUNIT_EXPECT_EQ(test, 0, masks.access[0]);
+ KUNIT_EXPECT_EQ(test, 0, (access_mask_t)masks.layers[0].access);
}
static void test_scope_to_request_with_exec_some(struct kunit *const test)
{
/* Denies execute and write. */
- struct layer_access_masks masks = {
- .access[0] = LANDLOCK_ACCESS_FS_EXECUTE,
- .access[1] = LANDLOCK_ACCESS_FS_WRITE_FILE,
+ struct layer_masks masks = {
+ .layers[0].access = LANDLOCK_ACCESS_FS_EXECUTE,
+ .layers[1].access = LANDLOCK_ACCESS_FS_WRITE_FILE,
};
/* Checks and scopes with execute. */
KUNIT_EXPECT_FALSE(test, scope_to_request(LANDLOCK_ACCESS_FS_EXECUTE,
&masks));
- KUNIT_EXPECT_EQ(test, LANDLOCK_ACCESS_FS_EXECUTE, masks.access[0]);
- KUNIT_EXPECT_EQ(test, 0, masks.access[1]);
+ /*
+ * These casts to access_mask_t are needed because typeof(), used in
+ * KUNIT_EXPECT_EQ(), does not work on bitfields.
+ */
+ KUNIT_EXPECT_EQ(test, LANDLOCK_ACCESS_FS_EXECUTE,
+ (access_mask_t)masks.layers[0].access);
+ KUNIT_EXPECT_EQ(test, 0, (access_mask_t)masks.layers[1].access);
}
static void test_scope_to_request_without_access(struct kunit *const test)
{
/* Denies execute and write. */
- struct layer_access_masks masks = {
- .access[0] = LANDLOCK_ACCESS_FS_EXECUTE,
- .access[1] = LANDLOCK_ACCESS_FS_WRITE_FILE,
+ struct layer_masks masks = {
+ .layers[0].access = LANDLOCK_ACCESS_FS_EXECUTE,
+ .layers[1].access = LANDLOCK_ACCESS_FS_WRITE_FILE,
};
/* Checks and scopes without access request. */
KUNIT_EXPECT_TRUE(test, scope_to_request(0, &masks));
- KUNIT_EXPECT_EQ(test, 0, masks.access[0]);
- KUNIT_EXPECT_EQ(test, 0, masks.access[1]);
+ KUNIT_EXPECT_EQ(test, 0, (access_mask_t)masks.layers[0].access);
+ KUNIT_EXPECT_EQ(test, 0, (access_mask_t)masks.layers[1].access);
}
#endif /* CONFIG_SECURITY_LANDLOCK_KUNIT_TEST */
@@ -639,15 +648,15 @@ static void test_scope_to_request_without_access(struct kunit *const test)
* Returns true if there is at least one access right different than
* LANDLOCK_ACCESS_FS_REFER.
*/
-static bool is_eacces(const struct layer_access_masks *masks,
+static bool is_eacces(const struct layer_masks *masks,
const access_mask_t access_request)
{
if (!masks)
return false;
- for (size_t i = 0; i < ARRAY_SIZE(masks->access); i++) {
+ for (size_t i = 0; i < ARRAY_SIZE(masks->layers); i++) {
/* LANDLOCK_ACCESS_FS_REFER alone must return -EXDEV. */
- if (masks->access[i] & access_request &
+ if (masks->layers[i].access & access_request &
~LANDLOCK_ACCESS_FS_REFER)
return true;
}
@@ -661,7 +670,7 @@ static bool is_eacces(const struct layer_access_masks *masks,
static void test_is_eacces_with_none(struct kunit *const test)
{
- const struct layer_access_masks masks = {};
+ const struct layer_masks masks = {};
IE_FALSE(&masks, 0);
IE_FALSE(&masks, LANDLOCK_ACCESS_FS_REFER);
@@ -671,8 +680,8 @@ static void test_is_eacces_with_none(struct kunit *const test)
static void test_is_eacces_with_refer(struct kunit *const test)
{
- const struct layer_access_masks masks = {
- .access[0] = LANDLOCK_ACCESS_FS_REFER,
+ const struct layer_masks masks = {
+ .layers[0].access = LANDLOCK_ACCESS_FS_REFER,
};
IE_FALSE(&masks, 0);
@@ -683,8 +692,8 @@ static void test_is_eacces_with_refer(struct kunit *const test)
static void test_is_eacces_with_write(struct kunit *const test)
{
- const struct layer_access_masks masks = {
- .access[0] = LANDLOCK_ACCESS_FS_WRITE_FILE,
+ const struct layer_masks masks = {
+ .layers[0].access = LANDLOCK_ACCESS_FS_WRITE_FILE,
};
IE_FALSE(&masks, 0);
@@ -743,11 +752,11 @@ static bool
is_access_to_paths_allowed(const struct landlock_ruleset *const domain,
const struct path *const path,
const access_mask_t access_request_parent1,
- struct layer_access_masks *layer_masks_parent1,
+ struct layer_masks *layer_masks_parent1,
struct landlock_request *const log_request_parent1,
struct dentry *const dentry_child1,
const access_mask_t access_request_parent2,
- struct layer_access_masks *layer_masks_parent2,
+ struct layer_masks *layer_masks_parent2,
struct landlock_request *const log_request_parent2,
struct dentry *const dentry_child2)
{
@@ -755,9 +764,9 @@ is_access_to_paths_allowed(const struct landlock_ruleset *const domain,
child1_is_directory = true, child2_is_directory = true;
struct path walker_path;
access_mask_t access_masked_parent1, access_masked_parent2;
- struct layer_access_masks _layer_masks_child1, _layer_masks_child2;
- struct layer_access_masks *layer_masks_child1 = NULL,
- *layer_masks_child2 = NULL;
+ struct layer_masks _layer_masks_child1, _layer_masks_child2;
+ struct layer_masks *layer_masks_child1 = NULL,
+ *layer_masks_child2 = NULL;
if (!access_request_parent1 && !access_request_parent2)
return true;
@@ -797,6 +806,10 @@ is_access_to_paths_allowed(const struct landlock_ruleset *const domain,
}
if (unlikely(dentry_child1)) {
+ /*
+ * Get the layer masks for the child dentries for use by domain
+ * check later.
+ */
if (landlock_init_layer_masks(domain, LANDLOCK_MASK_ACCESS_FS,
&_layer_masks_child1,
LANDLOCK_KEY_INODE))
@@ -952,7 +965,7 @@ static int current_check_access_path(const struct path *const path,
};
const struct landlock_cred_security *const subject =
landlock_get_applicable_subject(current_cred(), masks, NULL);
- struct layer_access_masks layer_masks;
+ struct layer_masks layer_masks;
struct landlock_request request = {};
if (!subject)
@@ -1029,7 +1042,7 @@ static access_mask_t maybe_remove(const struct dentry *const dentry)
static bool collect_domain_accesses(const struct landlock_ruleset *const domain,
const struct dentry *const mnt_root,
struct dentry *dir,
- struct layer_access_masks *layer_masks_dom)
+ struct layer_masks *layer_masks_dom)
{
bool ret = false;
@@ -1135,8 +1148,7 @@ static int current_check_refer_path(struct dentry *const old_dentry,
access_mask_t access_request_parent1, access_request_parent2;
struct path mnt_dir;
struct dentry *old_parent;
- struct layer_access_masks layer_masks_parent1 = {},
- layer_masks_parent2 = {};
+ struct layer_masks layer_masks_parent1 = {}, layer_masks_parent2 = {};
struct landlock_request request1 = {}, request2 = {};
if (!subject)
@@ -1202,7 +1214,6 @@ static int current_check_refer_path(struct dentry *const old_dentry,
allow_parent2 = collect_domain_accesses(subject->domain, mnt_dir.dentry,
new_dir->dentry,
&layer_masks_parent2);
-
if (allow_parent1 && allow_parent2)
return 0;
@@ -1580,7 +1591,7 @@ static int hook_path_truncate(const struct path *const path)
*/
static void unmask_scoped_access(const struct landlock_ruleset *const client,
const struct landlock_ruleset *const server,
- struct layer_access_masks *const masks,
+ struct layer_masks *const masks,
const access_mask_t access)
{
int client_layer, server_layer;
@@ -1621,9 +1632,9 @@ static void unmask_scoped_access(const struct landlock_ruleset *const client,
server_walker = server_walker->parent;
for (; client_layer >= 0; client_layer--) {
- if (masks->access[client_layer] & access &&
+ if (masks->layers[client_layer].access & access &&
client_walker == server_walker)
- masks->access[client_layer] &= ~access;
+ masks->layers[client_layer].access &= ~access;
client_walker = client_walker->parent;
server_walker = server_walker->parent;
@@ -1635,7 +1646,7 @@ static int hook_unix_find(const struct path *const path, struct sock *other,
{
const struct landlock_ruleset *dom_other;
const struct landlock_cred_security *subject;
- struct layer_access_masks layer_masks;
+ struct layer_masks layer_masks;
struct landlock_request request = {};
static const struct access_masks fs_resolve_unix = {
.fs = LANDLOCK_ACCESS_FS_RESOLVE_UNIX,
@@ -1739,7 +1750,7 @@ static bool is_device(const struct file *const file)
static int hook_file_open(struct file *const file)
{
- struct layer_access_masks layer_masks = {};
+ struct layer_masks layer_masks = {};
access_mask_t open_access_request, full_access_request, allowed_access,
optional_access;
const struct landlock_cred_security *const subject =
@@ -1780,8 +1791,8 @@ static int hook_file_open(struct file *const file)
* are still unfulfilled in any of the layers.
*/
allowed_access = full_access_request;
- for (size_t i = 0; i < ARRAY_SIZE(layer_masks.access); i++)
- allowed_access &= ~layer_masks.access[i];
+ for (size_t i = 0; i < ARRAY_SIZE(layer_masks.layers); i++)
+ allowed_access &= ~layer_masks.layers[i].access;
}
/*
diff --git a/security/landlock/limits.h b/security/landlock/limits.h
index a4d908b240a2..08d5f2f6d321 100644
--- a/security/landlock/limits.h
+++ b/security/landlock/limits.h
@@ -31,6 +31,9 @@
#define LANDLOCK_MASK_SCOPE ((LANDLOCK_LAST_SCOPE << 1) - 1)
#define LANDLOCK_NUM_SCOPE __const_hweight64(LANDLOCK_MASK_SCOPE)
+#define LANDLOCK_NUM_ACCESS_MAX \
+ MAX(MAX(LANDLOCK_NUM_ACCESS_FS, LANDLOCK_NUM_ACCESS_NET), LANDLOCK_NUM_SCOPE)
+
#define LANDLOCK_LAST_RESTRICT_SELF LANDLOCK_RESTRICT_SELF_TSYNC
#define LANDLOCK_MASK_RESTRICT_SELF ((LANDLOCK_LAST_RESTRICT_SELF << 1) - 1)
diff --git a/security/landlock/net.c b/security/landlock/net.c
index 0e697403eca9..d472e6cab12f 100644
--- a/security/landlock/net.c
+++ b/security/landlock/net.c
@@ -49,7 +49,7 @@ static int current_check_access_socket(struct socket *const sock,
{
unsigned short sock_family;
__be16 port;
- struct layer_access_masks layer_masks = {};
+ struct layer_masks layer_masks = {};
const struct landlock_rule *rule;
struct landlock_id id = {
.type = LANDLOCK_KEY_NET_PORT,
diff --git a/security/landlock/ruleset.c b/security/landlock/ruleset.c
index 181df7736bb9..91948e406e69 100644
--- a/security/landlock/ruleset.c
+++ b/security/landlock/ruleset.c
@@ -628,7 +628,7 @@ landlock_find_rule(const struct landlock_ruleset *const ruleset,
* remaining unfulfilled access rights and masks has no leftover set bits).
*/
bool landlock_unmask_layers(const struct landlock_rule *const rule,
- struct layer_access_masks *masks)
+ struct layer_masks *masks)
{
if (!masks)
return true;
@@ -649,11 +649,17 @@ bool landlock_unmask_layers(const struct landlock_rule *const rule,
const struct landlock_layer *const layer = &rule->layers[i];
/* Clear the bits where the layer in the rule grants access. */
- masks->access[layer->level - 1] &= ~layer->access;
+ masks->layers[layer->level - 1].access &= ~layer->access;
+
+#ifdef CONFIG_AUDIT
+ /* Collect rule flags for each layer. */
+ if (layer->flags.quiet)
+ masks->layers[layer->level - 1].quiet = true;
+#endif /* CONFIG_AUDIT */
}
- for (size_t i = 0; i < ARRAY_SIZE(masks->access); i++) {
- if (masks->access[i])
+ for (size_t i = 0; i < ARRAY_SIZE(masks->layers); i++) {
+ if (masks->layers[i].access)
return false;
}
return true;
@@ -668,6 +674,7 @@ get_access_mask_t(const struct landlock_ruleset *const ruleset,
*
* Populates @masks such that for each access right in @access_request,
* the bits for all the layers are set where this access right is handled.
+ * Rule flags are also zeroed.
*
* @domain: The domain that defines the current restrictions.
* @access_request: The requested access rights to check.
@@ -680,7 +687,7 @@ get_access_mask_t(const struct landlock_ruleset *const ruleset,
access_mask_t
landlock_init_layer_masks(const struct landlock_ruleset *const domain,
const access_mask_t access_request,
- struct layer_access_masks *const masks,
+ struct layer_masks *const masks,
const enum landlock_key_type key_type)
{
access_mask_t handled_accesses = 0;
@@ -709,11 +716,19 @@ landlock_init_layer_masks(const struct landlock_ruleset *const domain,
for (size_t i = 0; i < domain->num_layers; i++) {
const access_mask_t handled = get_access_mask(domain, i);
- masks->access[i] = access_request & handled;
- handled_accesses |= masks->access[i];
+ masks->layers[i].access = access_request & handled;
+ handled_accesses |= masks->layers[i].access;
+#ifdef CONFIG_AUDIT
+ masks->layers[i].quiet = false;
+#endif /* CONFIG_AUDIT */
+ }
+ for (size_t i = domain->num_layers; i < ARRAY_SIZE(masks->layers);
+ i++) {
+ masks->layers[i].access = 0;
+#ifdef CONFIG_AUDIT
+ masks->layers[i].quiet = false;
+#endif /* CONFIG_AUDIT */
}
- for (size_t i = domain->num_layers; i < ARRAY_SIZE(masks->access); i++)
- masks->access[i] = 0;
return handled_accesses;
}
diff --git a/security/landlock/ruleset.h b/security/landlock/ruleset.h
index 889f4b30301a..d0fa9af46a2c 100644
--- a/security/landlock/ruleset.h
+++ b/security/landlock/ruleset.h
@@ -29,7 +29,18 @@ struct landlock_layer {
/**
* @level: Position of this layer in the layer stack. Starts from 1.
*/
- u16 level;
+ u8 level;
+ /**
+ * @flags: Bitfield for special flags attached to this rule.
+ */
+ struct {
+ /**
+ * @quiet: Suppresses denial logs for the object covered by this
+ * rule in this domain. For filesystem rules, this inherits down
+ * the file hierarchy.
+ */
+ u8 quiet : 1;
+ } flags;
/**
* @access: Bitfield of allowed actions on the kernel object. They are
* relative to the object type (e.g. %LANDLOCK_ACTION_FS_READ).
@@ -302,12 +313,12 @@ landlock_get_scope_mask(const struct landlock_ruleset *const ruleset,
}
bool landlock_unmask_layers(const struct landlock_rule *const rule,
- struct layer_access_masks *masks);
+ struct layer_masks *masks);
access_mask_t
landlock_init_layer_masks(const struct landlock_ruleset *const domain,
const access_mask_t access_request,
- struct layer_access_masks *masks,
+ struct layer_masks *masks,
const enum landlock_key_type key_type);
#endif /* _SECURITY_LANDLOCK_RULESET_H */
--
2.54.0
^ permalink raw reply related
* [PATCH v11 0/9] Implement LANDLOCK_ADD_RULE_QUIET
From: Tingmao Wang @ 2026-06-12 1:48 UTC (permalink / raw)
To: Mickaël Salaün
Cc: Tingmao Wang, Günther Noack, Justin Suess, Jan Kara,
Abhinav Saxena, linux-security-module
Hi,
This is the v11 of the "quiet flag" series, implementing the feature as
proposed in [1].
v10: https://lore.kernel.org/all/cover.1780272022.git.m@maowtm.org/
v9: https://lore.kernel.org/all/cover.1779843375.git.m@maowtm.org/
v8: https://lore.kernel.org/all/cover.1775490344.git.m@maowtm.org/
v7: https://lore.kernel.org/all/cover.1766330134.git.m@maowtm.org/
v6: https://lore.kernel.org/all/cover.1765040503.git.m@maowtm.org/
v5: https://lore.kernel.org/all/cover.1763931318.git.m@maowtm.org/
v4: https://lore.kernel.org/all/cover.1763330228.git.m@maowtm.org/
v3: https://lore.kernel.org/all/cover.1761511023.git.m@maowtm.org/
v2: https://lore.kernel.org/all/cover.1759686613.git.m@maowtm.org/
v1: https://lore.kernel.org/all/cover.1757376311.git.m@maowtm.org/
v10..v11:
- doc and style fixes
- s/audit log/log/
- u32 flags arguments
- stop using bitfields for quiet_optional_accesses and fown_layer
- fixed bitfield using different types
- use u8 instead of bool in struct landlock_layer
- sandboxer: merge LL_{FS,NET,SCOPED}_QUIET_ACCESS into a single
LL_QUIET_ACCESS with more descriptive values.
- selftests: also test the quiet_access_net and quiet_scoped fields.
(Kept ABI version at 10)
All text following this line is unchanged except for the demo
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
v9..v10:
- clang-format on .h files
- doc changes
- remove stray __attribute__((fallthrough));
v8..v9:
- Refactor to store the collected rule flags in layer_masks instead
(renamed from layer_access_masks). Got rid of layer_mask_t again.
- Rebase sandboxer and net_tests on top of UDP support, resolving
conflicts
- Additional small changes, noted in each patch
v7..v8:
- Rebase to mic/next
- Re-introduced layer_mask_t due to need in first patch
- Plumb through rule flags in hook_unix_find()
- Some selftests patches were not properly clang-format'd, fixed now.
- Minor env var handling change in sandboxer
- Fix selftests use of audit_count_records() without EXPECT_EQ
v6..v7:
- Remove "landlock: Fix wrong type usage" (merged)
- Revert back to taking rule_flags separately from landlock_request until
we call landlock_log_denial (https://lore.kernel.org/all/20251219.ahn3aiJuKahb@digikod.net/)
- Rebase to mic/next
v5..v6 rebases on top of the new simpler disconnected directory handling,
change some bools into u32, and fix some typo and style.
v4..v5 addresses review feedback, most significantly:
- reduces code changes by pushing rule_flags into landlock_request.
- adding test cases for two layers handling different access bits.
v3..v4 is a one-character formatting change, plus more tests.
We now have 5 patches for the selftest - I'm happy to squash it into one
depending on preference (and happy for Mickaël to do the squash if no
other feedback):
- selftests/landlock: Replace hard-coded 16 with a constant
- selftests/landlock: add tests for quiet flag with fs rules
- selftests/landlock: add tests for quiet flag with net rules
- selftests/landlock: Add tests for quiet flag with scope
- selftests/landlock: Add tests for invalid use of quiet flag
v2..v3:
Not much has changed in the actual functionality except various comment,
typing, asserts and general style fixes based on feedback. The major new
thing here is tests (a bit of KUnit squashed into the optional access
commit, a lot of selftests especially in fs_tests.c).
The added fs_tests should exercise the code paths for optional and non-optional
access, renames, and mountpoint and disconnected directory handling. I
will add the above missing bits to v4.
Removed:
- "Implement quiet for optional accesses"
(squashed into "landlock: Suppress logging when quiet flag is present")
Old feature summary below:
The quiet flag allows a sandboxer to suppress audit logs for uninteresting
denials. The flag can be set on objects and inherits downward in the
filesystem hierarchy. On a denial, the youngest denying layer's quiet
flag setting decides whether to audit. The motivation for this feature is
to reduce audit noise, and also prepare for a future supervisor feature
which will use this bit to suppress supervisor notifications.
This patch introduces a new quiet access mask in the ruleset_attr, which
gets eventually stored in the hierarchy. This allows the user to specify
which access should be affected by quiet bits. One can then, for example,
make it such that read accesses to certain files are not audited (but
still denied), but all writes are still audited, regardless of location.
The sandboxer is extended to show example usage of this feature,
supporting quieting filesystem, network and scope accesses.
Demo:
/# LL_FS_RO=/usr LL_FS_RW= LL_FORCE_LOG=1 LL_FS_QUIET=/dev:/tmp:/etc LL_QUIET_ACCESS=read ./sandboxer bash
...
audit: type=1423 audit(1759680175.562:195): domain=15bb25f6b blockers=fs.write_file,fs.read_file path="/dev/tty" dev="devtmpfs" ino=11
^^^^^^^^
# note: because write is not quieted, we see the above line. blockers
# contains read as well since that's the originally requested access.
audit: type=1424 audit(1759680175.562:195): domain=15bb25f6b status=allocated mode=enforcing pid=616 uid=0 exe="/sandboxer" comm="sandboxer"
audit: type=1300 audit(1759680175.562:195): arch=c000003e syscall=257 success=no exit=-13 a0=ffffffffffffff9c a1=5565c86113d1 a2=802 a3=0 items=0 ppid=605 pid=616 auid=4294967295 uid=0 gid=0 euid=0 suid=0 fsuid=0 egid=0 sgid=0 fsgid=0 tty=(none) ses=4294967295 comm="bash" exe="/usr/bin/bash" key=(null)
audit: type=1327 audit(1759680175.562:195): proctitle="bash"
bash: cannot set terminal process group (605): Inappropriate ioctl for device
bash: no job control in this shell
bash: /etc/bash.bashrc: Permission denied
audit: type=1423 audit(1759680175.570:196): domain=15bb25f6b blockers=fs.read_file path="/.bash_history" dev="virtiofs" ino=36963
^^^^^^^^
# read outside /dev:/tmp:/etc - not quieted
audit: type=1300 audit(1759680175.570:196): arch=c000003e syscall=257 success=no exit=-13 a0=ffffffffffffff9c a1=5565c868e400 a2=0 a3=0 items=0 ppid=605 pid=616 auid=4294967295 uid=0 gid=0 euid=0 suid=0 fsuid=0 egid=0 sgid=0 fsgid=0 tty=(none) ses=4294967295 comm="bash" exe="/usr/bin/bash" key=(null)
audit: type=1327 audit(1759680175.570:196): proctitle="bash"
audit: type=1423 audit(1759680175.570:197): domain=15bb25f6b blockers=fs.read_file path="/.bash_history" dev="virtiofs" ino=36963
audit: type=1300 audit(1759680175.570:197): arch=c000003e syscall=257 success=no exit=-13 a0=ffffffffffffff9c a1=5565c868e400 a2=0 a3=0 items=0 ppid=605 pid=616 auid=4294967295 uid=0 gid=0 euid=0 suid=0 fsuid=0 egid=0 sgid=0 fsgid=0 tty=(none) ses=4294967295 comm="bash" exe="/usr/bin/bash" key=(null)
audit: type=1327 audit(1759680175.570:197): proctitle="bash"
bash-5.2# head /etc/passwd
head: cannot open '/etc/passwd' for reading: Permission denied
^^^^^^^^
# reads to /etc are quieted
bash-5.2# echo evil >> /etc/passwd
bash: /etc/passwd: Permission denied
audit: type=1423 audit(1759680227.030:198): domain=15bb25f6b blockers=fs.write_file path="/etc/passwd" dev="virtiofs" ino=790
^^^^^^^^
# writes are not quieted
audit: type=1300 audit(1759680227.030:198): arch=c000003e syscall=257 success=no exit=-13 a0=ffffffffffffff9c a1=5565c86ab030 a2=441 a3=1b6 items=0 ppid=605 pid=616 auid=4294967295 uid=0 gid=0 euid=0 suid=0 fsuid=0 egid=0 sgid=0 fsgid=0 tty=(none) ses=4294967295 comm="bash" exe="/usr/bin/bash" key=(null)
audit: type=1327 audit(1759680227.030:198): proctitle="bash"
Design:
- The user can set the quiet flag for a layer on any part of the fs
hierarchy (whether it allows any access on it or not), and the flag
inherits down (no support for "cancelling" the inheritance of the flag
in specific subdirectories).
- The youngest layer that denies a request gets to decide whether the
denial is audited or not. This means that a compromised binary, for
example, cannot "turn off" Landlock auditing when it tries to access
files, unless it denies access to the files itself. There is some
debate to be had on whether, if a parent layer sets the quiet flag but
the request is denied by a deeper layer, Landlock should still
audit anyway (since the rule author of the child layer likely did not
expect the denial, so it would be a good diagnostic). The current
approach is to ignore the quiet on the parent layer and audit anyway.
[1]: https://github.com/landlock-lsm/linux/issues/44#issuecomment-2876500918
Kind regards,
Tingmao
Tingmao Wang (9):
landlock: Add a place for flags to layer rules
landlock: Add API support and docs for the quiet flags
landlock: Suppress logging when quiet flag is present
samples/landlock: Add quiet flag support to sandboxer
selftests/landlock: Replace hard-coded 16 with a constant
selftests/landlock: add tests for quiet flag with fs rules
selftests/landlock: add tests for quiet flag with net rules
selftests/landlock: Add tests for quiet flag with scope
selftests/landlock: Add tests for invalid use of quiet flag
Documentation/admin-guide/LSM/landlock.rst | 9 +-
Documentation/userspace-api/landlock.rst | 14 +
include/uapi/linux/landlock.h | 61 +
samples/landlock/sandboxer.c | 138 +-
security/landlock/access.h | 44 +-
security/landlock/audit.c | 288 +-
security/landlock/audit.h | 3 +-
security/landlock/domain.c | 57 +-
security/landlock/domain.h | 11 +-
security/landlock/fs.c | 157 +-
security/landlock/fs.h | 21 +-
security/landlock/limits.h | 3 +
security/landlock/net.c | 22 +-
security/landlock/net.h | 5 +-
security/landlock/ruleset.c | 45 +-
security/landlock/ruleset.h | 29 +-
security/landlock/syscalls.c | 71 +-
tools/testing/selftests/landlock/audit_test.c | 27 +-
tools/testing/selftests/landlock/base_test.c | 118 +-
tools/testing/selftests/landlock/common.h | 2 +
tools/testing/selftests/landlock/fs_test.c | 2450 ++++++++++++++++-
tools/testing/selftests/landlock/net_test.c | 138 +-
.../landlock/scoped_abstract_unix_test.c | 77 +-
23 files changed, 3576 insertions(+), 214 deletions(-)
base-commit: a6f0a6f5377fae42a8028f63c89d544c68f24b60
--
2.54.0
^ permalink raw reply
* Re: [PATCH v10 4/9] samples/landlock: Add quiet flag support to sandboxer
From: Tingmao Wang @ 2026-06-12 1:12 UTC (permalink / raw)
To: Mickaël Salaün
Cc: Günther Noack, Justin Suess, Jan Kara, Abhinav Saxena,
linux-security-module
In-Reply-To: <20260609.Pi8aiyae5nee@digikod.net>
On 6/8/26 23:41, Mickaël Salaün wrote:
> As for LL_FORCE_LOG, using a QUIET flag not supported should exit with
> an error.
As in, if the current kernel doesn't support quiet flags? Added check.
>
> On Mon, Jun 01, 2026 at 01:00:38AM +0100, Tingmao Wang wrote:
>> Adds ability to set which access bits to quiet via LL_*_QUIET_ACCESS (FS,
>> NET or SCOPED), and attach quiet flags to individual objects via
>> LL_*_QUIET for FS and NET.
>>
>> Signed-off-by: Tingmao Wang <m@maowtm.org>
>> ---
>>
>> Changes in v10:
>> - Remove stray __attribute__((fallthrough)); (Thanks Justin for
>> spotting)
>>
>> Changes in v9:
>> - Add udp connect / bind quiet flag support
>>
>> Changes in v8:
>> - Rebase on top of mic/next
>> - populate_ruleset_net() already does not require the env var to be
>> present, so remove redundant comment and check above
>> populate_ruleset_net(ENV_NET_QUIET_NAME, ...).
>>
>> Changes in v6:
>> - Make populate_ruleset_{fs,net} take a flags argument instead of a bool
>> quiet (suggested by Justin Suess)
>> - Fix if braces style
>>
>> Changes in v3:
>> - Minor change to the above commit message.
>>
>> Changes in v2:
>> - Added new environment variables to control which quiet access bits to
>> set on the rule, and populate quiet_access_* from it.
>> - Added support for quieting net rules and scoped access. Renamed patch
>> title.
>> - Increment ABI version
>>
>> samples/landlock/sandboxer.c | 133 ++++++++++++++++++++++++++++++++---
>> 1 file changed, 122 insertions(+), 11 deletions(-)
>>
>> diff --git a/samples/landlock/sandboxer.c b/samples/landlock/sandboxer.c
>> index 94e399e6b146..73a81ecd3696 100644
>> --- a/samples/landlock/sandboxer.c
>> +++ b/samples/landlock/sandboxer.c
>> @@ -58,9 +58,14 @@ static inline int landlock_restrict_self(const int ruleset_fd,
>>
>> #define ENV_FS_RO_NAME "LL_FS_RO"
>> #define ENV_FS_RW_NAME "LL_FS_RW"
>> +#define ENV_FS_QUIET_NAME "LL_FS_QUIET"
>> +#define ENV_FS_QUIET_ACCESS_NAME "LL_FS_QUIET_ACCESS"
>> #define ENV_TCP_BIND_NAME "LL_TCP_BIND"
>> #define ENV_TCP_CONNECT_NAME "LL_TCP_CONNECT"
>> +#define ENV_NET_QUIET_NAME "LL_NET_QUIET"
>> +#define ENV_NET_QUIET_ACCESS_NAME "LL_NET_QUIET_ACCESS"
>> #define ENV_SCOPED_NAME "LL_SCOPED"
>> +#define ENV_SCOPED_QUIET_ACCESS_NAME "LL_SCOPED_QUIET_ACCESS"
>> #define ENV_FORCE_LOG_NAME "LL_FORCE_LOG"
>> #define ENV_UDP_BIND_NAME "LL_UDP_BIND"
>> #define ENV_UDP_CONNECT_SEND_NAME "LL_UDP_CONNECT_SEND"
>> @@ -119,7 +124,7 @@ static int parse_path(char *env_path, const char ***const path_list)
>> /* clang-format on */
>>
>> static int populate_ruleset_fs(const char *const env_var, const int ruleset_fd,
>> - const __u64 allowed_access)
>> + const __u64 allowed_access, __u32 flags)
>> {
>> int num_paths, i, ret = 1;
>> char *env_path_name;
>> @@ -169,7 +174,7 @@ static int populate_ruleset_fs(const char *const env_var, const int ruleset_fd,
>> if (!S_ISDIR(statbuf.st_mode))
>> path_beneath.allowed_access &= ACCESS_FILE;
>> if (landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH,
>> - &path_beneath, 0)) {
>> + &path_beneath, flags)) {
>> fprintf(stderr,
>> "Failed to update the ruleset with \"%s\": %s\n",
>> path_list[i], strerror(errno));
>> @@ -187,7 +192,7 @@ static int populate_ruleset_fs(const char *const env_var, const int ruleset_fd,
>> }
>>
>> static int populate_ruleset_net(const char *const env_var, const int ruleset_fd,
>> - const __u64 allowed_access)
>> + const __u64 allowed_access, __u32 flags)
>> {
>> int ret = 1;
>> char *env_port_name, *env_port_name_next, *strport;
>> @@ -215,7 +220,7 @@ static int populate_ruleset_net(const char *const env_var, const int ruleset_fd,
>> }
>> net_port.port = port;
>> if (landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
>> - &net_port, 0)) {
>> + &net_port, flags)) {
>> fprintf(stderr,
>> "Failed to update the ruleset with port \"%llu\": %s\n",
>> net_port.port, strerror(errno));
>> @@ -303,6 +308,58 @@ static bool check_ruleset_scope(const char *const env_var,
>>
>> /* clang-format on */
>>
>> +static int add_quiet_access(__u64 *const quiet_access,
>> + const __u64 handled_access,
>> + const char *const env_var, const bool default_all)
>> +{
>> + char *env_quiet_access, *env_quiet_access_next, *str_access;
>> +
>> + if (default_all)
>> + *quiet_access = handled_access;
>> + else
>> + *quiet_access = 0;
>> +
>> + env_quiet_access = getenv(env_var);
>> + if (!env_quiet_access)
>> + return 0;
>> +
>> + env_quiet_access = strdup(env_quiet_access);
>> + env_quiet_access_next = env_quiet_access;
>> + unsetenv(env_var);
>> + *quiet_access = 0;
>> +
>> + while ((str_access = strsep(&env_quiet_access_next, ENV_DELIMITER))) {
>> + if (strcmp(str_access, "") == 0)
>> + continue;
>> + else if (strcmp(str_access, "r") == 0)
>> + *quiet_access |= ACCESS_FS_ROUGHLY_READ;
>> + else if (strcmp(str_access, "w") == 0)
>> + *quiet_access |= ACCESS_FS_ROUGHLY_WRITE;
>> + else if (strcmp(str_access, "b") == 0)
>> + *quiet_access |= LANDLOCK_ACCESS_NET_BIND_TCP;
>
> What happens if we set "b" in LL_FS_QUIET_ACCESS?
>
>> + else if (strcmp(str_access, "c") == 0)
>> + *quiet_access |= LANDLOCK_ACCESS_NET_CONNECT_TCP;
>> + else if (strcmp(str_access, "ub") == 0)
>
> I don't really like these access-right names, they are not consistent.
> All these env variables add a lot of complexity too. What about just
> being able to quiet a path or a port? That would mean renaming
> LL_FS_QUIET_ACCESS to LL_FS_QUIET.
I'm happy to remove LL_{FS,NET}_QUIET_ACCESS and just have
LL_{FS,NET}_QUIET quiet all access, but then we lose the ability to demo
"quiet only read but still log write" via the sandboxer.
I do agree the "b in LL_FS_QUIET_ACCESS" case is weird, so maybe we can
just have one LL_QUIET_ACCESS variable?
Also, the names are like this because I tried to mimic the one-letter
scoped access, but we could use e.g.
LL_QUIET_ACCESS=read:write:tcp_bind:tcp_connect:udp_bind:udp_connect:abstract_unix_socket:signal
Do you want to keep the ability to specify LL_QUIET_ACCESS? (I think it's
useful for demo, since I expect "quiet read but log write denials" to be
quite common.)
>
> Anyway, all should be unsetenv() unconditionally.
I think they are all unsetenv()'d already (checked with
/usr/bin/env), do you mean to make them not conditional on the env
existing in the first place? I followed how populate_ruleset_{fs,net}
works and those two functions currently do conditional unsetenv(),
although check_ruleset_scope() does it unconditionally.
>
>> + *quiet_access |= LANDLOCK_ACCESS_NET_BIND_UDP;
>> + else if (strcmp(str_access, "uc") == 0)
>> + *quiet_access |= LANDLOCK_ACCESS_NET_CONNECT_SEND_UDP;
>> + else if (strcmp(str_access, "a") == 0)
>> + *quiet_access |= LANDLOCK_SCOPE_ABSTRACT_UNIX_SOCKET;
>> + else if (strcmp(str_access, "s") == 0)
>> + *quiet_access |= LANDLOCK_SCOPE_SIGNAL;
>> + else {
>> + fprintf(stderr, "Unknown quiet access \"%s\"\n",
>> + str_access);
>> + free(env_quiet_access);
>> + return -1;
>> + }
>> + }
>> +
>> + free(env_quiet_access);
>> + *quiet_access &= handled_access;
>> + return 0;
>> +}
>> +
>> #define LANDLOCK_ABI_LAST 10
>>
>> #define XSTR(s) #s
>> [...]
^ permalink raw reply
* Re: [PATCH v10 3/9] landlock: Suppress logging when quiet flag is present
From: Tingmao Wang @ 2026-06-12 1:11 UTC (permalink / raw)
To: Mickaël Salaün
Cc: Günther Noack, Justin Suess, Jan Kara, Abhinav Saxena,
linux-security-module
In-Reply-To: <20260608.daeshu7Leequ@digikod.net>
On 6/8/26 23:41, Mickaël Salaün wrote:
> On Mon, Jun 01, 2026 at 01:00:37AM +0100, Tingmao Wang wrote:
>> [...]
>> @@ -265,20 +268,33 @@ get_layer_from_deny_masks(access_mask_t *const access_request,
>> BITS_PER_TYPE(access_mask_t)) {
>> if (access_req & BIT(access_bit)) {
>> const size_t layer =
>> - (deny_masks >> (access_index * 4)) &
>> + (deny_masks >>
>> + (access_index *
>> + HWEIGHT(LANDLOCK_MAX_NUM_LAYERS - 1))) &
>> (LANDLOCK_MAX_NUM_LAYERS - 1);
>> + const bool layer_has_quiet =
>> + !!(quiet_optional_accesses & BIT(access_index));
>>
>> if (layer > youngest_layer) {
>> youngest_layer = layer;
>> missing = BIT(access_bit);
>> + should_quiet = layer_has_quiet;
>> } else if (layer == youngest_layer) {
>> missing |= BIT(access_bit);
>> + /*
>> + * Whether the layer has rules with quiet flag covering
>> + * the file accessed does not depend on the access, and so
>> + * the following WARN_ON_ONCE() should not fail.
>> + */
>> + WARN_ON_ONCE(should_quiet && !layer_has_quiet);
>
> WARN_ON_ONCE(should_quiet != layer_has_quiet);
That will fail when layer 0 has quiet flag on the object, at which point
since youngest_layer starts from 0, we will reach this branch with
should_quiet initialized earlier to false, but layer_has_quiet == true.
Also, if should_quiet != layer_has_quiet is always false here, then the
next line is not necessary.
>
>> + should_quiet = layer_has_quiet;
Would it be clearer to do should_quiet |= layer_has_quiet, mimicking the
"missing |= BIT(access_bit)"? (The result is the same)
>> }
>> }
>> access_index++;
>> }
>>
>> *access_request = missing;
>> + *quiet = should_quiet;
>> return youngest_layer;
>> }
>>
>> [...]
>> diff --git a/security/landlock/fs.h b/security/landlock/fs.h
>> index cb7e654933ac..d0fca7da2466 100644
>> --- a/security/landlock/fs.h
>> +++ b/security/landlock/fs.h
>> @@ -63,11 +63,20 @@ struct landlock_file_security {
>> * _LANDLOCK_ACCESS_FS_OPTIONAL).
>> */
>> deny_masks_t deny_masks;
>> + /**
>> + * @quiet_optional_accesses: Stores which optional accesses are
>> + * covered by quiet rules within the layer referred to in deny_masks,
>> + * one access per bit. Does not take into account whether the quiet
>> + * access bits are actually set in the layer's corresponding
>> + * landlock_hierarchy.
>> + */
>> + optional_access_t quiet_optional_accesses
>> + : HWEIGHT(_LANDLOCK_ACCESS_FS_OPTIONAL);
>> /**
>> * @fown_layer: Layer level of @fown_subject->domain with
>> * LANDLOCK_SCOPE_SIGNAL.
>> */
>> - u8 fown_layer;
>> + u8 fown_layer : 4;
>
> Please don't hardcode such size.
>
> Anyway, fown_layer can be updated concurrently (holding a lock), so we
> should not convert it to a bitfield.
>
>> #endif /* CONFIG_AUDIT */
>>
>> /**
>
>> @@ -82,12 +91,6 @@ struct landlock_file_security {
>>
>> #ifdef CONFIG_AUDIT
>>
>> -/* Makes sure all layers can be identified. */
>> -/* clang-format off */
>> -static_assert((typeof_member(struct landlock_file_security, fown_layer))~0 >=
>> - LANDLOCK_MAX_NUM_LAYERS);
>> -/* clang-format off */
>> -
>> #endif /* CONFIG_AUDIT */
>
> Remaining useless ifdef/endif.
Since we are not using bitfield for fown_layer anymore, I've also turned
quiet_optional_accesses (u8) into a non-bitfield. Then I reverted this
deletion and added
static_assert((typeof_member(struct landlock_file_security,
quiet_optional_accesses)) ~0 >=
HWEIGHT(_LANDLOCK_ACCESS_FS_OPTIONAL));
>
>> [...]
^ permalink raw reply
* Re: [PATCH v10 1/9] landlock: Add a place for flags to layer rules
From: Tingmao Wang @ 2026-06-12 1:11 UTC (permalink / raw)
To: Mickaël Salaün
Cc: Günther Noack, Justin Suess, Jan Kara, Abhinav Saxena,
linux-security-module
In-Reply-To: <20260608.ni6daelae9Qu@digikod.net>
On 6/8/26 23:40, Mickaël Salaün wrote:
> On Mon, Jun 01, 2026 at 01:00:35AM +0100, Tingmao Wang wrote:
>> [...]
>> diff --git a/security/landlock/access.h b/security/landlock/access.h
>> index c19d5bc13944..42d8b5134358 100644
>> --- a/security/landlock/access.h
>> +++ b/security/landlock/access.h
>> @@ -62,18 +62,39 @@ static_assert(sizeof(typeof_member(union access_masks_all, masks)) ==
>> sizeof(typeof_member(union access_masks_all, all)));
>>
>> /**
>> - * struct layer_access_masks - A boolean matrix of layers and access rights
>> - *
>> - * This has a bit for each combination of layer numbers and access rights.
>> - * During access checks, it is used to represent the access rights for each
>> - * layer which still need to be fulfilled. When all bits are 0, the access
>> - * request is considered to be fulfilled.
>> + * struct layer_mask - The unfulfilled access rights and rule flags for
>
> This struct could be used to store "fulfilled" access rights too. The
> previous description is more accurate. Please keep most of the previous
> description too and adjust as needed.
In v10 I attempted to move this description to the doc strings for the
fields, but happy to move back (done in v11).
^ permalink raw reply
* Re: [PATCH] Add LoadPin support for eBPF program loading
From: David Windsor @ 2026-06-12 0:08 UTC (permalink / raw)
To: alex.roberts109, Kees Cook, Paul Moore, James Morris,
Serge E . Hallyn
Cc: linux-kernel, linux-security-module, bpf, Alexei Starovoitov,
KP Singh, David Windsor
In-Reply-To: <20260611-b4-rfc-loadpin-ebpf-v1-1-11a6c8e6170d@outlook.com>
On Thu, Jun 11, 2026 at 01:59:10PM -0500, Alex Roberts wrote:
> +static int loadpin_bpf_prog_load(struct bpf_prog *prog, union bpf_attr *attr,
> + struct bpf_token *token, bool is_kernel)
> +{
> + int res = 0;
> + struct file *exe_file = NULL;
> + struct mm_struct *mm = current->mm;
> +
> + if (is_kernel || !mm)
> + return 0;
> +
> + exe_file = get_mm_exe_file(mm);
> + if (!exe_file)
> + return 0;
> +
> + res = loadpin_check(exe_file, READING_EBPF);
Why are we checking current here? IIUC this will be whoever calls
bpf(2), which would be the loader, which would then be able to load bpf
programs from an untrusted source.
In the kmod case loadpin_check() sees the .ko itself.
^ permalink raw reply
* Re: [PATCH bpf-next 0/5] Verify BPF signed loader at load time
From: Paul Moore @ 2026-06-11 22:56 UTC (permalink / raw)
To: Daniel Borkmann
Cc: ast, kpsingh, James.Bottomley, bboscaccy, memxor, torvalds, bpf,
linux-security-module
In-Reply-To: <20260610230329.727075-1-daniel@iogearbox.net>
On Wed, Jun 10, 2026 at 7:03 PM Daniel Borkmann <daniel@iogearbox.net> wrote:
>
> The BPF signing scheme signs a light skeleton's loader program and lets
> the loader vouch for everything else: bpftool bakes the SHA256 of the
> metadata map into the loader's instructions, signs the instructions, and
> the loader compares the (frozen, exclusive) map against that hash from
> within BPF once it runs. The construction is sound as a trusted hash
> chain, but the kernel itself never attests the metadata, and that split
> has been the recurring objection from the LSM / integrity side since the
> scheme was proposed.
>
> This proposal closes both gaps by having the kernel verify the metadata
> at BPF_PROG_LOAD time, before the LSM admission hook and before the
> verifier, /without/ growing the UAPI. A signed loader binds its metadata
> map(s) through the existing fd_array/fd_array_cnt, and exclusive maps
> are already bound to the loader's digest via excl_prog_hash. When a
> signature is present, the kernel collects the exclusive maps from the
> fd_array and appends their frozen contents to the instructions before
> PKCS#7 verification, so the signature covers ...
>
> insns || metadata_0 || metadata_1 || [...]
>
> ... in fd_array order. The in-loader hash check is dropped from the
> gen_loader entirely: generated loaders carry no verification logic
> anymore, and signing or verifying a skeleton becomes an ordinary CMS
> operation over bytes that sit verbatim in the skeleton, reproducible
> offline. A signed program is either BPF_SIG_UNSIGNED or BPF_SIG_VERIFIED
> with nothing in between.
I'll be honest and say I'm a bit surprised to see this patchset,
especially since KP and Alexei argued so strongly against this
signature scheme, preferring KP's scheme where the loader verified the
maps. I'd be curious to hear the reason for the change of heart if
you can share it. Regardless of the motivation for this change, I
obviously think this is a significant improvement over KP's signature
scheme which shipped in Linux v6.18.
I also think it is worth mentioning the similarities to work Blaise
did before the most recent Hornet version:
https://lore.kernel.org/linux-security-module/20250929213520.1821223-1-bboscaccy@linux.microsoft.com/
While Blaise's patchset added to the UAPI, that was done simply to
retain compatibility with KP's signature scheme; your patchset does
without any UAPI additions, but loses compatibility with existing
signed lskels. Beyond that, the basic signature scheme between
Blaise's patchset and what you are proposing appears the same ...
which is a good thing as far as I'm concerned.
--
paul-moore.com
^ permalink raw reply
* [PATCH] Add LoadPin support for eBPF program loading
From: Alex Roberts via B4 Relay @ 2026-06-11 18:59 UTC (permalink / raw)
To: Kees Cook, Paul Moore, James Morris, Serge E. Hallyn
Cc: linux-kernel, linux-security-module, bpf, Alexei Starovoitov,
KP Singh, Alex Roberts
From: Alex Roberts <alex.roberts109@outlook.com>
Add LoadPin LSM hook to bpf_prog_load to prevent loading of BPF
programs from untrusted filesystems.
---
There have been several efforts to provide a trust mechanism for eBPF programs –
particularly in the form of a signed program. After many discussions, BPF
Signing became supported [1].
This patch series intends to provide an alternative trust mechanism for eBPF
programs using LoadPin and the LoadPin dm-verity support. The approach is to pin
eBPF userspace loaders to a single filesystem or to one or more dm-verity
integrity protected filesystems. This is similar to the existing Loadpin+verity
implementation and approach of e.g., loading unsigned kernel modules or kexec
images from a pinned trusted filesystem [2].
When a userspace application attempts to load a BPF program, LoadPin
first checks whether the exe_file is located on the pinned root; if so,
bpf_prog_load is allowed. Otherwise, LoadPin denies bpf_prog_load. Additionally,
if verity support is enabled, LoadPin determines whether the exe_file is located
on a verity backed device and whether the root digest of that device is in the
list of trusted digests. bpf_prog_load is allowed if the verity device has a
trusted root digest.
Background:
In a secure boot environment, secure boot can be extended to the root filesystem
by means of dm-verity. Placing userspace eBPF programs on trusted filesystems
and restricting their use to a trusted filesystem implicitly extends the trust
to the underlying BPF program itself, without having to sign the bytecode.
Rather than sign each program and load keys into the kernel keyring, it can be
sufficient to sign the filesystem.
Additional Considerations:
Because the userspace loader itself defines the filesystem to pin from, it
should be acknowledged that this does not necessarily solve the dynamically
generated eBPF usecase. For example, if the pinned filesystem includes bpftrace
or other userspace applications that dynamically generate and load eBPF
bytecode, this trust mechanism does not apply. But for systems that either
1) remove/do-not-provide dynamically generating programs or
2) loadpin filesystem(s) that exclude such programs,
then it can be reasoned that only trusted bytecode is loaded.
[1] https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git/commit/?h=for-next&id=58a5820582e4c809dd26b3f2d396cf072411d6e8
[2] https://lore.kernel.org/lkml/20220517163437.v4.2.I01c67af41d2f6525c6d023101671d7339a9bc8b5@changeid/
Signed-off-by: Alex Roberts <alex.roberts109@outlook.com>
---
include/linux/kernel_read_file.h | 1 +
security/loadpin/Kconfig | 12 ++++++++++++
security/loadpin/loadpin.c | 29 +++++++++++++++++++++++++++++
3 files changed, 42 insertions(+)
diff --git a/include/linux/kernel_read_file.h b/include/linux/kernel_read_file.h
index d613a7b4dd35..75cbd514562f 100644
--- a/include/linux/kernel_read_file.h
+++ b/include/linux/kernel_read_file.h
@@ -15,6 +15,7 @@
id(POLICY, security-policy) \
id(X509_CERTIFICATE, x509-certificate) \
id(MODULE_COMPRESSED, kernel-module-compressed) \
+ id(EBPF, ebpf) \
id(MAX_ID, )
#define __fid_enumify(ENUM, dummy) READING_ ## ENUM,
diff --git a/security/loadpin/Kconfig b/security/loadpin/Kconfig
index aef63d3e30df..be0754735d5d 100644
--- a/security/loadpin/Kconfig
+++ b/security/loadpin/Kconfig
@@ -42,3 +42,15 @@ config SECURITY_LOADPIN_VERITY
This is followed by the verity digests, with one digest per
line.
+
+config SECURITY_LOADPIN_EBPF
+ bool "Pin ebpf programs to one filesystem"
+ depends on SECURITY_LOADPIN
+ help
+ eBPF program loading is pinned to the first filesystem from which the
+ application loading the program resides. When enabled, applications on
+ other filesystems that attempt to load an eBPF program will be rejected.
+ This is best used when applications that load eBPF programs reside on a
+ read-only filesystem from which tools for dynamically generating eBPF
+ programs such as bpftrace are not installed. This will not restrict the
+ kernel from loading a bpf program.
\ No newline at end of file
diff --git a/security/loadpin/loadpin.c b/security/loadpin/loadpin.c
index f71861f98e1a..47fd7e247edb 100644
--- a/security/loadpin/loadpin.c
+++ b/security/loadpin/loadpin.c
@@ -23,6 +23,10 @@
#include <uapi/linux/loadpin.h>
#include <uapi/linux/lsm.h>
+#ifdef CONFIG_SECURITY_LOADPIN_EBPF
+#include <linux/bpf.h>
+#endif /* CONFIG_SECURITY_LOADPIN_EBPF */
+
#define VERITY_DIGEST_FILE_HEADER "# LOADPIN_TRUSTED_VERITY_ROOT_DIGESTS"
static void report_load(const char *origin, struct file *file, char *operation)
@@ -204,6 +208,28 @@ static int loadpin_load_data(enum kernel_load_data_id id, bool contents)
return loadpin_check(NULL, (enum kernel_read_file_id) id);
}
+#ifdef CONFIG_SECURITY_LOADPIN_EBPF
+static int loadpin_bpf_prog_load(struct bpf_prog *prog, union bpf_attr *attr,
+ struct bpf_token *token, bool is_kernel)
+{
+ int res = 0;
+ struct file *exe_file = NULL;
+ struct mm_struct *mm = current->mm;
+
+ if (is_kernel || !mm)
+ return 0;
+
+ exe_file = get_mm_exe_file(mm);
+ if (!exe_file)
+ return 0;
+
+ res = loadpin_check(exe_file, READING_EBPF);
+ fput(exe_file);
+
+ return res;
+}
+#endif /* CONFIG_SECURITY_LOADPIN_EBPF */
+
static const struct lsm_id loadpin_lsmid = {
.name = "loadpin",
.id = LSM_ID_LOADPIN,
@@ -213,6 +239,9 @@ static struct security_hook_list loadpin_hooks[] __ro_after_init = {
LSM_HOOK_INIT(sb_free_security, loadpin_sb_free_security),
LSM_HOOK_INIT(kernel_read_file, loadpin_read_file),
LSM_HOOK_INIT(kernel_load_data, loadpin_load_data),
+#ifdef CONFIG_SECURITY_LOADPIN_EBPF
+ LSM_HOOK_INIT(bpf_prog_load, loadpin_bpf_prog_load),
+#endif /* CONFIG_SECURITY_LOADPIN_EBPF */
};
static void __init parse_exclude(void)
---
base-commit: 122b52f0bab007ebeb414c8280c1def17b9ed1f4
change-id: 20260611-b4-rfc-loadpin-ebpf-086c41deb503
Best regards,
--
Alex Roberts <alex.roberts109@outlook.com>
^ permalink raw reply related
* [PATCH v5 6/6] landlock: Add documentation for UDP support
From: Matthieu Buffet @ 2026-06-11 16:21 UTC (permalink / raw)
To: Mickaël Salaün, Günther Noack
Cc: linux-security-module, Mikhail Ivanov, konstantin.meskhidze,
Tingmao Wang, netdev, Matthieu Buffet
In-Reply-To: <20260611162107.49278-1-matthieu@buffet.re>
Add an example of UDP usage, without detailing the two access rights.
Slightly change the example used in code blocks: build a ruleset for a
DNS client, so that it uses both TCP and UDP.
Signed-off-by: Matthieu Buffet <matthieu@buffet.re>
---
Documentation/userspace-api/landlock.rst | 91 +++++++++++++++++++-----
1 file changed, 72 insertions(+), 19 deletions(-)
diff --git a/Documentation/userspace-api/landlock.rst b/Documentation/userspace-api/landlock.rst
index 45861fa75685..0ea55c2c732c 100644
--- a/Documentation/userspace-api/landlock.rst
+++ b/Documentation/userspace-api/landlock.rst
@@ -8,7 +8,7 @@ Landlock: unprivileged access control
=====================================
:Author: Mickaël Salaün
-:Date: May 2026
+:Date: June 2026
The goal of Landlock is to enable restriction of ambient rights (e.g. global
filesystem or network access) for a set of processes. Because Landlock
@@ -40,8 +40,8 @@ Filesystem rules
and the related filesystem actions are defined with
`filesystem access rights`.
-Network rules (since ABI v4)
- For these rules, the object is a TCP port,
+Network rules (since ABI v4 for TCP and v10 for UDP)
+ For these rules, the object is a TCP or UDP port,
and the related actions are defined with `network access rights`.
Defining and enforcing a security policy
@@ -49,11 +49,11 @@ Defining and enforcing a security policy
We first need to define the ruleset that will contain our rules.
-For this example, the ruleset will contain rules that only allow filesystem
-read actions and establish a specific TCP connection. Filesystem write
-actions and other TCP actions will be denied.
+For this example, the ruleset will contain rules that only allow some
+filesystem read actions and some specific UDP and TCP actions. Filesystem
+write actions and other TCP/UDP actions will be denied.
-The ruleset then needs to handle both these kinds of actions. This is
+The ruleset then needs to handle all these kinds of actions. This is
required for backward and forward compatibility (i.e. the kernel and user
space may not know each other's supported restrictions), hence the need
to be explicit about the denied-by-default access rights.
@@ -81,7 +81,9 @@ to be explicit about the denied-by-default access rights.
LANDLOCK_ACCESS_FS_RESOLVE_UNIX,
.handled_access_net =
LANDLOCK_ACCESS_NET_BIND_TCP |
- LANDLOCK_ACCESS_NET_CONNECT_TCP,
+ LANDLOCK_ACCESS_NET_CONNECT_TCP |
+ LANDLOCK_ACCESS_NET_BIND_UDP |
+ LANDLOCK_ACCESS_NET_CONNECT_SEND_UDP,
.scoped =
LANDLOCK_SCOPE_ABSTRACT_UNIX_SOCKET |
LANDLOCK_SCOPE_SIGNAL,
@@ -132,6 +134,12 @@ version, and only use the available subset of access rights:
case 6 ... 8:
/* Removes LANDLOCK_ACCESS_FS_RESOLVE_UNIX for ABI < 9 */
ruleset_attr.handled_access_fs &= ~LANDLOCK_ACCESS_FS_RESOLVE_UNIX;
+ __attribute__((fallthrough));
+ case 9:
+ /* Removes LANDLOCK_ACCESS_NET_*_UDP for ABI < 10 */
+ ruleset_attr.handled_access_net &=
+ ~(LANDLOCK_ACCESS_NET_BIND_UDP |
+ LANDLOCK_ACCESS_NET_CONNECT_SEND_UDP);
}
This enables the creation of an inclusive ruleset that will contain our rules.
@@ -187,20 +195,52 @@ kernel does not support are dropped (the compatibility switch above already
cleared them in ``handled_access_*``), and the rule is skipped if no supported
right remains.
-For network access-control, we can add a set of rules that allow to use a port
-number for a specific action: HTTPS connections.
+For network access-control, we will add a set of rules to allow DNS
+queries, which requires both UDP and TCP. For TCP, we need to allow
+outbound connections to port 53, which can be handled and granted starting
+with ABI 4:
.. code-block:: c
- struct landlock_net_port_attr net_port = {
+ struct landlock_net_port_attr tcp_conn = {
.allowed_access = LANDLOCK_ACCESS_NET_CONNECT_TCP,
- .port = 443,
+ .port = 53,
+ };
+
+ tcp_conn.allowed_access &= ruleset_attr.handled_access_net;
+ if (tcp_conn.allowed_access)
+ err = landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
+ &tcp_conn, 0);
+
+We also need to be able to send UDP datagrams to port 53, which requires
+granting ``LANDLOCK_ACCESS_NET_CONNECT_SEND_UDP``. Since our DNS client will
+emit datagrams without explicitly binding to a specific source port, its UDP
+socket will automatically bind an ephemeral port. To allow this behaviour,
+we also need to grant ``LANDLOCK_ACCESS_NET_BIND_UDP`` on port 0, as if
+the program explicitly called :manpage:`bind(2)` on port 0.
+
+.. code-block:: c
+
+ struct landlock_net_port_attr udp_send = {
+ .allowed_access = LANDLOCK_ACCESS_NET_CONNECT_SEND_UDP,
+ .port = 53,
+ };
+
+ udp_send.allowed_access &= ruleset_attr.handled_access_net;
+ if (udp_send.allowed_access)
+ err = landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
+ &udp_send, 0);
+ [...]
+
+ struct landlock_net_port_attr udp_bind = {
+ .allowed_access = LANDLOCK_ACCESS_NET_BIND_UDP,
+ .port = 0,
};
- net_port.allowed_access &= ruleset_attr.handled_access_net;
- if (net_port.allowed_access)
+ udp_bind.allowed_access &= ruleset_attr.handled_access_net;
+ if (udp_bind.allowed_access)
err = landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
- &net_port, 0);
+ &udp_bind, 0);
When passing a non-zero ``flags`` argument to ``landlock_restrict_self()``, a
similar backwards compatibility check is needed for the restrict flags
@@ -234,7 +274,7 @@ similar backwards compatibility check is needed for the restrict flags
The next step is to restrict the current thread from gaining more privileges
(e.g. through a SUID binary). We now have a ruleset with the first rule
allowing read and execute access to ``/usr`` while denying all other handled
-accesses for the filesystem, and a second rule allowing HTTPS connections.
+accesses for the filesystem, and two more rules allowing DNS queries.
.. code-block:: c
@@ -722,6 +762,19 @@ Starting with the Landlock ABI version 9, it is possible to restrict
connections to pathname UNIX domain sockets (:manpage:`unix(7)`) using
the new ``LANDLOCK_ACCESS_FS_RESOLVE_UNIX`` right.
+UDP bind, connect and send* (ABI < 10)
+--------------------------------------
+
+Starting with the Landlock ABI version 10, it is possible to restrict
+setting the local port of UDP sockets with the
+``LANDLOCK_ACCESS_NET_BIND_UDP`` right. This includes restricting the
+ability to trigger autobind of an ephemeral port by the kernel by e.g.
+sending a first datagram or setting the remote peer of a socket.
+The ``LANDLOCK_ACCESS_NET_CONNECT_SEND_UDP`` right controls setting the
+remote port of UDP sockets (via :manpage:`connect(2)`), and sending
+datagrams to an explicit remote port (ignoring any destination set on
+UDP sockets, via e.g. :manpage:`sendto(2)`).
+
.. _kernel_support:
Kernel support
@@ -784,10 +837,10 @@ the boot loader.
Network support
---------------
-To be able to explicitly allow TCP operations (e.g., adding a network rule with
-``LANDLOCK_ACCESS_NET_BIND_TCP``), the kernel must support TCP
+To be able to explicitly allow TCP or UDP operations (e.g., adding a network rule with
+``LANDLOCK_ACCESS_NET_BIND_TCP``), the kernel must support the TCP/IP protocol suite
(``CONFIG_INET=y``). Otherwise, sys_landlock_add_rule() returns an
-``EAFNOSUPPORT`` error, which can safely be ignored because this kind of TCP
+``EAFNOSUPPORT`` error, which can safely be ignored because this kind of TCP or UDP
operation is already not possible.
Questions and answers
--
2.47.3
^ permalink raw reply related
* [PATCH v5 5/6] samples/landlock: Add sandboxer UDP access control
From: Matthieu Buffet @ 2026-06-11 16:21 UTC (permalink / raw)
To: Mickaël Salaün, Günther Noack
Cc: linux-security-module, Mikhail Ivanov, konstantin.meskhidze,
Tingmao Wang, netdev, Matthieu Buffet
In-Reply-To: <20260611162107.49278-1-matthieu@buffet.re>
Add environment variables to control associated access rights:
- LL_UDP_BIND
- LL_UDP_CONNECT_SEND
Each one takes a list of ports separated by colons, like other list
options.
Signed-off-by: Matthieu Buffet <matthieu@buffet.re>
---
samples/landlock/sandboxer.c | 41 ++++++++++++++++++++++++++++++++++--
1 file changed, 39 insertions(+), 2 deletions(-)
diff --git a/samples/landlock/sandboxer.c b/samples/landlock/sandboxer.c
index 66e56ae275c6..f44db2857bbf 100644
--- a/samples/landlock/sandboxer.c
+++ b/samples/landlock/sandboxer.c
@@ -62,6 +62,8 @@ static inline int landlock_restrict_self(const int ruleset_fd,
#define ENV_TCP_CONNECT_NAME "LL_TCP_CONNECT"
#define ENV_SCOPED_NAME "LL_SCOPED"
#define ENV_FORCE_LOG_NAME "LL_FORCE_LOG"
+#define ENV_UDP_BIND_NAME "LL_UDP_BIND"
+#define ENV_UDP_CONNECT_SEND_NAME "LL_UDP_CONNECT_SEND"
#define ENV_DELIMITER ":"
static int str2num(const char *numstr, __u64 *num_dst)
@@ -301,7 +303,7 @@ static bool check_ruleset_scope(const char *const env_var,
/* clang-format on */
-#define LANDLOCK_ABI_LAST 9
+#define LANDLOCK_ABI_LAST 10
#define XSTR(s) #s
#define STR(s) XSTR(s)
@@ -324,6 +326,11 @@ static const char help[] =
"means an empty list):\n"
"* " ENV_TCP_BIND_NAME ": ports allowed to bind (server)\n"
"* " ENV_TCP_CONNECT_NAME ": ports allowed to connect (client)\n"
+ "* " ENV_UDP_BIND_NAME ": local UDP ports allowed to bind (server: "
+ "prepare to receive on port / client: set as source port)\n"
+ "* " ENV_UDP_CONNECT_SEND_NAME ": remote UDP ports allowed to connect "
+ "or send to (client: use as destination port / server: receive only from it)\n"
+ "(caution: sending requires being able to bind to a local source port)\n"
"* " ENV_SCOPED_NAME ": actions denied on the outside of the landlock domain\n"
" - \"a\" to restrict opening abstract unix sockets\n"
" - \"s\" to restrict sending signals\n"
@@ -336,6 +343,7 @@ static const char help[] =
ENV_FS_RW_NAME "=\"/dev/null:/dev/full:/dev/zero:/dev/pts:/tmp\" "
ENV_TCP_BIND_NAME "=\"9418\" "
ENV_TCP_CONNECT_NAME "=\"80:443\" "
+ ENV_UDP_CONNECT_SEND_NAME "=\"53\" "
ENV_SCOPED_NAME "=\"a:s\" "
"%1$s bash -i\n"
"\n"
@@ -356,7 +364,9 @@ int main(const int argc, char *const argv[], char *const *const envp)
struct landlock_ruleset_attr ruleset_attr = {
.handled_access_fs = access_fs_rw,
.handled_access_net = LANDLOCK_ACCESS_NET_BIND_TCP |
- LANDLOCK_ACCESS_NET_CONNECT_TCP,
+ LANDLOCK_ACCESS_NET_CONNECT_TCP |
+ LANDLOCK_ACCESS_NET_BIND_UDP |
+ LANDLOCK_ACCESS_NET_CONNECT_SEND_UDP,
.scoped = LANDLOCK_SCOPE_ABSTRACT_UNIX_SOCKET |
LANDLOCK_SCOPE_SIGNAL,
};
@@ -444,6 +454,13 @@ int main(const int argc, char *const argv[], char *const *const envp)
/* Removes LANDLOCK_ACCESS_FS_RESOLVE_UNIX for ABI < 9 */
ruleset_attr.handled_access_fs &=
~LANDLOCK_ACCESS_FS_RESOLVE_UNIX;
+ __attribute__((fallthrough));
+ case 9:
+ /* Removes UDP support for ABI < 10 */
+ ruleset_attr.handled_access_net &=
+ ~(LANDLOCK_ACCESS_NET_BIND_UDP |
+ LANDLOCK_ACCESS_NET_CONNECT_SEND_UDP);
+
/* Must be printed for any ABI < LANDLOCK_ABI_LAST. */
fprintf(stderr,
"Hint: You should update the running kernel "
@@ -475,6 +492,18 @@ int main(const int argc, char *const argv[], char *const *const envp)
ruleset_attr.handled_access_net &=
~LANDLOCK_ACCESS_NET_CONNECT_TCP;
}
+ /* Removes UDP bind access control if not supported by a user. */
+ env_port_name = getenv(ENV_UDP_BIND_NAME);
+ if (!env_port_name) {
+ ruleset_attr.handled_access_net &=
+ ~LANDLOCK_ACCESS_NET_BIND_UDP;
+ }
+ /* Removes UDP connect/send access control if not supported by a user. */
+ env_port_name = getenv(ENV_UDP_CONNECT_SEND_NAME);
+ if (!env_port_name) {
+ ruleset_attr.handled_access_net &=
+ ~LANDLOCK_ACCESS_NET_CONNECT_SEND_UDP;
+ }
if (check_ruleset_scope(ENV_SCOPED_NAME, &ruleset_attr))
return 1;
@@ -519,6 +548,14 @@ int main(const int argc, char *const argv[], char *const *const envp)
LANDLOCK_ACCESS_NET_CONNECT_TCP)) {
goto err_close_ruleset;
}
+ if (populate_ruleset_net(ENV_UDP_BIND_NAME, ruleset_fd,
+ LANDLOCK_ACCESS_NET_BIND_UDP)) {
+ goto err_close_ruleset;
+ }
+ if (populate_ruleset_net(ENV_UDP_CONNECT_SEND_NAME, ruleset_fd,
+ LANDLOCK_ACCESS_NET_CONNECT_SEND_UDP)) {
+ goto err_close_ruleset;
+ }
if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
perror("Failed to restrict privileges");
--
2.47.3
^ permalink raw reply related
* [PATCH v5 4/6] selftests/landlock: Add tests for UDP send
From: Matthieu Buffet @ 2026-06-11 16:21 UTC (permalink / raw)
To: Mickaël Salaün, Günther Noack
Cc: linux-security-module, Mikhail Ivanov, konstantin.meskhidze,
Tingmao Wang, netdev, Matthieu Buffet
In-Reply-To: <20260611162107.49278-1-matthieu@buffet.re>
Add tests specific to UDP sendmsg() in the protocol_* variants to ensure
behaviour is consistent across AF_INET, AF_INET6 and AF_UNIX.
Signed-off-by: Matthieu Buffet <matthieu@buffet.re>
---
tools/testing/selftests/landlock/net_test.c | 653 +++++++++++++++++++-
1 file changed, 652 insertions(+), 1 deletion(-)
diff --git a/tools/testing/selftests/landlock/net_test.c b/tools/testing/selftests/landlock/net_test.c
index bbfecd999b32..05b41e4da28f 100644
--- a/tools/testing/selftests/landlock/net_test.c
+++ b/tools/testing/selftests/landlock/net_test.c
@@ -289,9 +289,163 @@ static int connect_variant(const int sock_fd,
return connect_variant_addrlen(sock_fd, srv, get_addrlen(srv, false));
}
+static int sendto_variant_addrlen(const int sock_fd,
+ const struct service_fixture *const srv,
+ const socklen_t addrlen, void *buf,
+ size_t len, size_t flags)
+{
+ const struct sockaddr *dst = NULL;
+ ssize_t ret;
+
+ /*
+ * We never want our processes to be killed by SIGPIPE: we check
+ * return codes and errno, so that we have actual error messages.
+ */
+ flags |= MSG_NOSIGNAL;
+
+ if (srv != NULL) {
+ switch (srv->protocol.domain) {
+ case AF_UNSPEC:
+ case AF_INET:
+ dst = (const struct sockaddr *)&srv->ipv4_addr;
+ break;
+
+ case AF_INET6:
+ dst = (const struct sockaddr *)&srv->ipv6_addr;
+ break;
+
+ case AF_UNIX:
+ dst = (const struct sockaddr *)&srv->unix_addr;
+ break;
+
+ default:
+ errno = EAFNOSUPPORT;
+ return -errno;
+ }
+ }
+
+ ret = sendto(sock_fd, buf, len, flags, dst, addrlen);
+ if (ret < 0)
+ return -errno;
+
+ /* errno is not set in cases of partial writes. */
+ if (ret != len)
+ return -EINTR;
+
+ return 0;
+}
+
+static int sendto_variant(const int sock_fd,
+ const struct service_fixture *const srv, void *buf,
+ size_t len, size_t flags)
+{
+ socklen_t addrlen = 0;
+
+ if (srv != NULL)
+ addrlen = get_addrlen(srv, false);
+
+ return sendto_variant_addrlen(sock_fd, srv, addrlen, buf, len, flags);
+}
+
+static int test_sendmsg(struct __test_metadata *const _metadata,
+ const struct protocol_variant *prot, int client_fd,
+ int server_fd, const struct service_fixture *srv,
+ bool bind_denied, bool send_denied)
+{
+ int ret;
+ socklen_t opt_len;
+ int sock_type;
+ int addr_family;
+ struct sockaddr_storage peer_addr = { 0 };
+ bool has_remote_port;
+ bool needs_autobind;
+ char read_buf[1] = { 0 };
+
+ /*
+ * Prepare the test by inspecting the socket type and whether it
+ * has a local/remote address set (all of which determine the
+ * expected outcomes).
+ */
+ opt_len = sizeof(sock_type);
+ ASSERT_EQ(0, getsockopt(client_fd, SOL_SOCKET, SO_TYPE, &sock_type,
+ &opt_len));
+ opt_len = sizeof(addr_family);
+ ASSERT_EQ(0, getsockopt(client_fd, SOL_SOCKET, SO_DOMAIN, &addr_family,
+ &opt_len));
+ opt_len = sizeof(peer_addr);
+ has_remote_port = (getpeername(client_fd, (struct sockaddr *)&peer_addr,
+ &opt_len) == 0);
+ needs_autobind = (addr_family == AF_INET || addr_family == AF_INET6) &&
+ get_binded_port(client_fd, prot) == 0;
+
+ /* First, check error code with truncated explicit address. */
+ if (srv != NULL) {
+ ret = sendto_variant_addrlen(
+ client_fd, srv, get_addrlen(srv, true) - 1, "A", 1, 0);
+ if (sock_type == SOCK_STREAM && !has_remote_port) {
+ EXPECT_EQ(-EPIPE, ret)
+ {
+ return -1;
+ }
+ } else if (bind_denied && needs_autobind) {
+ EXPECT_EQ(-EACCES, ret)
+ {
+ return -1;
+ }
+ } else {
+ EXPECT_EQ(-EINVAL, ret)
+ {
+ return -1;
+ }
+ }
+ }
+
+ /* With or without explicit destination address (srv can be NULL). */
+ ret = sendto_variant(client_fd, srv, "B", 1, 0);
+ if (sock_type == SOCK_STREAM && !has_remote_port) {
+ EXPECT_EQ(-EPIPE, ret)
+ {
+ return -1;
+ }
+ } else if ((send_denied && srv != NULL) ||
+ (bind_denied && needs_autobind)) {
+ ASSERT_EQ(-EACCES, ret)
+ {
+ return -1;
+ }
+ } else if (srv == NULL && !has_remote_port) {
+ if (addr_family == AF_UNIX) {
+ ASSERT_EQ(-ENOTCONN, ret)
+ {
+ return -1;
+ }
+ } else if (sock_type == SOCK_STREAM) {
+ ASSERT_EQ(-EPIPE, ret)
+ {
+ return -1;
+ }
+ } else {
+ ASSERT_EQ(-EDESTADDRREQ, ret)
+ {
+ return -1;
+ }
+ }
+ } else {
+ ASSERT_EQ(0, ret);
+ ASSERT_EQ(1, recv(server_fd, read_buf, 1, 0));
+ ASSERT_EQ(read_buf[0], 'B')
+ {
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
FIXTURE(protocol)
{
- struct service_fixture srv0, srv1, srv2, unspec_any0, unspec_srv0;
+ struct service_fixture srv0, srv1, srv2;
+ struct service_fixture unspec_any0, unspec_srv0, unspec_srv1;
};
FIXTURE_VARIANT(protocol)
@@ -313,6 +467,7 @@ FIXTURE_SETUP(protocol)
ASSERT_EQ(0, set_service(&self->srv2, variant->prot, 2));
ASSERT_EQ(0, set_service(&self->unspec_srv0, prot_unspec, 0));
+ ASSERT_EQ(0, set_service(&self->unspec_srv1, prot_unspec, 1));
ASSERT_EQ(0, set_service(&self->unspec_any0, prot_unspec, 0));
self->unspec_any0.ipv4_addr.sin_addr.s_addr = htonl(INADDR_ANY);
@@ -1126,6 +1281,441 @@ TEST_F(protocol, connect_unspec)
EXPECT_EQ(0, close(bind_fd));
}
+TEST_F(protocol, sendmsg_stream)
+{
+ int srv0_fd, tmp_fd, client_fd, res;
+ char read_buf[1] = { 0 };
+
+ /*
+ * Simple test for stream sockets: deny all UDP connect()/
+ * send(explicit addr)/bind(), and make sure this does not interfere
+ * with any operation on a stream socket.
+ */
+ if (variant->prot.type != SOCK_STREAM)
+ return;
+
+ if (variant->sandbox == UDP_SANDBOX) {
+ /* Both UDP accesses are handled and no rule is added. */
+ const struct landlock_ruleset_attr ruleset_attr = {
+ .handled_access_net =
+ LANDLOCK_ACCESS_NET_BIND_UDP |
+ LANDLOCK_ACCESS_NET_CONNECT_SEND_UDP,
+ };
+ const int ruleset_fd = landlock_create_ruleset(
+ &ruleset_attr, sizeof(ruleset_attr), 0);
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ EXPECT_EQ(0, close(ruleset_fd));
+ }
+
+ ASSERT_LE(0, client_fd = socket_variant(&self->srv0));
+ ASSERT_LE(0, srv0_fd = socket_variant(&self->srv0));
+ ASSERT_EQ(0, bind_variant(srv0_fd, &self->srv0));
+ ASSERT_EQ(0, listen(srv0_fd, backlog));
+
+ /* Send on a non-connected socket. */
+ res = sendto_variant(client_fd, NULL, "A", 1, 0);
+ if (variant->prot.domain == AF_UNIX) {
+ EXPECT_EQ(-ENOTCONN, res);
+ } else {
+ EXPECT_EQ(-EPIPE, res);
+ }
+
+ /* Send to a truncated (invalid) address on a non-connected socket. */
+ res = sendto_variant_addrlen(client_fd, &self->srv0,
+ get_addrlen(&self->srv0, true) - 1, "B", 1,
+ 0);
+ if (variant->prot.domain == AF_UNIX) {
+ EXPECT_EQ(-EOPNOTSUPP, res);
+ } else {
+ EXPECT_EQ(-EPIPE, res);
+ }
+
+ /* Connect. */
+ ASSERT_EQ(0, connect_variant(client_fd, &self->srv0));
+ tmp_fd = accept(srv0_fd, NULL, 0);
+ ASSERT_LE(0, tmp_fd);
+ EXPECT_EQ(0, close(srv0_fd));
+ /* From now on, srv0_fd is the accepted (connected) socket. */
+ srv0_fd = tmp_fd;
+
+ /* Send without an explicit address. */
+ EXPECT_EQ(0, sendto_variant(client_fd, NULL, "C", 1, 0));
+ EXPECT_EQ(1, recv(srv0_fd, read_buf, 1, 0))
+ {
+ TH_LOG("recv() failed: %s", strerror(errno));
+ }
+ EXPECT_EQ(read_buf[0], 'C');
+
+ /* Send to a truncated (invalid) address. */
+ res = sendto_variant_addrlen(client_fd, &self->srv0,
+ get_addrlen(&self->srv0, true) - 1, "D", 1,
+ 0);
+ if (variant->prot.domain == AF_UNIX) {
+ EXPECT_EQ(-EISCONN, res);
+ } else {
+ ASSERT_EQ(0, res);
+ EXPECT_EQ(1, recv(srv0_fd, read_buf, 1, 0))
+ {
+ TH_LOG("recv() failed: %s", strerror(errno));
+ }
+ EXPECT_EQ(read_buf[0], 'D');
+ }
+
+ /*
+ * Send to a valid but different address. On non-Unix stream sockets
+ * the data is expected to reach the connected peer (srv0_fd), not
+ * the explicit destination.
+ */
+ res = sendto_variant(client_fd, &self->srv1, "E", 1, 0);
+ if (variant->prot.domain == AF_UNIX) {
+ EXPECT_EQ(-EISCONN, res);
+ } else {
+ ASSERT_EQ(0, res);
+ EXPECT_EQ(1, recv(srv0_fd, read_buf, 1, 0))
+ {
+ TH_LOG("recv() failed: %s", strerror(errno));
+ }
+ EXPECT_EQ(read_buf[0], 'E');
+ }
+
+ EXPECT_EQ(0, close(client_fd));
+ /* Also release the accepted socket, it was never closed before. */
+ EXPECT_EQ(0, close(srv0_fd));
+}
+
+TEST_F(protocol, sendmsg_dgram)
+{
+ const bool restricted = is_restricted(&variant->prot, variant->sandbox);
+ int srv0_fd, srv1_fd, client_fd, child, status, res;
+
+ if (variant->prot.type != SOCK_DGRAM)
+ return;
+
+ /*
+ * Each scenario below enforces its own ruleset in a forked child, so
+ * that it does not apply to the following scenarios. The two servers
+ * are created once in the parent and inherited by the children.
+ */
+
+ /* Prepare server on port #0 to be allowed. */
+ ASSERT_LE(0, srv0_fd = socket_variant(&self->srv0));
+ ASSERT_EQ(0, bind_variant(srv0_fd, &self->srv0));
+
+ /* And another server on port #1 to be denied. */
+ ASSERT_LE(0, srv1_fd = socket_variant(&self->srv1));
+ ASSERT_EQ(0, bind_variant(srv1_fd, &self->srv1));
+
+ /*
+ * Check that a socket connected before restrictions can still send
+ * to its peer without an explicit address. Sends with an explicit
+ * destination address are still checked, hence denied when
+ * restricted.
+ */
+ child = fork();
+ ASSERT_LE(0, child);
+ if (child == 0) {
+ ASSERT_LE(0, client_fd = socket_variant(&self->srv0));
+ ASSERT_EQ(0, connect_variant(client_fd, &self->srv0));
+ if (variant->sandbox == UDP_SANDBOX) {
+ /* Deny all connect()/send(explicit addr)/bind(). */
+ const struct landlock_ruleset_attr ruleset_attr = {
+ .handled_access_net =
+ LANDLOCK_ACCESS_NET_BIND_UDP |
+ LANDLOCK_ACCESS_NET_CONNECT_SEND_UDP,
+ };
+ const int ruleset_fd = landlock_create_ruleset(
+ &ruleset_attr, sizeof(ruleset_attr), 0);
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ EXPECT_EQ(0, close(ruleset_fd));
+ }
+ EXPECT_EQ(0,
+ test_sendmsg(_metadata, &variant->prot, client_fd,
+ srv0_fd, NULL, restricted, restricted));
+ EXPECT_EQ(0, test_sendmsg(_metadata, &variant->prot, client_fd,
+ srv0_fd, &self->srv0, restricted,
+ restricted));
+ EXPECT_EQ(0, test_sendmsg(_metadata, &variant->prot, client_fd,
+ srv1_fd, &self->srv1, restricted,
+ restricted));
+ EXPECT_EQ(0, close(client_fd));
+ _exit(_metadata->exit_code);
+ }
+ EXPECT_EQ(child, waitpid(child, &status, 0));
+ EXPECT_EQ(1, WIFEXITED(status));
+ EXPECT_EQ(EXIT_SUCCESS, WEXITSTATUS(status));
+
+ /*
+ * Restrict connect/send, but not bind(). Then try sending with
+ * no destination (and no remote peer set), an allowed
+ * destination, then a denied destination.
+ */
+ child = fork();
+ ASSERT_LE(0, child);
+ if (child == 0) {
+ if (variant->sandbox == UDP_SANDBOX) {
+ const struct landlock_ruleset_attr ruleset_attr = {
+ .handled_access_net =
+ LANDLOCK_ACCESS_NET_CONNECT_SEND_UDP,
+ };
+ const struct landlock_net_port_attr send_p0 = {
+ .allowed_access =
+ LANDLOCK_ACCESS_NET_CONNECT_SEND_UDP,
+ .port = self->srv0.port,
+ };
+ const int ruleset_fd = landlock_create_ruleset(
+ &ruleset_attr, sizeof(ruleset_attr), 0);
+ ASSERT_LE(0, ruleset_fd);
+ ASSERT_EQ(0, landlock_add_rule(ruleset_fd,
+ LANDLOCK_RULE_NET_PORT,
+ &send_p0, 0));
+ enforce_ruleset(_metadata, ruleset_fd);
+ EXPECT_EQ(0, close(ruleset_fd));
+ }
+ ASSERT_LE(0, client_fd = socket_variant(&self->srv0));
+ EXPECT_EQ(0, test_sendmsg(_metadata, &variant->prot, client_fd,
+ -1, NULL, false, false));
+ EXPECT_EQ(0, test_sendmsg(_metadata, &variant->prot, client_fd,
+ srv0_fd, &self->srv0, false, false));
+ EXPECT_EQ(0, test_sendmsg(_metadata, &variant->prot, client_fd,
+ srv1_fd, &self->srv1, false,
+ restricted));
+ EXPECT_EQ(0, close(client_fd));
+ _exit(_metadata->exit_code);
+ }
+ EXPECT_EQ(child, waitpid(child, &status, 0));
+ EXPECT_EQ(1, WIFEXITED(status));
+ EXPECT_EQ(EXIT_SUCCESS, WEXITSTATUS(status));
+
+ /*
+ * The rest of this test is just for autobind enforcement, which only
+ * exists in IP sockets.
+ */
+ if (variant->prot.domain != AF_INET &&
+ variant->prot.domain != AF_INET6) {
+ EXPECT_EQ(0, close(srv0_fd));
+ EXPECT_EQ(0, close(srv1_fd));
+ return;
+ }
+
+ /* Restrict bind() to explicit calls with an arbitrary (non-0) port. */
+ child = fork();
+ ASSERT_LE(0, child);
+ if (child == 0) {
+ const uint16_t allowed_src_port = 42424;
+ struct service_fixture allowed_src;
+
+ allowed_src = self->srv0;
+ set_port(&allowed_src, allowed_src_port);
+ if (variant->sandbox == UDP_SANDBOX) {
+ const struct landlock_ruleset_attr ruleset_attr = {
+ .handled_access_net =
+ LANDLOCK_ACCESS_NET_BIND_UDP,
+ };
+ const struct landlock_net_port_attr rule = {
+ .allowed_access = LANDLOCK_ACCESS_NET_BIND_UDP,
+ .port = allowed_src_port,
+ };
+ const int ruleset_fd = landlock_create_ruleset(
+ &ruleset_attr, sizeof(ruleset_attr), 0);
+ ASSERT_LE(0, ruleset_fd);
+ ASSERT_EQ(0, landlock_add_rule(ruleset_fd,
+ LANDLOCK_RULE_NET_PORT,
+ &rule, 0));
+ enforce_ruleset(_metadata, ruleset_fd);
+ EXPECT_EQ(0, close(ruleset_fd));
+ }
+ ASSERT_LE(0, client_fd = socket_variant(&self->srv0));
+
+ /* Check that implicit bind(0) in sendmsg() is denied. */
+ EXPECT_EQ(0, test_sendmsg(_metadata, &variant->prot, client_fd,
+ srv0_fd, &self->srv0, restricted,
+ false));
+
+ /* Same thing for autobind in connect(). */
+ res = connect_variant(client_fd, &self->srv0);
+ if (restricted) {
+ EXPECT_EQ(-EACCES, res);
+ } else {
+ EXPECT_EQ(0, res);
+ }
+ EXPECT_EQ(0, close(client_fd));
+
+ /* Make sendmsg() work by explicitly binding to the only allowed port. */
+ ASSERT_LE(0, client_fd = socket_variant(&self->srv0));
+ EXPECT_EQ(0, bind_variant(client_fd, &allowed_src));
+ EXPECT_EQ(0, test_sendmsg(_metadata, &variant->prot, client_fd,
+ srv0_fd, &self->srv0, restricted,
+ false));
+ EXPECT_EQ(0, close(client_fd));
+
+ /* Make connect() work by explicitly binding to the only allowed port. */
+ ASSERT_LE(0, client_fd = socket_variant(&self->srv0));
+ EXPECT_EQ(0, bind_variant(client_fd, &allowed_src));
+ EXPECT_EQ(0, connect_variant(client_fd, &self->srv0));
+ EXPECT_EQ(0, close(client_fd));
+
+ _exit(_metadata->exit_code);
+ }
+ EXPECT_EQ(child, waitpid(child, &status, 0));
+ EXPECT_EQ(1, WIFEXITED(status));
+ EXPECT_EQ(EXIT_SUCCESS, WEXITSTATUS(status));
+
+ /*
+ * Check that %LANDLOCK_ACCESS_NET_BIND_UDP on port 0 allows
+ * implicit autobinds.
+ */
+ child = fork();
+ ASSERT_LE(0, child);
+ if (child == 0) {
+ if (variant->sandbox == UDP_SANDBOX) {
+ const struct landlock_ruleset_attr ruleset_attr = {
+ .handled_access_net =
+ LANDLOCK_ACCESS_NET_BIND_UDP,
+ };
+ const struct landlock_net_port_attr rule = {
+ .allowed_access = LANDLOCK_ACCESS_NET_BIND_UDP,
+ .port = 0,
+ };
+ const int ruleset_fd = landlock_create_ruleset(
+ &ruleset_attr, sizeof(ruleset_attr), 0);
+ ASSERT_LE(0, ruleset_fd);
+ ASSERT_EQ(0, landlock_add_rule(ruleset_fd,
+ LANDLOCK_RULE_NET_PORT,
+ &rule, 0));
+ enforce_ruleset(_metadata, ruleset_fd);
+ EXPECT_EQ(0, close(ruleset_fd));
+ }
+ ASSERT_LE(0, client_fd = socket_variant(&self->srv0));
+ EXPECT_EQ(0, test_sendmsg(_metadata, &variant->prot, client_fd,
+ srv0_fd, &self->srv0, false, false));
+ EXPECT_EQ(0, close(client_fd));
+ _exit(_metadata->exit_code);
+ }
+ EXPECT_EQ(child, waitpid(child, &status, 0));
+ EXPECT_EQ(1, WIFEXITED(status));
+ EXPECT_EQ(EXIT_SUCCESS, WEXITSTATUS(status));
+
+ EXPECT_EQ(0, close(srv0_fd));
+ EXPECT_EQ(0, close(srv1_fd));
+}
+
+TEST_F(protocol, sendmsg_unspec)
+{
+ const bool restricted = is_restricted(&variant->prot, variant->sandbox);
+ int client_fd, srv0_fd, srv1_fd, res;
+ char read_buf[1] = { 0 };
+
+ /*
+ * We already test for the absence of influence on sendmsg for
+ * other socket types and other address families, so there is no
+ * point in adapting this test for stream sockets too.
+ */
+ if (variant->prot.type != SOCK_DGRAM)
+ return;
+
+ /* Prepare client of the right family. */
+ ASSERT_LE(0, client_fd = socket_variant(&self->srv0));
+
+ /* Prepare server on port #0 to be allowed. */
+ ASSERT_LE(0, srv0_fd = socket_variant(&self->srv0));
+ ASSERT_EQ(0, bind_variant(srv0_fd, &self->srv0));
+
+ /* And another server on port #1 to be denied. */
+ ASSERT_LE(0, srv1_fd = socket_variant(&self->srv1));
+ ASSERT_EQ(0, bind_variant(srv1_fd, &self->srv1));
+
+ if (variant->sandbox == UDP_SANDBOX) {
+ /* Only connect/send is handled, and only allowed to srv0's port. */
+ const struct landlock_ruleset_attr ruleset_attr = {
+ .handled_access_net =
+ LANDLOCK_ACCESS_NET_CONNECT_SEND_UDP,
+ };
+ const struct landlock_net_port_attr rule = {
+ .allowed_access = LANDLOCK_ACCESS_NET_CONNECT_SEND_UDP,
+ .port = self->srv0.port,
+ };
+ const int ruleset_fd = landlock_create_ruleset(
+ &ruleset_attr, sizeof(ruleset_attr), 0);
+ ASSERT_LE(0, ruleset_fd);
+ ASSERT_EQ(0,
+ landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
+ &rule, 0));
+ enforce_ruleset(_metadata, ruleset_fd);
+ EXPECT_EQ(0, close(ruleset_fd));
+ }
+
+ /* Explicit AF_UNSPEC address but truncated. */
+ EXPECT_EQ(-EINVAL, sendto_variant_addrlen(
+ client_fd, &self->unspec_srv0,
+ get_addrlen(&self->unspec_srv0, true) - 1,
+ "A", 1, 0));
+
+ /*
+ * Explicit AF_UNSPEC address, should be treated as AF_INET by
+ * IPv4 sockets (and thus map to srv0, allowed), but be denied by
+ * IPv6 sockets.
+ */
+ res = sendto_variant(client_fd, &self->unspec_srv0, "B", 1, 0);
+ if (variant->prot.domain == AF_INET6) {
+ if (restricted) {
+ /* Always denied on IPv6 socket. */
+ EXPECT_EQ(-EACCES, res);
+ } else {
+ /* IPv6 sockets treat AF_UNSPEC as a NULL address. */
+ EXPECT_EQ(-EDESTADDRREQ, res);
+ }
+ } else if (variant->prot.domain == AF_INET) {
+ ASSERT_EQ(0, res);
+ EXPECT_EQ(1, read(srv0_fd, read_buf, 1))
+ {
+ TH_LOG("read() failed: %s", strerror(errno));
+ }
+ EXPECT_EQ(read_buf[0], 'B');
+ } else {
+ /* Unix sockets don't accept AF_UNSPEC. */
+ EXPECT_EQ(-EINVAL, res);
+ }
+
+ /*
+ * Explicit AF_UNSPEC address, should be treated as AF_INET on
+ * IPv4 sockets (and thus map to srv1, denied), and be denied
+ * on IPv6 sockets as always.
+ */
+ res = sendto_variant(client_fd, &self->unspec_srv1, "C", 1, 0);
+ if (variant->prot.domain == AF_INET6) {
+ if (restricted) {
+ /* Always denied on IPv6 socket. */
+ EXPECT_EQ(-EACCES, res);
+ } else {
+ /* IPv6 sockets treat AF_UNSPEC as a NULL address. */
+ EXPECT_EQ(-EDESTADDRREQ, res);
+ }
+ } else if (variant->prot.domain == AF_INET) {
+ if (restricted) {
+ /* Sending to srv1 is not allowed, only srv0. */
+ EXPECT_EQ(-EACCES, res);
+ } else {
+ ASSERT_EQ(0, res);
+ EXPECT_EQ(1, read(srv1_fd, read_buf, 1))
+ {
+ TH_LOG("read() failed: %s", strerror(errno));
+ }
+ EXPECT_EQ(read_buf[0], 'C');
+ }
+ } else {
+ /* Unix sockets don't accept AF_UNSPEC. */
+ EXPECT_EQ(-EINVAL, res);
+ }
+
+ ASSERT_EQ(0, connect_variant(client_fd, &self->srv0));
+
+ /* Minimal explicit AF_UNSPEC address (just the sa_family_t field). */
+ res = sendto_variant_addrlen(client_fd, &self->unspec_srv0,
+ get_addrlen(&self->unspec_srv0, true), "D",
+ 1, 0);
+ if (variant->prot.domain == AF_INET6) {
+ if (restricted) {
+ /* AF_UNSPEC is always denied in IPv6. */
+ EXPECT_EQ(-EACCES, res);
+ } else {
+ /*
+ * IPv6 sockets treat AF_UNSPEC as a NULL address,
+ * falling back to the connected address.
+ */
+ ASSERT_EQ(0, res);
+ EXPECT_EQ(1, read(srv0_fd, read_buf, 1))
+ {
+ TH_LOG("read() failed: %s", strerror(errno));
+ }
+ EXPECT_EQ(read_buf[0], 'D');
+ }
+ } else {
+ /*
+ * IPv4 sockets expect a full struct sockaddr_in, so our
+ * address is considered truncated.
+ * Unix sockets don't accept AF_UNSPEC at all.
+ */
+ EXPECT_EQ(-EINVAL, res);
+ }
+
+ /* Release every socket opened by this test. */
+ EXPECT_EQ(0, close(client_fd));
+ EXPECT_EQ(0, close(srv0_fd));
+ EXPECT_EQ(0, close(srv1_fd));
+}
+
FIXTURE(ipv4)
{
struct service_fixture srv0, srv1;
@@ -2187,6 +2777,7 @@ FIXTURE(audit)
{
struct service_fixture srv0;
struct service_fixture srv1;
+ struct service_fixture unspec_srv0;
struct audit_filter audit_filter;
int audit_fd;
};
@@ -2239,8 +2830,13 @@ FIXTURE_VARIANT_ADD(audit, ipv6_udp) {
FIXTURE_SETUP(audit)
{
+ struct protocol_variant prot_unspec = variant->prot;
+
+ prot_unspec.domain = AF_UNSPEC;
+
ASSERT_EQ(0, set_service(&self->srv0, variant->prot, 0));
ASSERT_EQ(0, set_service(&self->srv1, variant->prot, 1));
+ ASSERT_EQ(0, set_service(&self->unspec_srv0, prot_unspec, 0));
setup_loopback(_metadata);
@@ -2347,4 +2943,59 @@ TEST_F(audit, connect)
EXPECT_EQ(0, close(sock_fd));
}
+TEST_F(audit, sendmsg)
+{
+ const struct landlock_ruleset_attr ruleset_attr = {
+ .handled_access_net = LANDLOCK_ACCESS_NET_CONNECT_SEND_UDP |
+ LANDLOCK_ACCESS_NET_BIND_UDP,
+ };
+ const struct landlock_net_port_attr rule = {
+ .allowed_access = LANDLOCK_ACCESS_NET_CONNECT_SEND_UDP,
+ .port = self->srv1.port,
+ };
+ struct audit_records records;
+ int ruleset_fd;
+ int sock_fd;
+
+ /*
+ * Checks the audit records produced by three denied sends on a
+ * datagram socket: to srv0 (connect/send denied, only srv1's port
+ * has a rule), to srv1 (see below), and to an AF_UNSPEC address.
+ * Sendmsg on stream sockets is never denied.
+ */
+ if (variant->prot.type != SOCK_DGRAM)
+ return;
+
+ ruleset_fd =
+ landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0);
+ ASSERT_LE(0, ruleset_fd);
+ ASSERT_EQ(0, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
+ &rule, 0));
+ enforce_ruleset(_metadata, ruleset_fd);
+ EXPECT_EQ(0, close(ruleset_fd));
+
+ sock_fd = socket_variant(&self->srv0);
+ ASSERT_LE(0, sock_fd);
+ EXPECT_EQ(-EACCES, sendto_variant(sock_fd, &self->srv0, "A", 1, 0));
+ EXPECT_EQ(0, matches_auditlog(self->audit_fd, "net\\.connect_send_udp",
+ "daddr", variant->addr, "dest"));
+
+ EXPECT_EQ(0, audit_count_records(self->audit_fd, &records));
+ EXPECT_EQ(0, records.access);
+ EXPECT_EQ(1, records.domain);
+
+ /*
+ * Check that autobind generates a denied bind event: sending to
+ * srv1's port is allowed by the rule, but
+ * LANDLOCK_ACCESS_NET_BIND_UDP is handled and no rule allows it.
+ */
+ EXPECT_EQ(-EACCES, sendto_variant(sock_fd, &self->srv1, "A", 1, 0));
+ EXPECT_EQ(0, matches_auditlog(self->audit_fd, "net\\.bind_udp", NULL,
+ NULL, NULL));
+ EXPECT_EQ(0, audit_count_records(self->audit_fd, &records));
+ EXPECT_EQ(0, records.access);
+ EXPECT_EQ(0, records.domain);
+
+ EXPECT_EQ(-EACCES,
+ sendto_variant(sock_fd, &self->unspec_srv0, "B", 1, 0));
+ EXPECT_EQ(0, matches_auditlog(self->audit_fd, "net\\.connect_send_udp",
+ "daddr", NULL, "dest"));
+ EXPECT_EQ(0, audit_count_records(self->audit_fd, &records));
+ EXPECT_EQ(0, records.access);
+ EXPECT_EQ(0, records.domain);
+
+ EXPECT_EQ(0, close(sock_fd));
+}
+
TEST_HARNESS_MAIN
--
2.47.3
^ permalink raw reply related
* [PATCH v5 3/6] selftests/landlock: Add tests for UDP bind/connect
From: Matthieu Buffet @ 2026-06-11 16:21 UTC (permalink / raw)
To: Mickaël Salaün, Günther Noack
Cc: linux-security-module, Mikhail Ivanov, konstantin.meskhidze,
Tingmao Wang, netdev, Matthieu Buffet
In-Reply-To: <20260611162107.49278-1-matthieu@buffet.re>
Make basic changes to the existing bind() and connect() test suite to
cover UDP restriction.
Signed-off-by: Matthieu Buffet <matthieu@buffet.re>
---
tools/testing/selftests/landlock/net_test.c | 507 ++++++++++++++++----
1 file changed, 413 insertions(+), 94 deletions(-)
diff --git a/tools/testing/selftests/landlock/net_test.c b/tools/testing/selftests/landlock/net_test.c
index 016c7277e370..bbfecd999b32 100644
--- a/tools/testing/selftests/landlock/net_test.c
+++ b/tools/testing/selftests/landlock/net_test.c
@@ -35,6 +35,7 @@ enum sandbox_type {
NO_SANDBOX,
/* This may be used to test rules that allow *and* deny accesses. */
TCP_SANDBOX,
+ UDP_SANDBOX,
};
static int set_service(struct service_fixture *const srv,
@@ -93,23 +94,53 @@ static bool prot_is_tcp(const struct protocol_variant *const prot)
(prot->protocol == IPPROTO_TCP || prot->protocol == IPPROTO_IP);
}
+/* Tells whether @prot describes an IPv4 or IPv6 UDP socket. */
+static bool prot_is_udp(const struct protocol_variant *const prot)
+{
+ if (prot->domain != AF_INET && prot->domain != AF_INET6)
+ return false;
+ if (prot->type != SOCK_DGRAM)
+ return false;
+ /* A protocol of IPPROTO_IP (0) is accepted along with IPPROTO_UDP. */
+ return prot->protocol == IPPROTO_UDP || prot->protocol == IPPROTO_IP;
+}
+
static bool is_restricted(const struct protocol_variant *const prot,
const enum sandbox_type sandbox)
{
if (sandbox == TCP_SANDBOX)
return prot_is_tcp(prot);
+ else if (sandbox == UDP_SANDBOX)
+ return prot_is_udp(prot);
return false;
}
static int socket_variant(const struct service_fixture *const srv)
{
+ /* Arbitrary value just to not block other tests indefinitely. */
+ const struct timeval timeout = {
+ .tv_sec = 0,
+ .tv_usec = 100000,
+ };
+ int sockfd;
int ret;
- ret = socket(srv->protocol.domain, srv->protocol.type | SOCK_CLOEXEC,
- srv->protocol.protocol);
- if (ret < 0)
+ sockfd = socket(srv->protocol.domain, srv->protocol.type | SOCK_CLOEXEC,
+ srv->protocol.protocol);
+ if (sockfd < 0)
return -errno;
- return ret;
+
+ ret = setsockopt(sockfd, SOL_SOCKET, SO_RCVTIMEO, &timeout,
+ sizeof(timeout));
+ if (ret != 0) {
+ ret = -errno;
+ close(sockfd);
+ return ret;
+ }
+ ret = setsockopt(sockfd, SOL_SOCKET, SO_SNDTIMEO, &timeout,
+ sizeof(timeout));
+ if (ret != 0) {
+ ret = -errno;
+ close(sockfd);
+ return ret;
+ }
+ return sockfd;
}
#ifndef SIN6_LEN_RFC2133
@@ -271,10 +302,9 @@ FIXTURE_VARIANT(protocol)
FIXTURE_SETUP(protocol)
{
- const struct protocol_variant prot_unspec = {
- .domain = AF_UNSPEC,
- .type = SOCK_STREAM,
- };
+ struct protocol_variant prot_unspec = variant->prot;
+
+ prot_unspec.domain = AF_UNSPEC;
disable_caps(_metadata);
@@ -510,6 +540,92 @@ FIXTURE_VARIANT_ADD(protocol, tcp_sandbox_with_unix_datagram) {
},
};
+/* clang-format off */
+FIXTURE_VARIANT_ADD(protocol, udp_sandbox_with_ipv4_udp1) {
+ /* clang-format on */
+ .sandbox = UDP_SANDBOX,
+ .prot = {
+ .domain = AF_INET,
+ .type = SOCK_DGRAM,
+ .protocol = IPPROTO_UDP,
+ },
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(protocol, udp_sandbox_with_ipv4_udp2) {
+ /* clang-format on */
+ .sandbox = UDP_SANDBOX,
+ .prot = {
+ .domain = AF_INET,
+ .type = SOCK_DGRAM,
+ /* IPPROTO_IP == 0 */
+ .protocol = IPPROTO_IP,
+ },
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(protocol, udp_sandbox_with_ipv6_udp1) {
+ /* clang-format on */
+ .sandbox = UDP_SANDBOX,
+ .prot = {
+ .domain = AF_INET6,
+ .type = SOCK_DGRAM,
+ .protocol = IPPROTO_UDP,
+ },
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(protocol, udp_sandbox_with_ipv6_udp2) {
+ /* clang-format on */
+ .sandbox = UDP_SANDBOX,
+ .prot = {
+ .domain = AF_INET6,
+ .type = SOCK_DGRAM,
+ /* IPPROTO_IP == 0 */
+ .protocol = IPPROTO_IP,
+ },
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(protocol, udp_sandbox_with_ipv4_tcp) {
+ /* clang-format on */
+ .sandbox = UDP_SANDBOX,
+ .prot = {
+ .domain = AF_INET,
+ .type = SOCK_STREAM,
+ },
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(protocol, udp_sandbox_with_ipv6_tcp) {
+ /* clang-format on */
+ .sandbox = UDP_SANDBOX,
+ .prot = {
+ .domain = AF_INET6,
+ .type = SOCK_STREAM,
+ },
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(protocol, udp_sandbox_with_unix_stream) {
+ /* clang-format on */
+ .sandbox = UDP_SANDBOX,
+ .prot = {
+ .domain = AF_UNIX,
+ .type = SOCK_STREAM,
+ },
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(protocol, udp_sandbox_with_unix_datagram) {
+ /* clang-format on */
+ .sandbox = UDP_SANDBOX,
+ .prot = {
+ .domain = AF_UNIX,
+ .type = SOCK_DGRAM,
+ },
+};
+
static void test_bind_and_connect(struct __test_metadata *const _metadata,
const struct service_fixture *const srv,
const bool deny_bind, const bool deny_connect)
@@ -602,7 +718,7 @@ static void test_bind_and_connect(struct __test_metadata *const _metadata,
ret = connect_variant(connect_fd, srv);
if (deny_connect) {
EXPECT_EQ(-EACCES, ret);
- } else if (deny_bind) {
+ } else if (deny_bind && srv->protocol.type == SOCK_STREAM) {
/* No listening server. */
EXPECT_EQ(-ECONNREFUSED, ret);
} else {
@@ -641,18 +757,25 @@ static void test_bind_and_connect(struct __test_metadata *const _metadata,
TEST_F(protocol, bind)
{
- if (variant->sandbox == TCP_SANDBOX) {
+ if (variant->sandbox == TCP_SANDBOX ||
+ variant->sandbox == UDP_SANDBOX) {
+ const __u64 bind_access =
+ (variant->sandbox == TCP_SANDBOX ?
+ LANDLOCK_ACCESS_NET_BIND_TCP :
+ LANDLOCK_ACCESS_NET_BIND_UDP);
+ const __u64 conn_access =
+ (variant->sandbox == TCP_SANDBOX ?
+ LANDLOCK_ACCESS_NET_CONNECT_TCP :
+ LANDLOCK_ACCESS_NET_CONNECT_SEND_UDP);
const struct landlock_ruleset_attr ruleset_attr = {
- .handled_access_net = LANDLOCK_ACCESS_NET_BIND_TCP |
- LANDLOCK_ACCESS_NET_CONNECT_TCP,
+ .handled_access_net = bind_access | conn_access,
};
- const struct landlock_net_port_attr tcp_bind_connect_p0 = {
- .allowed_access = LANDLOCK_ACCESS_NET_BIND_TCP |
- LANDLOCK_ACCESS_NET_CONNECT_TCP,
+ const struct landlock_net_port_attr bind_connect_p0 = {
+ .allowed_access = bind_access | conn_access,
.port = self->srv0.port,
};
- const struct landlock_net_port_attr tcp_connect_p1 = {
- .allowed_access = LANDLOCK_ACCESS_NET_CONNECT_TCP,
+ const struct landlock_net_port_attr connect_p1 = {
+ .allowed_access = conn_access,
.port = self->srv1.port,
};
int ruleset_fd;
@@ -664,12 +787,26 @@ TEST_F(protocol, bind)
/* Allows connect and bind for the first port. */
ASSERT_EQ(0,
landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
- &tcp_bind_connect_p0, 0));
+ &bind_connect_p0, 0));
/* Allows connect and denies bind for the second port. */
ASSERT_EQ(0,
landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
- &tcp_connect_p1, 0));
+ &connect_p1, 0));
+
+ /*
+ * For UDP sockets, allows binding to ephemeral ports
+ * (required to connect or send a first datagram).
+ */
+ if (variant->sandbox == UDP_SANDBOX) {
+ const struct landlock_net_port_attr bind_ephemeral = {
+ .allowed_access = bind_access,
+ .port = 0,
+ };
+ ASSERT_EQ(0, landlock_add_rule(ruleset_fd,
+ LANDLOCK_RULE_NET_PORT,
+ &bind_ephemeral, 0));
+ }
enforce_ruleset(_metadata, ruleset_fd);
EXPECT_EQ(0, close(ruleset_fd));
@@ -691,18 +828,25 @@ TEST_F(protocol, bind)
TEST_F(protocol, connect)
{
- if (variant->sandbox == TCP_SANDBOX) {
+ if (variant->sandbox == TCP_SANDBOX ||
+ variant->sandbox == UDP_SANDBOX) {
+ const __u64 bind_access =
+ (variant->sandbox == TCP_SANDBOX ?
+ LANDLOCK_ACCESS_NET_BIND_TCP :
+ LANDLOCK_ACCESS_NET_BIND_UDP);
+ const __u64 conn_access =
+ (variant->sandbox == TCP_SANDBOX ?
+ LANDLOCK_ACCESS_NET_CONNECT_TCP :
+ LANDLOCK_ACCESS_NET_CONNECT_SEND_UDP);
const struct landlock_ruleset_attr ruleset_attr = {
- .handled_access_net = LANDLOCK_ACCESS_NET_BIND_TCP |
- LANDLOCK_ACCESS_NET_CONNECT_TCP,
+ .handled_access_net = bind_access | conn_access,
};
- const struct landlock_net_port_attr tcp_bind_connect_p0 = {
- .allowed_access = LANDLOCK_ACCESS_NET_BIND_TCP |
- LANDLOCK_ACCESS_NET_CONNECT_TCP,
+ const struct landlock_net_port_attr bind_connect_p0 = {
+ .allowed_access = bind_access | conn_access,
.port = self->srv0.port,
};
- const struct landlock_net_port_attr tcp_bind_p1 = {
- .allowed_access = LANDLOCK_ACCESS_NET_BIND_TCP,
+ const struct landlock_net_port_attr bind_p1 = {
+ .allowed_access = bind_access,
.port = self->srv1.port,
};
int ruleset_fd;
@@ -714,12 +858,26 @@ TEST_F(protocol, connect)
/* Allows connect and bind for the first port. */
ASSERT_EQ(0,
landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
- &tcp_bind_connect_p0, 0));
+ &bind_connect_p0, 0));
/* Allows bind and denies connect for the second port. */
ASSERT_EQ(0,
landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
- &tcp_bind_p1, 0));
+ &bind_p1, 0));
+
+ /*
+ * For UDP sockets, allows binding to ephemeral ports
+ * (required to connect or send a first datagram).
+ */
+ if (variant->sandbox == UDP_SANDBOX) {
+ const struct landlock_net_port_attr bind_ephemeral = {
+ .allowed_access = bind_access,
+ .port = 0,
+ };
+ ASSERT_EQ(0, landlock_add_rule(ruleset_fd,
+ LANDLOCK_RULE_NET_PORT,
+ &bind_ephemeral, 0));
+ }
enforce_ruleset(_metadata, ruleset_fd);
EXPECT_EQ(0, close(ruleset_fd));
@@ -737,16 +895,20 @@ TEST_F(protocol, connect)
TEST_F(protocol, bind_unspec)
{
+ const __u64 bind_access = (variant->sandbox == TCP_SANDBOX ?
+ LANDLOCK_ACCESS_NET_BIND_TCP :
+ LANDLOCK_ACCESS_NET_BIND_UDP);
const struct landlock_ruleset_attr ruleset_attr = {
- .handled_access_net = LANDLOCK_ACCESS_NET_BIND_TCP,
+ .handled_access_net = bind_access,
};
- const struct landlock_net_port_attr tcp_bind = {
- .allowed_access = LANDLOCK_ACCESS_NET_BIND_TCP,
+ const struct landlock_net_port_attr rule_bind = {
+ .allowed_access = bind_access,
.port = self->srv0.port,
};
int bind_fd, ret;
- if (variant->sandbox == TCP_SANDBOX) {
+ if (variant->sandbox == TCP_SANDBOX ||
+ variant->sandbox == UDP_SANDBOX) {
const int ruleset_fd = landlock_create_ruleset(
&ruleset_attr, sizeof(ruleset_attr), 0);
ASSERT_LE(0, ruleset_fd);
@@ -754,7 +916,7 @@ TEST_F(protocol, bind_unspec)
/* Allows bind. */
ASSERT_EQ(0,
landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
- &tcp_bind, 0));
+ &rule_bind, 0));
enforce_ruleset(_metadata, ruleset_fd);
EXPECT_EQ(0, close(ruleset_fd));
}
@@ -782,7 +944,8 @@ TEST_F(protocol, bind_unspec)
}
EXPECT_EQ(0, close(bind_fd));
- if (variant->sandbox == TCP_SANDBOX) {
+ if (variant->sandbox == TCP_SANDBOX ||
+ variant->sandbox == UDP_SANDBOX) {
const int ruleset_fd = landlock_create_ruleset(
&ruleset_attr, sizeof(ruleset_attr), 0);
ASSERT_LE(0, ruleset_fd);
@@ -828,11 +991,21 @@ TEST_F(protocol, bind_unspec)
TEST_F(protocol, connect_unspec)
{
- const struct landlock_ruleset_attr ruleset_attr = {
- .handled_access_net = LANDLOCK_ACCESS_NET_CONNECT_TCP,
+ const __u64 connect_right =
+ (variant->sandbox == TCP_SANDBOX ?
+ LANDLOCK_ACCESS_NET_CONNECT_TCP :
+ LANDLOCK_ACCESS_NET_CONNECT_SEND_UDP);
+ const __u64 bind_right = (variant->sandbox == TCP_SANDBOX ?
+ LANDLOCK_ACCESS_NET_BIND_TCP :
+ LANDLOCK_ACCESS_NET_BIND_UDP);
+ const struct landlock_ruleset_attr ruleset_conn = {
+ .handled_access_net = connect_right,
};
- const struct landlock_net_port_attr tcp_connect = {
- .allowed_access = LANDLOCK_ACCESS_NET_CONNECT_TCP,
+ const struct landlock_ruleset_attr ruleset_conn_bind = {
+ .handled_access_net = connect_right | bind_right,
+ };
+ const struct landlock_net_port_attr rule_connect = {
+ .allowed_access = connect_right,
.port = self->srv0.port,
};
int bind_fd, client_fd, status;
@@ -865,15 +1038,16 @@ TEST_F(protocol, connect_unspec)
EXPECT_EQ(0, ret);
}
- if (variant->sandbox == TCP_SANDBOX) {
+ if (variant->sandbox == TCP_SANDBOX ||
+ variant->sandbox == UDP_SANDBOX) {
const int ruleset_fd = landlock_create_ruleset(
- &ruleset_attr, sizeof(ruleset_attr), 0);
+ &ruleset_conn, sizeof(ruleset_conn), 0);
ASSERT_LE(0, ruleset_fd);
/* Allows connect. */
ASSERT_EQ(0, landlock_add_rule(ruleset_fd,
LANDLOCK_RULE_NET_PORT,
- &tcp_connect, 0));
+ &rule_connect, 0));
enforce_ruleset(_metadata, ruleset_fd);
EXPECT_EQ(0, close(ruleset_fd));
}
@@ -896,12 +1070,14 @@ TEST_F(protocol, connect_unspec)
EXPECT_EQ(0, ret);
}
- if (variant->sandbox == TCP_SANDBOX) {
+ if (variant->sandbox == TCP_SANDBOX ||
+ variant->sandbox == UDP_SANDBOX) {
const int ruleset_fd = landlock_create_ruleset(
- &ruleset_attr, sizeof(ruleset_attr), 0);
+ &ruleset_conn_bind, sizeof(ruleset_conn_bind),
+ 0);
ASSERT_LE(0, ruleset_fd);
- /* Denies connect. */
+ /* Denies connect and bind. */
enforce_ruleset(_metadata, ruleset_fd);
EXPECT_EQ(0, close(ruleset_fd));
}
@@ -975,6 +1151,13 @@ FIXTURE_VARIANT_ADD(ipv4, tcp_sandbox_with_tcp) {
.type = SOCK_STREAM,
};
+/* clang-format off */
+FIXTURE_VARIANT_ADD(ipv4, udp_sandbox_with_tcp) {
+ /* clang-format on */
+ .sandbox = UDP_SANDBOX,
+ .type = SOCK_STREAM,
+};
+
/* clang-format off */
FIXTURE_VARIANT_ADD(ipv4, no_sandbox_with_udp) {
/* clang-format on */
@@ -989,6 +1172,13 @@ FIXTURE_VARIANT_ADD(ipv4, tcp_sandbox_with_udp) {
.type = SOCK_DGRAM,
};
+/* clang-format off */
+FIXTURE_VARIANT_ADD(ipv4, udp_sandbox_with_udp) {
+ /* clang-format on */
+ .sandbox = UDP_SANDBOX,
+ .type = SOCK_DGRAM,
+};
+
FIXTURE_SETUP(ipv4)
{
const struct protocol_variant prot = {
@@ -1012,14 +1202,19 @@ TEST_F(ipv4, from_unix_to_inet)
{
int unix_stream_fd, unix_dgram_fd;
- if (variant->sandbox == TCP_SANDBOX) {
+ if (variant->sandbox == TCP_SANDBOX ||
+ variant->sandbox == UDP_SANDBOX) {
+ const __u64 access_rights =
+ (variant->sandbox == TCP_SANDBOX ?
+ LANDLOCK_ACCESS_NET_BIND_TCP |
+ LANDLOCK_ACCESS_NET_CONNECT_TCP :
+ LANDLOCK_ACCESS_NET_BIND_UDP |
+ LANDLOCK_ACCESS_NET_CONNECT_SEND_UDP);
const struct landlock_ruleset_attr ruleset_attr = {
- .handled_access_net = LANDLOCK_ACCESS_NET_BIND_TCP |
- LANDLOCK_ACCESS_NET_CONNECT_TCP,
+ .handled_access_net = access_rights,
};
const struct landlock_net_port_attr tcp_bind_connect_p0 = {
- .allowed_access = LANDLOCK_ACCESS_NET_BIND_TCP |
- LANDLOCK_ACCESS_NET_CONNECT_TCP,
+ .allowed_access = access_rights,
.port = self->srv0.port,
};
int ruleset_fd;
@@ -1680,6 +1875,7 @@ TEST_F(ipv4_tcp, with_fs)
FIXTURE(port_specific)
{
struct service_fixture srv0;
+ struct service_fixture cli1;
};
FIXTURE_VARIANT(port_specific)
@@ -1699,7 +1895,7 @@ FIXTURE_VARIANT_ADD(port_specific, no_sandbox_with_ipv4) {
};
/* clang-format off */
-FIXTURE_VARIANT_ADD(port_specific, sandbox_with_ipv4) {
+FIXTURE_VARIANT_ADD(port_specific, tcp_sandbox_with_ipv4) {
/* clang-format on */
.sandbox = TCP_SANDBOX,
.prot = {
@@ -1708,6 +1904,16 @@ FIXTURE_VARIANT_ADD(port_specific, sandbox_with_ipv4) {
},
};
+/* clang-format off */
+FIXTURE_VARIANT_ADD(port_specific, udp_sandbox_with_ipv4) {
+ /* clang-format on */
+ .sandbox = UDP_SANDBOX,
+ .prot = {
+ .domain = AF_INET,
+ .type = SOCK_DGRAM,
+ },
+};
+
/* clang-format off */
FIXTURE_VARIANT_ADD(port_specific, no_sandbox_with_ipv6) {
/* clang-format on */
@@ -1719,7 +1925,7 @@ FIXTURE_VARIANT_ADD(port_specific, no_sandbox_with_ipv6) {
};
/* clang-format off */
-FIXTURE_VARIANT_ADD(port_specific, sandbox_with_ipv6) {
+FIXTURE_VARIANT_ADD(port_specific, tcp_sandbox_with_ipv6) {
/* clang-format on */
.sandbox = TCP_SANDBOX,
.prot = {
@@ -1728,11 +1934,22 @@ FIXTURE_VARIANT_ADD(port_specific, sandbox_with_ipv6) {
},
};
+/* clang-format off */
+FIXTURE_VARIANT_ADD(port_specific, udp_sandbox_with_ipv6) {
+ /* clang-format on */
+ .sandbox = UDP_SANDBOX,
+ .prot = {
+ .domain = AF_INET6,
+ .type = SOCK_DGRAM,
+ },
+};
+
FIXTURE_SETUP(port_specific)
{
disable_caps(_metadata);
ASSERT_EQ(0, set_service(&self->srv0, variant->prot, 0));
+ ASSERT_EQ(0, set_service(&self->cli1, variant->prot, 1));
setup_loopback(_metadata);
};
@@ -1747,14 +1964,19 @@ TEST_F(port_specific, bind_connect_zero)
uint16_t port;
/* Adds a rule layer with bind and connect actions. */
- if (variant->sandbox == TCP_SANDBOX) {
+ if (variant->sandbox == TCP_SANDBOX ||
+ variant->sandbox == UDP_SANDBOX) {
+ const __u64 access_rights =
+ (variant->sandbox == TCP_SANDBOX ?
+ LANDLOCK_ACCESS_NET_BIND_TCP |
+ LANDLOCK_ACCESS_NET_CONNECT_TCP :
+ LANDLOCK_ACCESS_NET_BIND_UDP |
+ LANDLOCK_ACCESS_NET_CONNECT_SEND_UDP);
const struct landlock_ruleset_attr ruleset_attr = {
- .handled_access_net = LANDLOCK_ACCESS_NET_BIND_TCP |
- LANDLOCK_ACCESS_NET_CONNECT_TCP
+ .handled_access_net = access_rights,
};
- const struct landlock_net_port_attr tcp_bind_connect_zero = {
- .allowed_access = LANDLOCK_ACCESS_NET_BIND_TCP |
- LANDLOCK_ACCESS_NET_CONNECT_TCP,
+ const struct landlock_net_port_attr bind_connect_zero = {
+ .allowed_access = access_rights,
.port = 0,
};
int ruleset_fd;
@@ -1766,7 +1988,7 @@ TEST_F(port_specific, bind_connect_zero)
/* Checks zero port value on bind and connect actions. */
EXPECT_EQ(0,
landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
- &tcp_bind_connect_zero, 0));
+ &bind_connect_zero, 0));
enforce_ruleset(_metadata, ruleset_fd);
EXPECT_EQ(0, close(ruleset_fd));
@@ -1787,11 +2009,16 @@ TEST_F(port_specific, bind_connect_zero)
ret = bind_variant(bind_fd, &self->srv0);
EXPECT_EQ(0, ret);
- EXPECT_EQ(0, listen(bind_fd, backlog));
+ if (variant->prot.type == SOCK_STREAM)
+ EXPECT_EQ(0, listen(bind_fd, backlog));
/* Connects on port 0. */
ret = connect_variant(connect_fd, &self->srv0);
- EXPECT_EQ(-ECONNREFUSED, ret);
+ if (variant->prot.type == SOCK_STREAM) {
+ EXPECT_EQ(-ECONNREFUSED, ret);
+ } else {
+ EXPECT_EQ(0, ret);
+ }
/* Sets binded port for both protocol families. */
port = get_binded_port(bind_fd, &variant->prot);
@@ -1815,23 +2042,35 @@ TEST_F(port_specific, bind_connect_1023)
int bind_fd, connect_fd, ret;
/* Adds a rule layer with bind and connect actions. */
- if (variant->sandbox == TCP_SANDBOX) {
+ if (variant->sandbox == TCP_SANDBOX ||
+ variant->sandbox == UDP_SANDBOX) {
+ const __u64 bind_right = (variant->sandbox == TCP_SANDBOX ?
+ LANDLOCK_ACCESS_NET_BIND_TCP :
+ LANDLOCK_ACCESS_NET_BIND_UDP);
+ const __u64 access_rights =
+ (variant->sandbox == TCP_SANDBOX ?
+ (LANDLOCK_ACCESS_NET_BIND_TCP |
+ LANDLOCK_ACCESS_NET_CONNECT_TCP) :
+ (LANDLOCK_ACCESS_NET_BIND_UDP |
+ LANDLOCK_ACCESS_NET_CONNECT_SEND_UDP));
const struct landlock_ruleset_attr ruleset_attr = {
- .handled_access_net = LANDLOCK_ACCESS_NET_BIND_TCP |
- LANDLOCK_ACCESS_NET_CONNECT_TCP
+ .handled_access_net = access_rights,
};
/* A rule with port value less than 1024. */
- const struct landlock_net_port_attr tcp_bind_connect_low_range = {
- .allowed_access = LANDLOCK_ACCESS_NET_BIND_TCP |
- LANDLOCK_ACCESS_NET_CONNECT_TCP,
+ const struct landlock_net_port_attr bind_connect_low_range = {
+ .allowed_access = access_rights,
.port = 1023,
};
/* A rule with 1024 port. */
- const struct landlock_net_port_attr tcp_bind_connect = {
- .allowed_access = LANDLOCK_ACCESS_NET_BIND_TCP |
- LANDLOCK_ACCESS_NET_CONNECT_TCP,
+ const struct landlock_net_port_attr bind_connect = {
+ .allowed_access = access_rights,
.port = 1024,
};
+ /* A rule with cli1's port, to use as source port. */
+ const struct landlock_net_port_attr srcport = {
+ .allowed_access = bind_right,
+ .port = self->cli1.port,
+ };
int ruleset_fd;
ruleset_fd = landlock_create_ruleset(&ruleset_attr,
@@ -1840,10 +2079,15 @@ TEST_F(port_specific, bind_connect_1023)
ASSERT_EQ(0,
landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
- &tcp_bind_connect_low_range, 0));
+ &bind_connect_low_range, 0));
ASSERT_EQ(0,
landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
- &tcp_bind_connect, 0));
+ &bind_connect, 0));
+ if (variant->sandbox == UDP_SANDBOX) {
+ ASSERT_EQ(0, landlock_add_rule(ruleset_fd,
+ LANDLOCK_RULE_NET_PORT,
+ &srcport, 0));
+ }
enforce_ruleset(_metadata, ruleset_fd);
EXPECT_EQ(0, close(ruleset_fd));
@@ -1852,9 +2096,6 @@ TEST_F(port_specific, bind_connect_1023)
bind_fd = socket_variant(&self->srv0);
ASSERT_LE(0, bind_fd);
- connect_fd = socket_variant(&self->srv0);
- ASSERT_LE(0, connect_fd);
-
/* Sets address port to 1023 for both protocol families. */
set_port(&self->srv0, 1023);
/* Binds on port 1023. */
@@ -1867,8 +2108,19 @@ TEST_F(port_specific, bind_connect_1023)
ret = bind_variant(bind_fd, &self->srv0);
clear_cap(_metadata, CAP_NET_BIND_SERVICE);
EXPECT_EQ(0, ret);
- EXPECT_EQ(0, listen(bind_fd, backlog));
+ if (variant->prot.type == SOCK_STREAM)
+ EXPECT_EQ(0, listen(bind_fd, backlog));
+ connect_fd = socket_variant(&self->srv0);
+ ASSERT_LE(0, connect_fd);
+ if (variant->prot.type == SOCK_DGRAM) {
+ /*
+ * We are about to connect(), but bind() is restricted, so for
+ * UDP sockets we need to use cli1's port as source port (the
+ * only one we are allowed to use).
+ */
+ EXPECT_EQ(0, bind_variant(connect_fd, &self->cli1));
+ }
/* Connects on the binded port 1023. */
ret = connect_variant(connect_fd, &self->srv0);
EXPECT_EQ(0, ret);
@@ -1887,7 +2139,10 @@ TEST_F(port_specific, bind_connect_1023)
/* Binds on port 1024. */
ret = bind_variant(bind_fd, &self->srv0);
EXPECT_EQ(0, ret);
- EXPECT_EQ(0, listen(bind_fd, backlog));
+ if (variant->prot.type == SOCK_STREAM)
+ EXPECT_EQ(0, listen(bind_fd, backlog));
+ if (variant->prot.type == SOCK_DGRAM)
+ EXPECT_EQ(0, bind_variant(connect_fd, &self->cli1));
/* Connects on the binded port 1024. */
ret = connect_variant(connect_fd, &self->srv0);
@@ -1897,23 +2152,30 @@ TEST_F(port_specific, bind_connect_1023)
EXPECT_EQ(0, close(bind_fd));
}
-static int matches_log_tcp(const int audit_fd, const char *const blockers,
- const char *const dir_addr, const char *const addr,
- const char *const dir_port)
+static int matches_auditlog(const int audit_fd, const char *const blockers,
+ const char *const dir_addr, const char *const addr,
+ const char *const dir_port)
{
- static const char log_template[] = REGEX_LANDLOCK_PREFIX
+ static const char log_with_addrport_tmpl[] = REGEX_LANDLOCK_PREFIX
" blockers=%s %s=%s %s=1024$";
+ static const char log_without_addrport_tmpl[] = REGEX_LANDLOCK_PREFIX
+ " blockers=%s";
/*
* Max strlen(blockers): 16
* Max strlen(dir_addr): 5
* Max strlen(addr): 12
* Max strlen(dir_port): 4
*/
- char log_match[sizeof(log_template) + 37];
+ char log_match[sizeof(log_with_addrport_tmpl) + 37];
int log_match_len;
- log_match_len = snprintf(log_match, sizeof(log_match), log_template,
- blockers, dir_addr, addr, dir_port);
+ if (addr == NULL)
+ log_match_len = snprintf(log_match, sizeof(log_match),
+ log_without_addrport_tmpl, blockers);
+ else
+ log_match_len = snprintf(log_match, sizeof(log_match),
+ log_with_addrport_tmpl, blockers,
+ dir_addr, addr, dir_port);
if (log_match_len > sizeof(log_match))
return -E2BIG;
@@ -1924,6 +2186,7 @@ static int matches_log_tcp(const int audit_fd, const char *const blockers,
FIXTURE(audit)
{
struct service_fixture srv0;
+ struct service_fixture srv1;
struct audit_filter audit_filter;
int audit_fd;
};
@@ -1935,7 +2198,7 @@ FIXTURE_VARIANT(audit)
};
/* clang-format off */
-FIXTURE_VARIANT_ADD(audit, ipv4) {
+FIXTURE_VARIANT_ADD(audit, ipv4_tcp) {
/* clang-format on */
.addr = "127\\.0\\.0\\.1",
.prot = {
@@ -1945,7 +2208,17 @@ FIXTURE_VARIANT_ADD(audit, ipv4) {
};
/* clang-format off */
-FIXTURE_VARIANT_ADD(audit, ipv6) {
+FIXTURE_VARIANT_ADD(audit, ipv4_udp) {
+ /* clang-format on */
+ .addr = "127\\.0\\.0\\.1",
+ .prot = {
+ .domain = AF_INET,
+ .type = SOCK_DGRAM,
+ },
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(audit, ipv6_tcp) {
/* clang-format on */
.addr = "::1",
.prot = {
@@ -1954,9 +2227,21 @@ FIXTURE_VARIANT_ADD(audit, ipv6) {
},
};
+/* clang-format off */
+FIXTURE_VARIANT_ADD(audit, ipv6_udp) {
+ /* clang-format on */
+ .addr = "::1",
+ .prot = {
+ .domain = AF_INET6,
+ .type = SOCK_DGRAM,
+ },
+};
+
FIXTURE_SETUP(audit)
{
ASSERT_EQ(0, set_service(&self->srv0, variant->prot, 0));
+ ASSERT_EQ(0, set_service(&self->srv1, variant->prot, 1));
+
setup_loopback(_metadata);
set_cap(_metadata, CAP_AUDIT_CONTROL);
@@ -1974,9 +2259,17 @@ FIXTURE_TEARDOWN(audit)
TEST_F(audit, bind)
{
+ const char *audit_evt = (variant->prot.type == SOCK_STREAM ?
+ "net\\.bind_tcp" :
+ "net\\.bind_udp");
+ const __u64 access_rights =
+ (variant->prot.type == SOCK_STREAM ?
+ LANDLOCK_ACCESS_NET_BIND_TCP |
+ LANDLOCK_ACCESS_NET_CONNECT_TCP :
+ LANDLOCK_ACCESS_NET_BIND_UDP |
+ LANDLOCK_ACCESS_NET_CONNECT_SEND_UDP);
const struct landlock_ruleset_attr ruleset_attr = {
- .handled_access_net = LANDLOCK_ACCESS_NET_BIND_TCP |
- LANDLOCK_ACCESS_NET_CONNECT_TCP,
+ .handled_access_net = access_rights,
};
struct audit_records records;
int ruleset_fd, sock_fd;
@@ -1990,8 +2283,8 @@ TEST_F(audit, bind)
sock_fd = socket_variant(&self->srv0);
ASSERT_LE(0, sock_fd);
EXPECT_EQ(-EACCES, bind_variant(sock_fd, &self->srv0));
- EXPECT_EQ(0, matches_log_tcp(self->audit_fd, "net\\.bind_tcp", "saddr",
- variant->addr, "src"));
+ EXPECT_EQ(0, matches_auditlog(self->audit_fd, audit_evt, "saddr",
+ variant->addr, "src"));
EXPECT_EQ(0, audit_count_records(self->audit_fd, &records));
EXPECT_EQ(0, records.access);
@@ -2002,9 +2295,22 @@ TEST_F(audit, bind)
TEST_F(audit, connect)
{
+ const char *audit_evt = (variant->prot.type == SOCK_STREAM ?
+ "net\\.connect_tcp" :
+ "net\\.connect_send_udp");
+ const __u64 bind_right = (variant->prot.type == SOCK_STREAM ?
+ LANDLOCK_ACCESS_NET_BIND_TCP :
+ LANDLOCK_ACCESS_NET_BIND_UDP);
+ const __u64 conn_right = (variant->prot.type == SOCK_STREAM ?
+ LANDLOCK_ACCESS_NET_CONNECT_TCP :
+ LANDLOCK_ACCESS_NET_CONNECT_SEND_UDP);
+ const __u64 access_rights = bind_right | conn_right;
const struct landlock_ruleset_attr ruleset_attr = {
- .handled_access_net = LANDLOCK_ACCESS_NET_BIND_TCP |
- LANDLOCK_ACCESS_NET_CONNECT_TCP,
+ .handled_access_net = access_rights,
+ };
+ const struct landlock_net_port_attr rule_connect_p1 = {
+ .allowed_access = conn_right,
+ .port = self->srv1.port,
};
struct audit_records records;
int ruleset_fd, sock_fd;
@@ -2012,19 +2318,32 @@ TEST_F(audit, connect)
ruleset_fd =
landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0);
ASSERT_LE(0, ruleset_fd);
+ ASSERT_EQ(0, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
+ &rule_connect_p1, 0));
enforce_ruleset(_metadata, ruleset_fd);
EXPECT_EQ(0, close(ruleset_fd));
sock_fd = socket_variant(&self->srv0);
ASSERT_LE(0, sock_fd);
EXPECT_EQ(-EACCES, connect_variant(sock_fd, &self->srv0));
- EXPECT_EQ(0, matches_log_tcp(self->audit_fd, "net\\.connect_tcp",
- "daddr", variant->addr, "dest"));
+ EXPECT_EQ(0, matches_auditlog(self->audit_fd, audit_evt, "daddr",
+ variant->addr, "dest"));
EXPECT_EQ(0, audit_count_records(self->audit_fd, &records));
EXPECT_EQ(0, records.access);
EXPECT_EQ(1, records.domain);
+ if (variant->prot.type == SOCK_DGRAM) {
+ /* Check that autobind generates a denied bind event. */
+ EXPECT_EQ(-EACCES, connect_variant(sock_fd, &self->srv1));
+
+ EXPECT_EQ(0, matches_auditlog(self->audit_fd, "net\\.bind_udp",
+ NULL, NULL, NULL));
+ EXPECT_EQ(0, audit_count_records(self->audit_fd, &records));
+ EXPECT_EQ(0, records.access);
+ EXPECT_EQ(0, records.domain);
+ }
+
EXPECT_EQ(0, close(sock_fd));
}
--
2.47.3
^ permalink raw reply related
* [PATCH v5 2/6] landlock: Add UDP send+connect access control
From: Matthieu Buffet @ 2026-06-11 16:21 UTC (permalink / raw)
To: Mickaël Salaün, Günther Noack
Cc: linux-security-module, Mikhail Ivanov, konstantin.meskhidze,
Tingmao Wang, netdev, Matthieu Buffet
In-Reply-To: <20260611162107.49278-1-matthieu@buffet.re>
Add support for a second fine-grained UDP access right.
LANDLOCK_ACCESS_NET_CONNECT_SEND_UDP controls the ability to set the
remote port of a socket (via connect()) and to specify an explicit
destination when sending a datagram, to override any remote peer set on
a UDP socket (e.g. in sendto() or sendmsg()).
It will be useful for applications that send datagrams, and for some
servers too (those creating per-client sockets, which want to receive
traffic only from a specific address).
As with bind(), this access control is performed when
configuring sockets, not in hot code paths.
Add detection of when autobind is about to be required, and deny the
operation if the process would not be allowed to call bind(0)
explicitly. Autobind can only be performed in udp_lib_get_port() from
code paths already controlled by LSM hooks: when connect()ing,
sending a first datagram, and in a splice() EOF edge case which, as far
as I understand, can only happen after a remote peer has been set. This
invariant needs to be preserved to keep bind policies actually enforced.
Signed-off-by: Matthieu Buffet <matthieu@buffet.re>
---
include/uapi/linux/landlock.h | 23 ++++
security/landlock/audit.c | 2 +
security/landlock/limits.h | 2 +-
security/landlock/net.c | 137 +++++++++++++++++---
tools/testing/selftests/landlock/net_test.c | 5 +-
5 files changed, 151 insertions(+), 18 deletions(-)
diff --git a/include/uapi/linux/landlock.h b/include/uapi/linux/landlock.h
index 045b251ff1b4..b147223efc97 100644
--- a/include/uapi/linux/landlock.h
+++ b/include/uapi/linux/landlock.h
@@ -378,11 +378,34 @@ struct landlock_net_port_attr {
*
* - %LANDLOCK_ACCESS_NET_BIND_UDP: Bind UDP sockets to the given local
* port. Support added in Landlock ABI version 10.
+ * - %LANDLOCK_ACCESS_NET_CONNECT_SEND_UDP: Set the remote port of UDP
+ * sockets to the given port, or send datagrams to the given remote port
+ * ignoring any destination pre-set on a socket. Support added in
+ * Landlock ABI version 10.
+ *
+ * .. note:: Setting a remote address or sending a first datagram
+ * auto-binds UDP sockets to an ephemeral local source port if not
+ * already bound. To allow this if both %LANDLOCK_ACCESS_NET_BIND_UDP
+ * and %LANDLOCK_ACCESS_NET_CONNECT_SEND_UDP are handled, you need to
+ * either:
+ *
+ * - use a socket already bound to a port before the ruleset started
+ * being enforced;
+ * - or grant %LANDLOCK_ACCESS_NET_BIND_UDP on port 0, meaning "any
+ * port in the ephemeral port range";
+ * - or grant %LANDLOCK_ACCESS_NET_BIND_UDP on a specific port, and
+ * call :manpage:`bind(2)` on that port before trying to
+ * :manpage:`connect(2)` or send datagrams.
+ *
+ * .. note:: Sending datagrams to an ``AF_UNSPEC`` destination address
+ * family is not supported for IPv6 UDP sockets: you will need to use a
+ * ``NULL`` address instead.
*/
/* clang-format off */
#define LANDLOCK_ACCESS_NET_BIND_TCP (1ULL << 0)
#define LANDLOCK_ACCESS_NET_CONNECT_TCP (1ULL << 1)
#define LANDLOCK_ACCESS_NET_BIND_UDP (1ULL << 2)
+#define LANDLOCK_ACCESS_NET_CONNECT_SEND_UDP (1ULL << 3)
/* clang-format on */
/**
diff --git a/security/landlock/audit.c b/security/landlock/audit.c
index e676ebffeebe..851647197a01 100644
--- a/security/landlock/audit.c
+++ b/security/landlock/audit.c
@@ -46,6 +46,8 @@ static const char *const net_access_strings[] = {
[BIT_INDEX(LANDLOCK_ACCESS_NET_BIND_TCP)] = "net.bind_tcp",
[BIT_INDEX(LANDLOCK_ACCESS_NET_CONNECT_TCP)] = "net.connect_tcp",
[BIT_INDEX(LANDLOCK_ACCESS_NET_BIND_UDP)] = "net.bind_udp",
+ [BIT_INDEX(LANDLOCK_ACCESS_NET_CONNECT_SEND_UDP)] =
+ "net.connect_send_udp",
};
static_assert(ARRAY_SIZE(net_access_strings) == LANDLOCK_NUM_ACCESS_NET);
diff --git a/security/landlock/limits.h b/security/landlock/limits.h
index c0f30a4591b8..a4d908b240a2 100644
--- a/security/landlock/limits.h
+++ b/security/landlock/limits.h
@@ -23,7 +23,7 @@
#define LANDLOCK_MASK_ACCESS_FS ((LANDLOCK_LAST_ACCESS_FS << 1) - 1)
#define LANDLOCK_NUM_ACCESS_FS __const_hweight64(LANDLOCK_MASK_ACCESS_FS)
-#define LANDLOCK_LAST_ACCESS_NET LANDLOCK_ACCESS_NET_BIND_UDP
+#define LANDLOCK_LAST_ACCESS_NET LANDLOCK_ACCESS_NET_CONNECT_SEND_UDP
#define LANDLOCK_MASK_ACCESS_NET ((LANDLOCK_LAST_ACCESS_NET << 1) - 1)
#define LANDLOCK_NUM_ACCESS_NET __const_hweight64(LANDLOCK_MASK_ACCESS_NET)
diff --git a/security/landlock/net.c b/security/landlock/net.c
index 8da40614c452..0e697403eca9 100644
--- a/security/landlock/net.c
+++ b/security/landlock/net.c
@@ -44,7 +44,8 @@ int landlock_append_net_rule(struct landlock_ruleset *const ruleset,
static int current_check_access_socket(struct socket *const sock,
struct sockaddr *const address,
const int addrlen,
- access_mask_t access_request)
+ access_mask_t access_request,
+ bool connecting)
{
unsigned short sock_family;
__be16 port;
@@ -75,19 +76,51 @@ static int current_check_access_socket(struct socket *const sock,
switch (address->sa_family) {
case AF_UNSPEC:
- if (access_request == LANDLOCK_ACCESS_NET_CONNECT_TCP) {
+ if (access_request == LANDLOCK_ACCESS_NET_CONNECT_TCP ||
+ (access_request == LANDLOCK_ACCESS_NET_CONNECT_SEND_UDP &&
+ connecting)) {
/*
* Connecting to an address with AF_UNSPEC dissolves
- * the TCP association, which have the same effect as
- * closing the connection while retaining the socket
- * object (i.e., the file descriptor). As for dropping
- * privileges, closing connections is always allowed.
- *
- * For a TCP access control system, this request is
- * legitimate. Let the network stack handle potential
+ * the remote association while retaining the socket
+ * object (i.e., the file descriptor). For TCP, it has
+ * the same effect as closing the connection. For UDP,
+ * it removes any preset remote address. As for
+ * dropping privileges, these actions are always
+ * allowed.
+ * Let the network stack handle potential
* inconsistencies and return -EINVAL if needed.
*/
return 0;
+ } else if (access_request ==
+ LANDLOCK_ACCESS_NET_CONNECT_SEND_UDP) {
+ if (sock_family == AF_INET6) {
+ /*
+ * We cannot allow sending UDP datagrams to an
+ * explicit AF_UNSPEC address on IPv6 sockets,
+ * even if AF_UNSPEC is treated as "no address"
+ * on such sockets (so it should always be allowed).
+ * That's because the socket's family can change under
+ * our feet (if another thread calls setsockopt(IPV6_ADDRFORM))
+ * to IPv4, which would then treat AF_UNSPEC as
+ * AF_INET.
+ */
+ audit_net.family = AF_UNSPEC;
+ audit_net.sk = sock->sk;
+ landlock_init_layer_masks(
+ subject->domain, access_request,
+ &layer_masks, LANDLOCK_KEY_NET_PORT);
+ landlock_log_denial(
+ subject,
+ &(struct landlock_request){
+ .type = LANDLOCK_REQUEST_NET_ACCESS,
+ .audit.type =
+ LSM_AUDIT_DATA_NET,
+ .audit.u.net = &audit_net,
+ .access = access_request,
+ .layer_masks = &layer_masks,
+ });
+ return -EACCES;
+ }
} else if (access_request == LANDLOCK_ACCESS_NET_BIND_TCP ||
access_request == LANDLOCK_ACCESS_NET_BIND_UDP) {
/*
@@ -130,7 +163,11 @@ static int current_check_access_socket(struct socket *const sock,
} else {
WARN_ON_ONCE(1);
}
- /* Only for bind(AF_UNSPEC+INADDR_ANY) on IPv4 socket. */
+ /*
+ * AF_UNSPEC is treated as AF_INET only in
+ * bind(AF_UNSPEC+INADDR_ANY) on IPv4 sockets and
+ * when sending to AF_UNSPEC addresses on IPv4 sockets.
+ */
fallthrough;
case AF_INET: {
const struct sockaddr_in *addr4;
@@ -141,7 +178,8 @@ static int current_check_access_socket(struct socket *const sock,
addr4 = (struct sockaddr_in *)address;
port = addr4->sin_port;
- if (access_request == LANDLOCK_ACCESS_NET_CONNECT_TCP) {
+ if (access_request == LANDLOCK_ACCESS_NET_CONNECT_TCP ||
+ access_request == LANDLOCK_ACCESS_NET_CONNECT_SEND_UDP) {
audit_net.dport = port;
audit_net.v4info.daddr = addr4->sin_addr.s_addr;
} else if (access_request == LANDLOCK_ACCESS_NET_BIND_TCP ||
@@ -164,7 +202,8 @@ static int current_check_access_socket(struct socket *const sock,
addr6 = (struct sockaddr_in6 *)address;
port = addr6->sin6_port;
- if (access_request == LANDLOCK_ACCESS_NET_CONNECT_TCP) {
+ if (access_request == LANDLOCK_ACCESS_NET_CONNECT_TCP ||
+ access_request == LANDLOCK_ACCESS_NET_CONNECT_SEND_UDP) {
audit_net.dport = port;
audit_net.v6info.daddr = addr6->sin6_addr;
} else if (access_request == LANDLOCK_ACCESS_NET_BIND_TCP ||
@@ -221,6 +260,38 @@ static int current_check_access_socket(struct socket *const sock,
return -EACCES;
}
+static int current_check_autobind_udp_socket(struct socket *const sock)
+{
+ struct sockaddr_storage port0 = {};
+ unsigned short num;
+ bool slow;
+
+ /*
+ * On UDP sockets, if a local port has not already been bound,
+ * calling connect() or sending a first datagram has the side
+ * effect of autobinding an ephemeral port: we also have to check
+ * that the process would have had the right to bind(0) explicitly.
+ * Hold the socket lock around the inet_num read to exclude
+ * udp_lib_get_port()'s transient inet_num = snum write that is
+ * reverted to 0 on a failing reuseport bind.
+ */
+ slow = lock_sock_fast(sock->sk);
+ num = inet_sk(sock->sk)->inet_num;
+ unlock_sock_fast(sock->sk, slow);
+ if (num != 0)
+ return 0;
+
+ /*
+ * Construct a struct sockaddr* with port 0 to pretend the
+ * process tried to bind() on that address.
+ */
+ port0.ss_family = READ_ONCE(sock->sk->sk_family);
+
+ return current_check_access_socket(sock, (struct sockaddr *)&port0,
+ sizeof(port0),
+ LANDLOCK_ACCESS_NET_BIND_UDP, false);
+}
+
static int hook_socket_bind(struct socket *const sock,
struct sockaddr *const address, const int addrlen)
{
@@ -234,7 +305,7 @@ static int hook_socket_bind(struct socket *const sock,
return 0;
return current_check_access_socket(sock, address, addrlen,
- access_request);
+ access_request, false);
}
static int hook_socket_connect(struct socket *const sock,
@@ -242,19 +313,55 @@ static int hook_socket_connect(struct socket *const sock,
const int addrlen)
{
access_mask_t access_request;
+ int ret = 0;
if (sk_is_tcp(sock->sk))
access_request = LANDLOCK_ACCESS_NET_CONNECT_TCP;
+ else if (sk_is_udp(sock->sk))
+ access_request = LANDLOCK_ACCESS_NET_CONNECT_SEND_UDP;
else
return 0;
- return current_check_access_socket(sock, address, addrlen,
- access_request);
+ ret = current_check_access_socket(sock, address, addrlen,
+ access_request, true);
+
+ /*
+ * connect()ing to an AF_UNSPEC address does not trigger an
+ * autobind and should never be restricted.
+ */
+ if (ret == 0 && sk_is_udp(sock->sk) && address->sa_family != AF_UNSPEC)
+ ret = current_check_autobind_udp_socket(sock);
+
+ return ret;
+}
+
+static int hook_socket_sendmsg(struct socket *const sock,
+ struct msghdr *const msg, const int size)
+{
+ struct sockaddr *const address = msg->msg_name;
+ const int addrlen = msg->msg_namelen;
+ access_mask_t access_request;
+ int ret = 0;
+
+ if (sk_is_udp(sock->sk))
+ access_request = LANDLOCK_ACCESS_NET_CONNECT_SEND_UDP;
+ else
+ return 0;
+
+ if (address != NULL)
+ ret = current_check_access_socket(sock, address, addrlen,
+ access_request, false);
+
+ if (ret == 0)
+ ret = current_check_autobind_udp_socket(sock);
+
+ return ret;
}
static struct security_hook_list landlock_hooks[] __ro_after_init = {
LSM_HOOK_INIT(socket_bind, hook_socket_bind),
LSM_HOOK_INIT(socket_connect, hook_socket_connect),
+ LSM_HOOK_INIT(socket_sendmsg, hook_socket_sendmsg),
};
__init void landlock_add_net_hooks(void)
diff --git a/tools/testing/selftests/landlock/net_test.c b/tools/testing/selftests/landlock/net_test.c
index ec392d971ea3..016c7277e370 100644
--- a/tools/testing/selftests/landlock/net_test.c
+++ b/tools/testing/selftests/landlock/net_test.c
@@ -1326,12 +1326,13 @@ FIXTURE_TEARDOWN(mini)
/* clang-format off */
-#define ACCESS_LAST LANDLOCK_ACCESS_NET_BIND_UDP
+#define ACCESS_LAST LANDLOCK_ACCESS_NET_CONNECT_SEND_UDP
#define ACCESS_ALL ( \
LANDLOCK_ACCESS_NET_BIND_TCP | \
LANDLOCK_ACCESS_NET_CONNECT_TCP | \
- LANDLOCK_ACCESS_NET_BIND_UDP)
+ LANDLOCK_ACCESS_NET_BIND_UDP | \
+ LANDLOCK_ACCESS_NET_CONNECT_SEND_UDP)
/* clang-format on */
--
2.47.3
^ permalink raw reply related
* [PATCH v5 1/6] landlock: Add UDP bind() access control
From: Matthieu Buffet @ 2026-06-11 16:21 UTC (permalink / raw)
To: Mickaël Salaün, Günther Noack
Cc: linux-security-module, Mikhail Ivanov, konstantin.meskhidze,
Tingmao Wang, netdev, Matthieu Buffet
In-Reply-To: <20260611162107.49278-1-matthieu@buffet.re>
Add support for a first fine-grained UDP access right.
LANDLOCK_ACCESS_NET_BIND_UDP controls the ability to set the local port
of a UDP socket (via bind()). It will be useful for servers (to start
receiving datagrams), and for some clients that need to use a specific
source port (e.g. mDNS requires using port 5353).
For obvious performance reasons, access control is only enforced when
configuring sockets, not when using them for common send/recv
operations.
Bump ABI to allow userspace to detect and use this new right.
Signed-off-by: Matthieu Buffet <matthieu@buffet.re>
---
include/uapi/linux/landlock.h | 12 +++++++++---
security/landlock/audit.c | 1 +
security/landlock/limits.h | 2 +-
security/landlock/net.c | 18 ++++++++++++------
security/landlock/syscalls.c | 2 +-
tools/testing/selftests/landlock/base_test.c | 4 ++--
tools/testing/selftests/landlock/net_test.c | 5 +++--
7 files changed, 29 insertions(+), 15 deletions(-)
diff --git a/include/uapi/linux/landlock.h b/include/uapi/linux/landlock.h
index 10a346e55e95..045b251ff1b4 100644
--- a/include/uapi/linux/landlock.h
+++ b/include/uapi/linux/landlock.h
@@ -201,9 +201,9 @@ struct landlock_net_port_attr {
* with ``setsockopt(IP_LOCAL_PORT_RANGE)``.
*
* A Landlock rule with port 0 and the %LANDLOCK_ACCESS_NET_BIND_TCP
- * right means that requesting to bind on port 0 is allowed and it will
- * automatically translate to binding on a kernel-assigned ephemeral
- * port.
+ * or %LANDLOCK_ACCESS_NET_BIND_UDP right means that requesting to bind
+ * on port 0 is allowed and it will automatically translate to binding
+ * on a kernel-assigned ephemeral port.
*/
__u64 port;
};
@@ -373,10 +373,16 @@ struct landlock_net_port_attr {
* port. Support added in Landlock ABI version 4.
* - %LANDLOCK_ACCESS_NET_CONNECT_TCP: Connect TCP sockets to the given
* remote port. Support added in Landlock ABI version 4.
+ *
+ * And similarly for UDP port numbers:
+ *
+ * - %LANDLOCK_ACCESS_NET_BIND_UDP: Bind UDP sockets to the given local
+ * port. Support added in Landlock ABI version 10.
*/
/* clang-format off */
#define LANDLOCK_ACCESS_NET_BIND_TCP (1ULL << 0)
#define LANDLOCK_ACCESS_NET_CONNECT_TCP (1ULL << 1)
+#define LANDLOCK_ACCESS_NET_BIND_UDP (1ULL << 2)
/* clang-format on */
/**
diff --git a/security/landlock/audit.c b/security/landlock/audit.c
index 8d0edf94037d..e676ebffeebe 100644
--- a/security/landlock/audit.c
+++ b/security/landlock/audit.c
@@ -45,6 +45,7 @@ static_assert(ARRAY_SIZE(fs_access_strings) == LANDLOCK_NUM_ACCESS_FS);
static const char *const net_access_strings[] = {
[BIT_INDEX(LANDLOCK_ACCESS_NET_BIND_TCP)] = "net.bind_tcp",
[BIT_INDEX(LANDLOCK_ACCESS_NET_CONNECT_TCP)] = "net.connect_tcp",
+ [BIT_INDEX(LANDLOCK_ACCESS_NET_BIND_UDP)] = "net.bind_udp",
};
static_assert(ARRAY_SIZE(net_access_strings) == LANDLOCK_NUM_ACCESS_NET);
diff --git a/security/landlock/limits.h b/security/landlock/limits.h
index b454ad73b15e..c0f30a4591b8 100644
--- a/security/landlock/limits.h
+++ b/security/landlock/limits.h
@@ -23,7 +23,7 @@
#define LANDLOCK_MASK_ACCESS_FS ((LANDLOCK_LAST_ACCESS_FS << 1) - 1)
#define LANDLOCK_NUM_ACCESS_FS __const_hweight64(LANDLOCK_MASK_ACCESS_FS)
-#define LANDLOCK_LAST_ACCESS_NET LANDLOCK_ACCESS_NET_CONNECT_TCP
+#define LANDLOCK_LAST_ACCESS_NET LANDLOCK_ACCESS_NET_BIND_UDP
#define LANDLOCK_MASK_ACCESS_NET ((LANDLOCK_LAST_ACCESS_NET << 1) - 1)
#define LANDLOCK_NUM_ACCESS_NET __const_hweight64(LANDLOCK_MASK_ACCESS_NET)
diff --git a/security/landlock/net.c b/security/landlock/net.c
index 9eafc1dbf8ff..8da40614c452 100644
--- a/security/landlock/net.c
+++ b/security/landlock/net.c
@@ -88,15 +88,17 @@ static int current_check_access_socket(struct socket *const sock,
* inconsistencies and return -EINVAL if needed.
*/
return 0;
- } else if (access_request == LANDLOCK_ACCESS_NET_BIND_TCP) {
+ } else if (access_request == LANDLOCK_ACCESS_NET_BIND_TCP ||
+ access_request == LANDLOCK_ACCESS_NET_BIND_UDP) {
/*
* Binding to an AF_UNSPEC address is treated
* differently by IPv4 and IPv6 sockets. The socket's
* family may change under our feet due to
* setsockopt(IPV6_ADDRFORM), but that's ok: we either
- * reject entirely or require
- * %LANDLOCK_ACCESS_NET_BIND_TCP for the given port, so
- * it cannot be used to bypass the policy.
+ * reject entirely for IPv6 or require
+ * %LANDLOCK_ACCESS_NET_BIND_TCP or
+ * %LANDLOCK_ACCESS_NET_BIND_UDP for IPv4,
+ * so it cannot be used to bypass the policy.
*
* IPv4 sockets map AF_UNSPEC to AF_INET for
* retrocompatibility for bind accesses, only if the
@@ -142,7 +144,8 @@ static int current_check_access_socket(struct socket *const sock,
if (access_request == LANDLOCK_ACCESS_NET_CONNECT_TCP) {
audit_net.dport = port;
audit_net.v4info.daddr = addr4->sin_addr.s_addr;
- } else if (access_request == LANDLOCK_ACCESS_NET_BIND_TCP) {
+ } else if (access_request == LANDLOCK_ACCESS_NET_BIND_TCP ||
+ access_request == LANDLOCK_ACCESS_NET_BIND_UDP) {
audit_net.sport = port;
audit_net.v4info.saddr = addr4->sin_addr.s_addr;
} else {
@@ -164,7 +167,8 @@ static int current_check_access_socket(struct socket *const sock,
if (access_request == LANDLOCK_ACCESS_NET_CONNECT_TCP) {
audit_net.dport = port;
audit_net.v6info.daddr = addr6->sin6_addr;
- } else if (access_request == LANDLOCK_ACCESS_NET_BIND_TCP) {
+ } else if (access_request == LANDLOCK_ACCESS_NET_BIND_TCP ||
+ access_request == LANDLOCK_ACCESS_NET_BIND_UDP) {
audit_net.sport = port;
audit_net.v6info.saddr = addr6->sin6_addr;
} else {
@@ -224,6 +228,8 @@ static int hook_socket_bind(struct socket *const sock,
if (sk_is_tcp(sock->sk))
access_request = LANDLOCK_ACCESS_NET_BIND_TCP;
+ else if (sk_is_udp(sock->sk))
+ access_request = LANDLOCK_ACCESS_NET_BIND_UDP;
else
return 0;
diff --git a/security/landlock/syscalls.c b/security/landlock/syscalls.c
index accfd2e5a0cd..d45469d5d464 100644
--- a/security/landlock/syscalls.c
+++ b/security/landlock/syscalls.c
@@ -166,7 +166,7 @@ static const struct file_operations ruleset_fops = {
* If the change involves a fix that requires userspace awareness, also update
* the errata documentation in Documentation/userspace-api/landlock.rst .
*/
-const int landlock_abi_version = 9;
+const int landlock_abi_version = 10;
/**
* sys_landlock_create_ruleset - Create a new ruleset
diff --git a/tools/testing/selftests/landlock/base_test.c b/tools/testing/selftests/landlock/base_test.c
index 30d37234086c..6c8113c2ded1 100644
--- a/tools/testing/selftests/landlock/base_test.c
+++ b/tools/testing/selftests/landlock/base_test.c
@@ -76,8 +76,8 @@ TEST(abi_version)
const struct landlock_ruleset_attr ruleset_attr = {
.handled_access_fs = LANDLOCK_ACCESS_FS_READ_FILE,
};
- ASSERT_EQ(9, landlock_create_ruleset(NULL, 0,
- LANDLOCK_CREATE_RULESET_VERSION));
+ ASSERT_EQ(10, landlock_create_ruleset(NULL, 0,
+ LANDLOCK_CREATE_RULESET_VERSION));
ASSERT_EQ(-1, landlock_create_ruleset(&ruleset_attr, 0,
LANDLOCK_CREATE_RULESET_VERSION));
diff --git a/tools/testing/selftests/landlock/net_test.c b/tools/testing/selftests/landlock/net_test.c
index 4c528154ea92..ec392d971ea3 100644
--- a/tools/testing/selftests/landlock/net_test.c
+++ b/tools/testing/selftests/landlock/net_test.c
@@ -1326,11 +1326,12 @@ FIXTURE_TEARDOWN(mini)
/* clang-format off */
-#define ACCESS_LAST LANDLOCK_ACCESS_NET_CONNECT_TCP
+#define ACCESS_LAST LANDLOCK_ACCESS_NET_BIND_UDP
#define ACCESS_ALL ( \
LANDLOCK_ACCESS_NET_BIND_TCP | \
- LANDLOCK_ACCESS_NET_CONNECT_TCP)
+ LANDLOCK_ACCESS_NET_CONNECT_TCP | \
+ LANDLOCK_ACCESS_NET_BIND_UDP)
/* clang-format on */
--
2.47.3
^ permalink raw reply related
* [PATCH v5 0/6] landlock: Add UDP access control support
From: Matthieu Buffet @ 2026-06-11 16:21 UTC (permalink / raw)
To: Mickaël Salaün, Günther Noack
Cc: linux-security-module, Mikhail Ivanov, konstantin.meskhidze,
Tingmao Wang, netdev, Matthieu Buffet
Hi,
This is V5 (hopefully final) of UDP access control in Landlock. It has
very few changes compared to v4, described below; all feedback given so
far should be in there (if not that's a mistake on my part). It adds
only two access rights, to restrict configuring local and remote
addresses on UDP sockets. The one that restricts setting a remote
address also controls sending datagrams to explicit remote addresses
(ignoring any remote address preset on the socket). The one that
restricts binding to a local port also applies when the kernel
auto-binds an ephemeral port.
Changes v1->v2
==============
- recvmsg hook is gone and sendmsg hook doesn't apply when sending to a
remote address pre-set on socket, to improve performance
- don't add a get_addr_port() helper function, which required a weird
"am I in IPv4 or IPv6 context"
- reorder hook prologue for consistency: check domain, then type and
family
Changes v2->v3
==============
- removed support for sending datagrams with explicit destination
address of family AF_UNSPEC, which allowed bypassing restrictions via
a race condition
- rebased on linux-mic/next => add support for auditing
- fixed mistake in selftests when using unspec_srv variables, which were
implicitly of type SOCK_STREAM and did not actually test UDP code
- add tests for IPPROTO_IP
- improved docs, split off TCP-related refactoring
Changes v3->v4
==============
- merge LANDLOCK_ACCESS_NET_CONNECT_UDP and
LANDLOCK_ACCESS_NET_SENDTO_UDP into
LANDLOCK_ACCESS_NET_CONNECT_SEND_UDP (everything that might set the
destination of a datagram)
- make LANDLOCK_ACCESS_NET_BIND_UDP apply when kernel is about to
auto-bind an ephemeral port for the caller. Block it if policy would
not allow an explicit call to bind(0)
- only deny sending AF_UNSPEC datagrams on IPv6 sockets, where there is
a risk of the address family changing midway
Changes v4->v5
==============
- fix unmarked racy socket address family accesses
- fix improper bind(0) autobind access check when connecting to AF_UNSPEC
- fix example code structure in documentation to match pattern of usage
used in the rest of the code
- fix bad copy-pastes in selftests, and some unimportant variable types
- squash LANDLOCK_ACCESS_NET_CONNECT_SEND_UDP commits
- add a small help note in sandboxer to point out the need to allow
binding a source port when emitting, to reduce surprises if people
try to get a feeling of the feature through sandboxer before reading
the docs
v1:
Link: https://lore.kernel.org/all/20240916122230.114800-1-matthieu@buffet.re/
v2:
Link: https://lore.kernel.org/all/20241214184540.3835222-1-matthieu@buffet.re/
v3:
Link: https://lore.kernel.org/all/20251212163704.142301-1-matthieu@buffet.re/
v4:
Link: https://lore.kernel.org/all/20260502124306.3975990-1-matthieu@buffet.re/
Based on https://git.kernel.org/pub/scm/linux/kernel/git/mic/linux.git
9ea6fb415fc8 ("selftests/landlock: Explicitly disable audit in teardowns")
from branch next.
All lines added are covered with selftests (net.c goes from 93.1% to 95.3%
line coverage).
Closes: https://github.com/landlock-lsm/linux/issues/10
Matthieu Buffet (6):
landlock: Add UDP bind() access control
landlock: Add UDP send+connect access control
selftests/landlock: Add tests for UDP bind/connect
selftests/landlock: Add tests for UDP send
samples/landlock: Add sandboxer UDP access control
landlock: Add documentation for UDP support
Documentation/userspace-api/landlock.rst | 91 +-
include/uapi/linux/landlock.h | 35 +-
samples/landlock/sandboxer.c | 41 +-
security/landlock/audit.c | 3 +
security/landlock/limits.h | 2 +-
security/landlock/net.c | 155 ++-
security/landlock/syscalls.c | 2 +-
tools/testing/selftests/landlock/base_test.c | 4 +-
tools/testing/selftests/landlock/net_test.c | 1166 ++++++++++++++++--
9 files changed, 1353 insertions(+), 146 deletions(-)
base-commit: 9ea6fb415fc8b535da91dadd74f948d96ba3d41d
--
2.47.3
^ permalink raw reply
* Re: [PATCH v2] hardening: Default randstruct off with rust for better allmodconfig support
From: Mark Brown @ 2026-06-11 9:23 UTC (permalink / raw)
To: Kees Cook
Cc: Miguel Ojeda, Gustavo A. R. Silva, Paul Moore, James Morris,
Serge E. Hallyn, Miguel Ojeda, Boqun Feng, Gary Guo,
Björn Roy Baron, Benno Lossin, Andreas Hindborg, Alice Ryhl,
Trevor Gross, Danilo Krummrich, linux-hardening,
linux-security-module, linux-kernel, rust-for-linux
In-Reply-To: <202606101339.66BFE3AA67@keescook>
[-- Attachment #1: Type: text/plain, Size: 453 bytes --]
On Wed, Jun 10, 2026 at 01:41:19PM -0700, Kees Cook wrote:
> For the linux-next testing, are you doing GCC + llvm rustc builds? IIUC,
> then the support patch mentioned, I think, doesn't actually solve the
> problem?
No, the allmodconfig builds are currently using LLVM - there's other
issues with rust+GCC, right now KASAN IIRC, and it generally seems like
it'll be more reliable to use LLVM to get the coverage. Both compilers
need to work anyway.
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 488 bytes --]
^ permalink raw reply
* [PATCH v2] cred: prevent slab cache merging for cred_jar
From: Mohammed EL Kadiri @ 2026-06-11 7:00 UTC (permalink / raw)
To: Paul Moore
Cc: Serge Hallyn, Vlastimil Babka, Kees Cook, linux-security-module,
linux-hardening, linux-kernel, Mohammed EL Kadiri
Add SLAB_NO_MERGE to cred_jar to ensure struct cred objects get
dedicated slab pages, preventing the allocator from merging this
cache with other similarly-sized caches. This is a hardening measure
to provide type isolation for credential objects.
Reviewed-by: Kees Cook <kees@kernel.org>
Signed-off-by: Mohammed EL Kadiri <med08elkadiri@gmail.com>
---
Changes in v2:
- Collected Reviewed-by tag from Kees Cook.
- No code changes from v1.
kernel/cred.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/kernel/cred.c b/kernel/cred.c
index 9676965c0981..0e4ee60a5acd 100644
--- a/kernel/cred.c
+++ b/kernel/cred.c
@@ -557,7 +557,7 @@ void __init cred_init(void)
{
/* allocate a slab in which we can store credentials */
cred_jar = KMEM_CACHE(cred,
- SLAB_HWCACHE_ALIGN | SLAB_PANIC | SLAB_ACCOUNT);
+ SLAB_HWCACHE_ALIGN | SLAB_PANIC | SLAB_ACCOUNT | SLAB_NO_MERGE);
}
/**
--
2.43.0
^ permalink raw reply related
* Re: [PATCH v4 0/7] landlock: Add UDP access control support
From: Matthieu Buffet @ 2026-06-11 0:27 UTC (permalink / raw)
To: Mickaël Salaün
Cc: Günther Noack, linux-security-module, Mikhail Ivanov,
konstantin.meskhidze, Tingmao Wang
In-Reply-To: <20260525.aeng6Xeula5o@digikod.net>
On 5/25/2026 10:28 PM, Mickaël Salaün wrote:
> Also, some interesting (and some other not relevant) findings here:
> https://sashiko.dev/#/patchset/20260502124306.3975990-1-matthieu%40buffet.re
No false positive in there afaict (e.g. connect(AF_UNSPEC) does not
trigger an autobind but was indeed incorrectly subjected to a bind(0)
access check).
I have just finished merging small fixes. Thanks!
--
Matthieu
^ permalink raw reply
* Re: [PATCH] cred: prevent slab cache merging for cred_jar
From: Paul Moore @ 2026-06-10 23:53 UTC (permalink / raw)
To: Mohammed EL Kadiri, Kees Cook
Cc: Serge Hallyn, Vlastimil Babka, linux-security-module,
linux-hardening, linux-kernel
In-Reply-To: <202606101510.7F6F4118@keescook>
On Wed, Jun 10, 2026 at 6:11 PM Kees Cook <kees@kernel.org> wrote:
> On Wed, Jun 10, 2026 at 10:07:24PM +0100, Mohammed EL Kadiri wrote:
> > Hi Kees,
> >
> > Thanks for the review!
> > Following Vlastimil and Jarkko's feedback on the key_jar patch, should
> > I send a v2 here as well with similar commit message modification:
> > removing CVE references, dropping the skbuff comparison, and framing
> > it as hardening?
>
> It wouldn't hurt, yeah. I have that kind of already in my head while I
> read these patches, but it would be better for other folks to see it
> framed more accurately.
Just as an FYI, the patch seems reasonable to me, but considering
where we are in the dev cycle I figured it best to wait until after
the upcoming merge window to do anything with it.
> > On Wed, Jun 10, 2026 at 9:45 PM Kees Cook <kees@kernel.org> wrote:
> > >
> > > On Sat, Jun 06, 2026 at 03:25:58PM +0100, Mohammed EL Kadiri wrote:
> > > > The cred_jar slab cache holds struct cred objects, which contain
> > > > process credentials: uid, gid, euid, egid, and capability sets.
> > > > Overwriting any of these fields is sufficient for privilege escalation.
> > > >
> > > > On a default Ubuntu 6.17.0-23-generic system, cred_jar (named "cred"
> > > > in sysfs) has 2 aliases, meaning 2 unrelated object types share its
> > > > slab pages (object_size=184, objs_per_slab=42).
> > > >
> > > > Cross-cache heap exploitation relies on slab cache merging to achieve
> > > > type confusion between unrelated kernel objects. CVE-2022-29582
> > > > demonstrates this technique: an io_uring use-after-free is leveraged
> > > > across cache boundaries through page-level reallocation, ultimately
> > > > achieving root. struct cred is a primary target in this class of
> > > > attacks due to the direct privilege escalation that results from
> > > > corrupting any of its identity or capability fields.
> > > >
> > > > Add SLAB_NO_MERGE to ensure cred_jar receives dedicated slab pages,
> > > > so that freed credential slots can only be reallocated as struct cred
> > > > objects. The memory overhead is minimal: one struct cred exists per
> > > > task, and with 42 objects per slab page, the cost of dedicated pages
> > > > is negligible. There is zero performance impact on the allocation
> > > > hot path.
> > > >
> > > > This follows the precedent set by skbuff_head_cache (net/core/skbuff.c)
> > > > and key_jar (security/keys/key.c) which use SLAB_NO_MERGE for similar
> > > > isolation requirements.
> > > >
> > > > Signed-off-by: Mohammed EL Kadiri <med08elkadiri@gmail.com>
> > >
> > > Yes please. :)
> > >
> > > Reviewed-by: Kees Cook <kees@kernel.org>
--
paul-moore.com
^ permalink raw reply
* Re: [PATCH bpf-next 3/5] bpftool: Cover loader metadata with the program signature
From: bot+bpf-ci @ 2026-06-10 23:48 UTC (permalink / raw)
To: daniel, ast
Cc: kpsingh, James.Bottomley, paul, bboscaccy, memxor, torvalds, bpf,
linux-security-module, ast, andrii, daniel, martin.lau, eddyz87,
yonghong.song, clm, ihor.solodrai
In-Reply-To: <20260610230329.727075-4-daniel@iogearbox.net>
[-- Attachment #1: Type: text/plain, Size: 1746 bytes --]
> bpftool: Cover loader metadata with the program signature
>
> bpftool_prog_sign() signed only the loader instructions. The metadata
> blob the loader installs was left to an in-loader hash check, which
> the kernel now performs at load time over insns || metadata.
>
> Sign that same concatenation: pass the metadata blob (gen_loader_opts
> data) through to bpftool_prog_sign() and feed insns || metadata to
> CMS_final(). The excl_prog_hash stays a digest of the instructions
> alone; it binds the metadata map to the loader and is matched against
> prog->digest by the verifier, independent of what the signature covers.
>
> The signed artifact is now plain data: both bytes the signature
> covers are embedded verbatim in the generated skeleton, so signing
> and verifying an lskel is an ordinary CMS operation that a signer or
> auditor can perform (or reproduce) offline, without analyzing loader
> bytecode to establish what the signature actually attests to [0].
>
> Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
This closes a gap where the loader metadata was left outside the program
signature. The signing path in bpftool_prog_sign() was introduced by commit
40863f4d6ef2 ("bpftool: Add support for signing BPF programs"), which fed
only opts->insns to the signature and set only sopts.insns/sopts.insns_sz in
gen_trace(). Should this carry a Fixes: tag?
Fixes: 40863f4d6ef2 ("bpftool: Add support for signing BPF programs")
---
AI reviewed your patch. Please fix the bug or email reply why it's not a bug.
See: https://github.com/kernel-patches/vmtest/blob/master/ci/claude/README.md
CI run summary: https://github.com/kernel-patches/bpf/actions/runs/27312517811
^ permalink raw reply
* Re: [bug report] apparmor: add support loading per permission tagging
From: John Johansen @ 2026-06-10 23:12 UTC (permalink / raw)
To: Dan Carpenter; +Cc: apparmor, linux-security-module
In-Reply-To: <adjOGC9qtr_9XkgS@stanley.mountain>
On 4/10/26 03:16, Dan Carpenter wrote:
> Hello John Johansen,
>
> Commit 3d28e2397af7 ("apparmor: add support loading per permission
> tagging") from Apr 1, 2025 (linux-next), leads to the following
> Smatch static checker warning:
>
> security/apparmor/policy_unpack.c:883 unpack_tags()
> warn: missing error code 'error'
>
Sorry for the late reply, I am slowly working through the backlog.
I managed to get a fix for this into 7.1:
72971e6f745ad apparmor: fix unpack_tags to properly return error in failure cases
thanks
john
> security/apparmor/policy_unpack.c
> 852 static int unpack_tags(struct aa_ext *e, struct aa_tags_struct *tags,
> 853 const char **info)
> 854 {
> 855 int error = -EPROTO;
> 856 void *pos = e->pos;
> 857
> 858 AA_BUG(!tags);
> 859 /* policy tags are optional */
> 860 if (aa_unpack_nameX(e, AA_STRUCT, "tags")) {
> 861 u32 version;
> 862
> 863 if (!aa_unpack_u32(e, &version, "version") || version != 1) {
> 864 *info = "invalid tags version";
> 865 goto fail_reset;
> 866 }
> 867 error = unpack_strs_table(e, "strs", true, &tags->strs);
> 868 if (error) {
> 869 *info = "failed to unpack profile tag.strs";
> 870 goto fail;
> 871 }
> 872 error = unpack_tag_headers(e, tags);
> 873 if (error) {
> 874 *info = "failed to unpack profile tag.headers";
> 875 goto fail;
> 876 }
> 877 error = unpack_tagsets(e, tags);
> 878 if (error) {
> 879 *info = "failed to unpack profile tag.sets";
> 880 goto fail;
> 881 }
> 882 if (!aa_unpack_nameX(e, AA_STRUCTEND, NULL))
> --> 883 goto fail;
>
> set the error code here
>
> 884
> 885 if (!verify_tags(tags, info))
> 886 goto fail;
>
> and here
>
> 887 }
> 888
> 889 return 0;
> 890
> 891 fail:
> 892 aa_destroy_tags(tags);
> 893 fail_reset:
> 894 e->pos = pos;
> 895 return error;
> 896 }
>
> This email is a free service from the Smatch-CI project [smatch.sf.net].
>
> regards,
> dan carpenter
^ permalink raw reply
* [PATCH bpf-next 5/5] Documentation/bpf: Add BPF signing and enforcement doc
From: Daniel Borkmann @ 2026-06-10 23:03 UTC (permalink / raw)
To: ast
Cc: kpsingh, James.Bottomley, paul, bboscaccy, memxor, torvalds, bpf,
linux-security-module
In-Reply-To: <20260610230329.727075-1-daniel@iogearbox.net>
Describe the BPF signing design end to end: why a trusted loader is
needed, the signature(insns || metadata) contract, load-time
verification via fd_array (exclusive + frozen maps), the binary
BPF_SIG_{UNSIGNED,VERIFIED} verdict, and how [BPF] LSMs can enforce
policy on it.
This writes down the contract on the discussion points with the LSM /
integrity folks [0][1]: by the time security_bpf_prog_load() is
called, signature verification has fully completed and covers the
instructions plus the frozen contents of every bound exclusive map;
there is no intermediate "loader verified, payload pending" state
to reason about; and what BPF_SIG_VERIFIED means at each hook is
spelled out explicitly, including the post-verifier coverage check
that keeps the verdict binary.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/bc823ddbaf63e0e177eb46d1cc15076e4e2e689d.camel@HansenPartnership.com [0]
Link: https://lore.kernel.org/bpf/CAHC9VhSDkwGgPfrBUh7EgBKEJj_JjnY68c0YAmuuLT_i--GskQ@mail.gmail.com [1]
---
Documentation/bpf/index.rst | 1 +
Documentation/bpf/signing.rst | 537 ++++++++++++++++++++++++++++++++++
2 files changed, 538 insertions(+)
create mode 100644 Documentation/bpf/signing.rst
diff --git a/Documentation/bpf/index.rst b/Documentation/bpf/index.rst
index 0d5c6f659266..638a00d42bc2 100644
--- a/Documentation/bpf/index.rst
+++ b/Documentation/bpf/index.rst
@@ -28,6 +28,7 @@ that goes into great technical depth about the BPF Architecture.
classic_vs_extended.rst
bpf_iterators
bpf_licensing
+ signing
test_debug
clang-notes
linux-notes
diff --git a/Documentation/bpf/signing.rst b/Documentation/bpf/signing.rst
new file mode 100644
index 000000000000..24997ea50345
--- /dev/null
+++ b/Documentation/bpf/signing.rst
@@ -0,0 +1,537 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+============
+BPF signing
+============
+
+This document describes how BPF programs are cryptographically signed, how the
+kernel verifies them at load time, and how Linux Security Modules (LSMs) -
+including the BPF LSM - use the resulting verdict to enforce policy. It is
+written for developers who want to produce signed BPF objects, understand what
+the signature actually guarantees, or build a policy on top of it.
+
+Motivation
+==========
+
+A signed BPF program lets the kernel establish that the bytecode being loaded
+originates from a trusted producer and was not modified in transit. On its own
+the kernel does not *require* signatures - an unsigned program loads exactly as
+before - but it records a verdict (see `The verdict`_) that an LSM can gate on.
+This is the building block for policies such as "only run BPF that was signed by
+a key in the trusted keyring", as enforced for instance by IPE.
+
+Signing is orthogonal to the existing permission model: it does not replace the
+capability checks or the verifier. A signed load still requires the usual
+privileges (``CAP_BPF`` and any program-type-specific capability, subject to
+``kernel.unprivileged_bpf_disabled``), and the loader's instructions are still
+checked by the verifier like any other program. A valid signature establishes
+*origin and integrity*, not safety - it lets a policy trust where the bytecode
+came from, it does not let a load skip any check it would otherwise face.
+
+The hard part is *what* gets signed. A naive scheme would sign a program's
+instruction buffer at build time and verify that signature at
+``BPF_PROG_LOAD``. That does not survive contact with real BPF objects, because
+the bytes the kernel finally loads are not the bytes the developer built and
+signed. Between the two, libbpf and the kernel rewrite the program:
+
+- **map file descriptors** are patched into ``ld_imm64`` instructions
+ (``BPF_PSEUDO_MAP_FD``), and a map's fd is assigned at load time, so it
+ differs on every run;
+- **CO-RE relocations** rewrite field offsets, sizes and existence flags against
+ the *running* kernel's BTF, so the result differs from one kernel to the next;
+- **kfunc and ksym references** are resolved to ids/addresses in the running
+ kernel;
+- **global data** (``.rodata``/``.data``/``.bss``) is created and seeded as maps
+ at load.
+
+So a signature over the original instructions cannot match the relocated
+instructions the verifier ends up checking, and the relocated form cannot be
+produced ahead of time because it depends on the target kernel. There is no
+fixed byte string that is both signable at build time and what the kernel
+actually loads - which is why a program cannot simply be signed and loaded
+directly.
+
+The trusted loader
+==================
+
+The solution is to move that setup work *into* a small BPF program - the
+**loader** - and sign the loader instead of the individual programs. libbpf's
+``gen_loader`` machinery (``bpftool gen skeleton -L``, the "light skeleton")
+emits a ``BPF_PROG_TYPE_SYSCALL`` program whose body performs the bpf() syscalls
+that create maps, apply relocations, and load the real programs. The payload it
+installs - the serialized programs, map descriptions, relocation data and
+initial values - lives in a separate array map, the **metadata map**
+(``__loader.map``).
+
+So the unit of trust is the loader, and the signing contract is::
+
+ Sig(I_loader || D_meta)
+
+where ``I_loader`` is the loader's instruction stream and ``D_meta`` is the
+content of the metadata map. Verifying the loader's signature establishes that
+both the loader *and* the payload it is about to install are authentic. The
+loader is reproducible: ``gen_loader`` builds it from primitives so the same
+object yields the same bytes on any build host.
+
+Why the loader is signable when the program is not
+--------------------------------------------------
+
+The loader sidesteps every rewrite listed above, because the bytes that are
+signed are *relocation-invariant*:
+
+- The loader's own instructions are a fixed sequence of bpf() syscalls emitted
+ by ``gen_loader``; they carry no CO-RE relocations and resolve no ksyms, so
+ they are identical on every kernel. The metadata map is referenced by *index*
+ into ``fd_array`` (``BPF_PSEUDO_MAP_IDX``), not by a baked-in file descriptor,
+ so even that reference does not change between build and load. The loader
+ instruction bytes the kernel verifies are exactly the bytes that were signed.
+- The metadata map is opaque, frozen data - the serialized target programs,
+ their relocation records, map descriptions and initial values. Its bytes are
+ identical at build time and at load time, so they are simply appended to the
+ instructions and covered by the same signature (there is no separate metadata
+ hash to compute or compare).
+
+All the host-specific rewriting - creating maps, patching their fds into the
+target programs, applying CO-RE, resolving ksyms, seeding global data - still
+happens, but it happens *inside the loader at runtime*, on the verified
+metadata, **after** the kernel has verified the ``insns || metadata`` signature.
+The kernel never has to verify the relocated target programs: it verifies the
+loader and its inputs once, and trust transfers to whatever that now-trusted,
+deterministic loader installs. The relocation step is moved from "before the
+signature can be checked" to "after a trusted program runs" - which is exactly
+what makes it signable.
+
+Because the metadata map is the loader's only untrusted input, two existing map
+properties are reused to keep it trustworthy across the load:
+
+Exclusive maps
+ A map created with ``excl_prog_hash`` (see ``BPF_MAP_CREATE``) may only be
+ accessed by a program whose digest matches that hash. The verifier enforces
+ ``map->excl_prog_sha == prog->digest`` for every map a program uses, so the
+ metadata map is bound to exactly the signed loader and cannot be shared with
+ or mutated by another program.
+
+Frozen maps
+ The metadata map is frozen (``BPF_MAP_FREEZE``) before the loader is loaded.
+ Freezing blocks further userspace writes, so the bytes folded into the
+ signature cannot change before the loader runs. (Freezing does not make the
+ map read-only to the loader program itself, which still writes created file
+ descriptors back into the blob's scratch area.)
+
+Load-time verification
+=======================
+
+Rather than have the loader check its own metadata from within BPF, the kernel
+verifies it directly at ``BPF_PROG_LOAD``, with no new UAPI. The mechanism
+reuses the existing ``fd_array``:
+
+#. Userspace creates the metadata map with ``excl_prog_hash`` set to the
+ loader's digest, populates it, and freezes it.
+#. The loader is loaded with ``signature``/``signature_size``/``keyring_id``
+ set, the metadata map referenced through ``fd_array``, and ``fd_array_cnt``
+ set so the kernel knows the array's length.
+#. When a signature is present and ``fd_array_cnt`` is non-zero, every map in
+ ``fd_array`` must be exclusive (carry ``excl_prog_sha``); a non-exclusive map
+ there is rejected (``-EINVAL``). The kernel appends each map's frozen contents
+ to the instruction buffer and verifies the PKCS#7 signature over the
+ concatenation ``insns || metadata_0 || metadata_1 || ...`` in ``fd_array``
+ order.
+
+A signed program therefore takes one of exactly two shapes, both fully
+supported:
+
+- **No bound maps** (``fd_array_cnt == 0``): there is nothing to append, so the
+ kernel verifies the signature over the instructions alone. A valid signature
+ yields ``BPF_SIG_VERIFIED`` and the program loads. This is the ordinary case
+ for a directly-loaded signed program with no separate payload; it is *not*
+ rejected for "missing" metadata, because it has none to cover.
+- **Exclusive bound maps** (``fd_array_cnt > 0``): every entry is exclusive and
+ folded, so the signature covers ``insns || metadata``.
+
+There is no third shape: a non-exclusive map in a signed program's ``fd_array``
+is rejected rather than silently left out of the signature, so a program bound
+to a signed loader never has a map the signature does not cover.
+
+The digest binding (``excl_prog_sha == prog->digest``) is enforced by the
+verifier as usual; because that check runs while ``fd_array`` is resolved -
+before the verifier would otherwise compute the tag - ``prog->digest`` is
+computed up front, over the unmodified (signature-covered) instructions, for any
+load that folds metadata.
+
+After the verifier has populated ``used_maps``, the kernel additionally requires
+that every *exclusive* map the program uses is one that was folded into the
+signature, and rejects the load (``-EACCES``) otherwise. This backstops the
+``fd_array`` rule above for an exclusive map the program reaches by other means
+(for example a directly-referenced fd): such a map is code-bearing but not
+covered, so the load is rejected. Together they keep the verdict binary - a
+signed program cannot read from an exclusive (code-bearing) map its signature
+does not cover, and a different but equally digest-bound map cannot be
+substituted at the ``fd_array`` slot the loader reads. Non-exclusive maps the
+program reaches by other means are runtime data, not part of the signed
+artifact, and need not be covered.
+
+The verdict
+===========
+
+A program is either unsigned or fully verified - there is no intermediate
+state. The outcome is recorded in ``prog->aux->sig.verdict``:
+
+.. code-block:: c
+
+ enum bpf_sig_verdict {
+ BPF_SIG_UNSIGNED = 0,
+ BPF_SIG_VERIFIED,
+ };
+
+``BPF_SIG_VERIFIED`` means the signature is valid and covers the instructions
+*and* the frozen contents of every exclusive map the program uses:
+
+- For an ordinary, directly-loaded signed program the instructions are the whole
+ artifact and it uses no exclusive maps, so a valid instruction signature is
+ the complete verification.
+- For a signed loader the metadata map is exclusive, so its contents are folded
+ in and the signature covers ``insns || metadata``.
+
+There is deliberately no "instructions verified but metadata not" verdict: a
+signed loader that fails to cover its metadata is *rejected* (see above), not
+recorded with a weaker verdict. ``BPF_SIG_VERIFIED`` therefore always means the
+program and everything the signature is responsible for are authentic, which is
+what a policy can rely on.
+
+Alongside the verdict the kernel records which keyring validated the signature;
+see `Keyrings`_.
+
+Enforcement via LSMs
+====================
+
+Signing only *records* a verdict; an LSM turns it into policy. The verdict and
+keyring fields live in ``struct bpf_prog_aux``, so a BPF LSM program can read
+them directly (see Documentation/bpf/prog_lsm.rst for writing and attaching BPF
+LSM programs); the same fields are equally available to in-tree LSMs such as
+IPE. Two hooks are useful at different points of the load: the dedicated
+``security_bpf_prog_load()`` gates admission before the verifier runs, and the
+existing ``security_bpf_prog()`` observes a program that has fully loaded.
+
+Admission: ``security_bpf_prog_load()``
+---------------------------------------
+
+This existing hook is called at ``BPF_PROG_LOAD`` entry, **for every load**,
+before the verifier runs. By this point the verdict and keyring fields are set,
+so the hook can see whether the program was validly signed, which
+keyring validated it, the load ``attr``, the BPF token and whether the load came
+from the kernel.
+
+This is the place for *coarse admission* that must also see unsigned and
+not-yet-verified loads: require a signature at all, restrict the acceptable
+keyring, restrict which token/credentials may load BPF, apply per-program-type
+rules, or audit every attempt. It is the primary deny point.
+
+One subtlety: this runs before the verifier, so although the verdict is already
+``BPF_SIG_VERIFIED`` for a valid signature, the kernel has not *yet* confirmed
+that the program only uses exclusive maps the signature covers. That check
+happens after verification, and a load that violates it is rejected (``-EACCES``)
+regardless of the LSM. So ``BPF_SIG_VERIFIED`` *here* means "validly signed"; a
+program that would read an uncovered exclusive map is still rejected before it
+ever loads, and by the time it has fully loaded (see the next hook) the verdict
+carries its full meaning.
+
+A more realistic admission policy than "is it signed at all": accept programs
+signed by a system keyring, accept a user-keyring signature only if the
+key/keyring it was verified against is on an explicit allowlist, and emit a
+tamper-evident record of every decision so that even denied attempts are
+auditable. (Illustrative - error checking elided.)
+
+.. code-block:: c
+
+ /* Serials of user keys/keyrings we additionally trust. */
+ struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, __s32); /* keyring_serial */
+ __type(value, __u8);
+ __uint(max_entries, 64);
+ } trusted_user_keys SEC(".maps");
+
+ /* Audit stream consumed by a userspace logger. */
+ struct {
+ __uint(type, BPF_MAP_TYPE_RINGBUF);
+ __uint(max_entries, 1 << 16);
+ } audit SEC(".maps");
+
+ struct decision { __u32 prog_type, verdict, ktype; __s32 serial, ret; };
+
+ SEC("lsm/bpf_prog_load")
+ int BPF_PROG(admit, struct bpf_prog *prog, union bpf_attr *attr,
+ struct bpf_token *token, bool kernel)
+ {
+ __u32 verdict = prog->aux->sig.verdict;
+ __u32 ktype = prog->aux->sig.keyring_type;
+ __s32 serial = prog->aux->sig.keyring_serial;
+ struct decision *d;
+ int ret = 0;
+
+ if (kernel)
+ return 0; /* trust in-kernel loads */
+
+ if (verdict != BPF_SIG_VERIFIED)
+ ret = -EPERM; /* must be validly signed */
+ else if (ktype == BPF_SIG_KEYRING_USER &&
+ !bpf_map_lookup_elem(&trusted_user_keys, &serial))
+ ret = -EPERM; /* key/keyring not allowlisted */
+
+ d = bpf_ringbuf_reserve(&audit, sizeof(*d), 0);
+ if (d) {
+ d->prog_type = attr->prog_type;
+ d->verdict = verdict;
+ d->ktype = ktype;
+ d->serial = serial;
+ d->ret = ret;
+ bpf_ringbuf_submit(d, 0); /* record allow *and* deny */
+ }
+ return ret;
+ }
+
+Observing a verified load: ``security_bpf_prog()``
+--------------------------------------------------
+
+There is deliberately no separate "metadata attested" hook. The coverage check
+above is enforced by the kernel unconditionally, so a signed loader that fails
+to cover its metadata never loads and an LSM never has to re-establish that
+fact. To *act on* a program that has successfully and fully loaded, use the
+existing ``security_bpf_prog()`` hook (``lsm/bpf_prog``), which fires from
+``bpf_prog_new_fd()`` - after the verifier, after the coverage check, and after
+``bpf_prog_alloc_id()``. Relative to the admission hook this point is strictly
+later and stronger:
+
+- the program has an id (``prog->aux->id``), so it can be recorded or correlated
+ with later events;
+- ``verdict == BPF_SIG_VERIFIED`` *here* means **fully** verified - a program
+ that used an uncovered exclusive map was already rejected, so it cannot reach
+ this point;
+- it observes only programs that actually loaded; a failed load never mints an
+ fd, so it never reaches this hook.
+
+It takes only ``prog``, and a non-zero return still aborts (the fd is not
+handed out), so it can veto as well as observe. One wrinkle: it also fires on
+other paths that mint a new program fd - notably ``bpf_prog_get_fd_by_id()`` -
+not just on a fresh load. An LSM can tell the two apart with a small hash map
+keyed by the ``prog`` pointer: the *first* time a program is seen is the load
+(its id is already valid here); a later sighting of the same program is just
+another fd to a program that already exists.
+
+To bound the map and let a reused ``prog`` address read as a fresh load, this
+can be paired with ``security_bpf_prog_free()`` (``lsm/bpf_prog_free``), which
+deletes the entry on teardown - keyed by the same ``prog`` pointer, since
+``bpf_prog_free_id()`` has already cleared ``prog->aux->id`` to ``0`` by the time
+that hook runs. (Illustrative - privileged LSM, error checking elided.)
+
+.. code-block:: c
+
+ struct rec { __u32 id, ktype; __s32 serial; };
+
+ struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, __u64); /* struct bpf_prog * -- stable id */
+ __type(value, struct rec);
+ __uint(max_entries, 4096);
+ } live SEC(".maps");
+
+ SEC("lsm/bpf_prog") /* fires after load and on every later fd */
+ int BPF_PROG(observe, struct bpf_prog *prog)
+ {
+ __u64 key = (__u64)(unsigned long)prog;
+ struct rec r;
+
+ if (prog->aux->sig.verdict != BPF_SIG_VERIFIED)
+ return 0;
+ if (bpf_map_lookup_elem(&live, &key))
+ return 0; /* seen before: a later fd, not a load */
+
+ /* First sighting == this program just loaded; id is valid here. */
+ r.id = prog->aux->id;
+ r.ktype = prog->aux->sig.keyring_type;
+ r.serial = prog->aux->sig.keyring_serial;
+ bpf_map_update_elem(&live, &key, &r, BPF_NOEXIST);
+ /* ... newly-loaded verified-program action, e.g. record r.id ... */
+ return 0;
+ }
+
+Putting them together: to *require* verified BPF, deny at the admission hook
+unless the verdict is ``BPF_SIG_VERIFIED`` (and, if desired, restrict the
+keyring). The kernel then guarantees that any program which actually loads with
+that verdict covered all of its exclusive maps, rejecting any that did not - so
+a deny-by-default admission policy needs no second enforcement point. Use
+``security_bpf_prog()`` to record or finally gate the verified programs once
+they carry an id. The ``verdict``, ``keyring_type`` and ``keyring_serial`` fields
+let a policy distinguish, for example, "verified and signed by a builtin key"
+from "verified by a user key". Policy LSMs such as IPE consume the same hooks to
+enforce system policy without writing any BPF.
+
+Keyrings
+========
+
+``keyring_id`` selects the trusted keyring the PKCS#7 signature is verified
+against. The well-known ids ``0`` (builtin), ``VERIFY_USE_SECONDARY_KEYRING``
+and ``VERIFY_USE_PLATFORM_KEYRING`` select the corresponding system keyrings;
+any other value is treated as the serial of a user/session key or keyring.
+The keyring is looked up first, before the signature bytes are examined, so a
+signature naming a non-existent keyring is rejected up front. A failed
+verification likewise aborts the load, so a program that loads successfully
+with a signature always has consistent keyring fields recorded.
+
+Two fields are recorded in ``prog->aux->sig`` for an LSM to inspect:
+
+``keyring_type`` (``enum bpf_sig_keyring``)
+ Classified purely from ``keyring_id`` whenever the program is signed:
+ ``BPF_SIG_KEYRING_BUILTIN``, ``_SECONDARY``, ``_PLATFORM`` for the system
+ keyrings, or ``_USER`` for a user/session keyring. It is
+ ``BPF_SIG_KEYRING_NONE`` for an unsigned program.
+
+``keyring_serial`` (``s32``)
+ Set **only** on a successful verification, to the serial of the
+ **user/session key or keyring** that ``keyring_id`` resolved to - the
+ object the signature was verified against, not the individual asymmetric
+ key inside it that matched the signer. Passing
+ ``KEY_SPEC_SESSION_KEYRING``, for example, records the session keyring's
+ serial. The system keyrings are trusted as a whole and expose no serial
+ here, so the serial is ``0`` for builtin, secondary and platform
+ signatures, and ``0`` for unsigned programs. In other words, a non-zero
+ ``keyring_serial`` is exactly "verified against the user key/keyring with
+ this serial".
+
+.. list-table::
+ :header-rows: 1
+
+ * - ``keyring_id``
+ - ``keyring_type``
+ - ``keyring_serial``
+ * - (no signature)
+ - ``BPF_SIG_KEYRING_NONE``
+ - ``0``
+ * - ``0``
+ - ``BPF_SIG_KEYRING_BUILTIN``
+ - ``0``
+ * - ``VERIFY_USE_SECONDARY_KEYRING``
+ - ``BPF_SIG_KEYRING_SECONDARY``
+ - ``0``
+ * - ``VERIFY_USE_PLATFORM_KEYRING``
+ - ``BPF_SIG_KEYRING_PLATFORM``
+ - ``0``
+ * - other (a user/session key serial)
+ - ``BPF_SIG_KEYRING_USER``
+ - serial of the resolved key/keyring
+
+Producing a signed object
+==========================
+
+``bpftool`` generates and signs a light skeleton in one step::
+
+ bpftool gen skeleton -L -S -k <private_key.pem> -i <certificate.x509> \
+ obj.bpf.o > obj.lskel.h
+
+``-L`` selects the light-skeleton (``gen_loader``) backend and ``-S`` enables
+signing; ``-k`` and ``-i`` supply the signing key and its X.509 certificate.
+``bpftool`` signs ``insns || metadata`` - the exact bytes the kernel
+reconstructs - and also computes ``excl_prog_hash`` as the digest of the loader
+instructions so the metadata map can be bound to the loader. The signature,
+certificate and hash are embedded in the generated header; loading the skeleton
+performs the create/populate/freeze/load sequence described above.
+
+At runtime the trusted public key must be present in the chosen keyring (for
+example added to the session keyring, or built into the kernel's builtin trusted
+keyring) for verification to succeed.
+
+UAPI reference
+==============
+
+``BPF_PROG_LOAD`` (``union bpf_attr``):
+
+``signature``, ``signature_size``
+ Pointer to and length of the PKCS#7 signature blob.
+
+``keyring_id``
+ Trusted keyring selector (see `Keyrings`_).
+
+``fd_array``, ``fd_array_cnt``
+ Array of map file descriptors bound to the program. ``fd_array_cnt`` must be
+ set for the kernel to scan the array. When a signature is present, every map
+ in the array must be exclusive; its frozen contents are folded into the
+ verified buffer, and a non-exclusive entry is rejected (``-EINVAL``).
+
+``BPF_MAP_CREATE`` (``union bpf_attr``):
+
+``excl_prog_hash``, ``excl_prog_hash_size``
+ SHA-256 digest of the program permitted to access this (exclusive) map. This
+ binds the metadata map to the loader; it is not a hash of the map *content*.
+ The map content is not hashed separately at all - it is covered, as bytes,
+ by the program signature.
+
+Failure modes
+=============
+
+When a signature is present but the load cannot be authenticated, the load is
+rejected; it is never silently downgraded to unsigned. The common rejections:
+
+.. list-table::
+ :header-rows: 1
+ :widths: 30 15 55
+
+ * - Condition
+ - errno
+ - Notes
+ * - Signature does not validate, or no trusted key in the selected keyring
+ matched the signer
+ - ``-EKEYREJECTED`` / ``-ENOKEY``
+ - Surfaced from the PKCS#7 verification layer: ``-EKEYREJECTED`` for an
+ invalid signature, ``-ENOKEY`` when no key in the keyring matches.
+ * - ``keyring_id`` does not resolve to a usable keyring / key
+ - ``-EINVAL``
+ - The keyring is looked up before the signature bytes are examined.
+ * - ``signature_size`` too large
+ - ``-EINVAL``
+ - A practical PKCS#7 signature is well under the cache-allocation limit.
+ * - Non-exclusive map in a signed program's ``fd_array``
+ - ``-EINVAL``
+ - Every folded map must carry ``excl_prog_sha`` (see `Load-time
+ verification`_).
+ * - ``fd_array_cnt`` exceeds the maximum number of used maps
+ - ``-E2BIG``
+ -
+ * - ``insns || metadata`` exceeds the dynptr size cap (~16 MiB)
+ - ``-E2BIG``
+ - The instructions and folded maps are verified as one ``bpf_dynptr``.
+ * - A folded (exclusive) map is not frozen
+ - ``-EPERM``
+ - Freezing is required so the hashed bytes cannot change before the loader
+ runs.
+ * - Program uses an exclusive map the signature does not cover
+ - ``-EACCES``
+ - The post-verifier binding check; keeps the verdict binary (see `The
+ verdict`_).
+
+An unsigned program (no ``signature``) is never rejected by this path; it simply
+loads with the ``BPF_SIG_UNSIGNED`` verdict, leaving any policy decision to an
+LSM.
+
+Testing
+=======
+
+The ``signed_loader`` test in ``tools/testing/selftests/bpf`` exercises the full
+path: it drives map-less and map-owning objects through ``gen_loader``, signs
+``insns || metadata``, loads with ``fd_array_cnt`` set, runs the loader, and
+confirms the target program and map are installed. ``lsm_signature_verdict``
+additionally attaches a BPF LSM program and asserts the observed verdict
+(``BPF_SIG_UNSIGNED`` and ``BPF_SIG_VERIFIED``), and that a signed loader which
+does not fold its metadata is rejected. The signed light skeletons
+``fentry_test``, ``fexit_test`` and ``atomics`` exercise the same load path
+through real generated-and-signed skeletons.
+
+Notes and limitations
+======================
+
+- The instructions plus folded metadata are verified as one ``bpf_dynptr``,
+ which bounds the combined size (currently ~16 MiB); very large objects can
+ exceed it.
+- The metadata container is a single-element array map, accessed through
+ ``map_direct_value_addr``.
+- The verdict and the LSM hooks are kernel-internal; the verdict is not part of
+ the stable UAPI.
--
2.43.0
^ permalink raw reply related
* [PATCH bpf-next 4/5] selftests/bpf: Verify load-time signed loader metadata
From: Daniel Borkmann @ 2026-06-10 23:03 UTC (permalink / raw)
To: ast
Cc: kpsingh, James.Bottomley, paul, bboscaccy, memxor, torvalds, bpf,
linux-security-module
In-Reply-To: <20260610230329.727075-1-daniel@iogearbox.net>
The signed gen_loader no longer checks its metadata map from within
BPF; the kernel does it at BPF_PROG_LOAD by folding the loader's frozen
exclusive fd_array maps into the signature. Exercise that path end to
end. Extend with more test cases (e.g. map-less program, asserting the
LSM admission hook observes BPF_SIG_UNSIGNED and BPF_SIG_VERIFIED), and
retire the subtests that asserted the old in-loader check, which no
longer exists.
# LDLIBS=-static PKG_CONFIG='pkg-config --static' ./vmtest.sh -- ./test_progs -t signed_loader
[...]
[ 1.842848] clocksource: Switched to clocksource tsc
#409/1 signed_loader/loadtime_no_map:OK
#409/2 signed_loader/loadtime_with_map:OK
#409/3 signed_loader/metadata_match:OK
#409/4 signed_loader/signature_enforced:OK
#409/5 signed_loader/signed_nonexcl_fd_array_rejected:OK
#409/6 signed_loader/signature_too_large:OK
#409/7 signed_loader/signature_bad_keyring:OK
#409/8 signed_loader/metadata_ctx_max_entries_ignored:OK
#409/9 signed_loader/metadata_ctx_initial_value_ignored:OK
#409/10 signed_loader/signature_authenticates_insns:OK
#409/11 signed_loader/hash_requires_frozen:OK
#409/12 signed_loader/no_update_after_freeze:OK
#409/13 signed_loader/freeze_writable_mmap:OK
#409/14 signed_loader/no_writable_mmap_frozen:OK
#409/15 signed_loader/map_hash_matches_libbpf:OK
#409/16 signed_loader/map_hash_multi_element:OK
#409/17 signed_loader/map_hash_bad_size:OK
#409/18 signed_loader/map_hash_unsupported_type:OK
#409/19 signed_loader/lsm_signature_verdict:OK
#409 signed_loader:OK
Summary: 1/19 PASSED, 0 SKIPPED, 0 FAILED
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
.../selftests/bpf/prog_tests/signed_loader.c | 460 +++++++++++-------
1 file changed, 274 insertions(+), 186 deletions(-)
diff --git a/tools/testing/selftests/bpf/prog_tests/signed_loader.c b/tools/testing/selftests/bpf/prog_tests/signed_loader.c
index 5fc417e31fc6..8a6a6ea4e093 100644
--- a/tools/testing/selftests/bpf/prog_tests/signed_loader.c
+++ b/tools/testing/selftests/bpf/prog_tests/signed_loader.c
@@ -19,8 +19,6 @@
#include "test_signed_loader_data.skel.h"
#include "test_signed_loader_lsm.skel.h"
-#define SIG_MATCH_INSNS 33 /* excl (5) + 4 * sha-dword (7) */
-
enum {
BPF_SIG_UNSIGNED = 0,
BPF_SIG_VERIFIED,
@@ -35,7 +33,8 @@ enum {
};
static int load_loader(const void *insns, __u32 insns_sz, int map_fd,
- const void *sig, __u32 sig_sz, __s32 keyring_id)
+ const void *sig, __u32 sig_sz, __s32 keyring_id,
+ __u32 fd_array_cnt)
{
union bpf_attr attr;
int fd;
@@ -52,6 +51,7 @@ static int load_loader(const void *insns, __u32 insns_sz, int map_fd,
attr.signature_size = sig_sz;
attr.keyring_id = keyring_id;
}
+ attr.fd_array_cnt = fd_array_cnt;
memcpy(attr.prog_name, "__loader.prog", sizeof("__loader.prog"));
fd = syscall(__NR_bpf, BPF_PROG_LOAD, &attr,
offsetofend(union bpf_attr, keyring_id));
@@ -62,14 +62,12 @@ static int run_gen_loader(const void *insns, __u32 insns_sz,
const void *data, __u32 data_sz,
const void *excl, __u32 excl_sz,
const void *sig, __u32 sig_sz,
- bool get_hash, void *ctx, __u32 ctx_sz, bool *loader_ran)
+ void *ctx, __u32 ctx_sz, bool *loader_ran)
{
LIBBPF_OPTS(bpf_map_create_opts, mopts,
.excl_prog_hash = excl,
.excl_prog_hash_size = excl_sz);
- __u8 hbuf[SHA256_DIGEST_LENGTH];
- struct bpf_map_info info;
- __u32 ilen = sizeof(info), key = 0;
+ __u32 key = 0;
union bpf_attr attr;
int map_fd, prog_fd, ret;
@@ -87,15 +85,6 @@ static int run_gen_loader(const void *insns, __u32 insns_sz,
ret = -errno;
goto out_map;
}
- if (get_hash) {
- memset(&info, 0, sizeof(info));
- info.hash = ptr_to_u64(hbuf);
- info.hash_size = sizeof(hbuf);
- if (bpf_map_get_info_by_fd(map_fd, &info, &ilen)) {
- ret = -errno;
- goto out_map;
- }
- }
memset(&attr, 0, sizeof(attr));
attr.prog_type = BPF_PROG_TYPE_SYSCALL;
@@ -108,6 +97,7 @@ static int run_gen_loader(const void *insns, __u32 insns_sz,
attr.signature = ptr_to_u64(sig);
attr.signature_size = sig_sz;
attr.keyring_id = KEY_SPEC_SESSION_KEYRING;
+ attr.fd_array_cnt = 1;
}
memcpy(attr.prog_name, "__loader.prog", sizeof("__loader.prog"));
prog_fd = syscall(__NR_bpf, BPF_PROG_LOAD, &attr,
@@ -236,79 +226,6 @@ static int sign_buf(const char *dir, const void *buf, __u32 len,
return ret;
}
-static void check_sig_match_shape(const struct bpf_insn *in, int n)
-{
- int a = -1, cleanup = -1, i, base, t, br[5], nb = 0;
-
- /* BPF_PSEUDO_MAP_IDX (the struct bpf_map * form) is used only here. */
- for (i = 0; i + 1 < n; i++) {
- if (in[i].code == (BPF_LD | BPF_IMM | BPF_DW) &&
- in[i].src_reg == BPF_PSEUDO_MAP_IDX) {
- a = i;
- break;
- }
- }
- if (!ASSERT_GE(a, 0, "emit_signature_match present"))
- return;
- if (!ASSERT_LE(a + SIG_MATCH_INSNS, n, "block fits in program"))
- return;
-
- /* excl check: r2 = *(u32 *)(map + 32); if r2 != 1 goto cleanup */
- ASSERT_EQ(in[a + 2].code, (BPF_LDX | BPF_MEM | BPF_W), "excl load width");
- ASSERT_EQ(in[a + 2].off, SHA256_DIGEST_LENGTH, "excl field offset");
- ASSERT_EQ(in[a + 4].code, (BPF_JMP | BPF_JNE | BPF_K), "excl branch op");
- ASSERT_EQ(in[a + 4].imm, 1, "excl compared to 1");
- br[nb++] = a + 4;
-
- /* 4 sha-dword checks: r2 = *(u64 *)(map + i*8); if r2 != r3 goto cleanup */
- for (i = 0; i < 4; i++) {
- base = a + 5 + i * 7;
- ASSERT_EQ(in[base + 2].code, (BPF_LDX | BPF_MEM | BPF_DW), "sha load width");
- ASSERT_EQ(in[base + 2].off, i * 8, "sha dword offset");
- ASSERT_EQ(in[base + 3].code, (BPF_LD | BPF_IMM | BPF_DW), "sha imm64 (H_meta)");
- ASSERT_EQ(in[base + 6].code, (BPF_JMP | BPF_JNE | BPF_X), "sha branch op");
- br[nb++] = base + 6;
- }
-
- /*
- * Locate the real cleanup label so we can pin the exact jump target,
- * not just "some backward label". bpf_gen__init() emits the cleanup
- * block as a prog-fd close loop whose first instruction is the label
- * every error branch jumps to.
- */
- for (i = 0; i + 2 < a; i++) {
- if (in[i].code == (BPF_LDX | BPF_MEM | BPF_W) &&
- in[i].dst_reg == BPF_REG_1 && in[i].src_reg == BPF_REG_10 &&
- in[i + 1].code == (BPF_JMP | BPF_JSLE | BPF_K) &&
- in[i + 1].dst_reg == BPF_REG_1 && in[i + 1].imm == 0 &&
- in[i + 1].off == 1 &&
- in[i + 2].code == (BPF_JMP | BPF_CALL) &&
- in[i + 2].imm == BPF_FUNC_sys_close) {
- cleanup = i;
- break;
- }
- }
- if (!ASSERT_GE(cleanup, 0, "cleanup label located"))
- return;
- for (i = 0; i < nb; i++) {
- t = br[i] + 1 + in[br[i]].off;
- ASSERT_EQ(t, cleanup, "sig-match lands on cleanup");
- }
- /*
- * Same invariant for every other cleanup-bound jump in the program:
- * emit_check_err() is the only source of "if (r7 < 0) goto cleanup",
- * so each of those must also resolve exactly to cleanup.
- */
- for (i = 0, t = 0; i < n; i++) {
- if (in[i].code != (BPF_JMP | BPF_JSLT | BPF_K) ||
- in[i].dst_reg != BPF_REG_7 || in[i].imm != 0 || in[i].off >= 0)
- continue;
- ASSERT_EQ(i + 1 + in[i].off, cleanup, "err-check lands on cleanup");
- t++;
- }
- ASSERT_GT(t, 0, "found emit_check_err jumps");
-}
-
struct gen_loader_fixture {
struct test_signed_loader *skel;
struct gen_loader_opts gopts;
@@ -372,16 +289,6 @@ static void gen_loader_fixture_fini(struct gen_loader_fixture *f)
test_signed_loader__destroy(f->skel);
}
-static void metadata_check_shape(void)
-{
- struct gen_loader_fixture f;
-
- if (gen_loader_fixture_init(&f) == 0)
- check_sig_match_shape((const struct bpf_insn *)f.gopts.insns,
- f.gopts.insns_sz / sizeof(struct bpf_insn));
- gen_loader_fixture_fini(&f);
-}
-
static void metadata_match(void)
{
struct gen_loader_fixture f;
@@ -391,94 +298,58 @@ static void metadata_match(void)
if (gen_loader_fixture_init(&f) == 0) {
r = run_gen_loader(f.gopts.insns, f.gopts.insns_sz, f.blob,
f.data_sz, f.excl, sizeof(f.excl), NULL, 0,
- true, f.ctx, f.ctx_sz, &ran);
+ f.ctx, f.ctx_sz, &ran);
ASSERT_TRUE(ran, "loader ran");
ASSERT_EQ(r, 0, "honest loader retval");
}
gen_loader_fixture_fini(&f);
}
-static void metadata_sha_mismatch(void)
-{
- struct gen_loader_fixture f;
- bool ran;
- int r;
-
- if (gen_loader_fixture_init(&f) == 0) {
- /*
- * blob[0] lives in the loader's fd_array scratch (first add_data in
- * bpf_gen__init); a 0-map program never reads it, so flipping it
- * changes only map->sha. The metadata check is the only thing that
- * can notice -> isolates emit_signature_match.
- */
- f.blob[0] ^= 0xff;
- r = run_gen_loader(f.gopts.insns, f.gopts.insns_sz, f.blob,
- f.data_sz, f.excl, sizeof(f.excl), NULL, 0,
- true, f.ctx, f.ctx_sz, &ran);
- ASSERT_TRUE(ran, "loader ran");
- ASSERT_EQ(r, -EINVAL, "tampered blob rejected by emit_signature_match");
- }
- gen_loader_fixture_fini(&f);
-}
-
-static void metadata_not_exclusive(void)
-{
- struct gen_loader_fixture f;
- bool ran;
- int r;
-
- if (gen_loader_fixture_init(&f) == 0) {
- /*
- * Correct blob but a non-exclusive metadata map: the verifier does
- * not reject (excl_prog_sha unset), so the runtime map->excl == 1
- * check in the loader must.
- */
- r = run_gen_loader(f.gopts.insns, f.gopts.insns_sz, f.blob,
- f.data_sz, NULL, 0, NULL, 0, true, f.ctx,
- f.ctx_sz, &ran);
- ASSERT_TRUE(ran, "loader ran");
- ASSERT_EQ(r, -EINVAL, "non-exclusive metadata map rejected");
- }
- gen_loader_fixture_fini(&f);
-}
-
-static void metadata_hash_not_computed(void)
+static void signature_enforced(void)
{
+ static const __u8 junk[64] = { 0x30, 0x42, 0x13, 0x37, };
struct gen_loader_fixture f;
- bool ran;
- int r;
+ int fd;
if (gen_loader_fixture_init(&f) == 0) {
/*
- * Correct, exclusive, frozen map, but its hash was never computed
- * (no OBJ_GET_INFO_BY_FD), so map->sha stays zero. The loader must
- * fail closed rather than treat an unset hash as a match.
+ * A present-but-invalid signature (the cert bytes are not a
+ * PKCS#7 signature) must be rejected at load: the signature
+ * path is honored, not ignored. (The valid path is covered by
+ * the signed lskels.)
*/
- r = run_gen_loader(f.gopts.insns, f.gopts.insns_sz, f.blob,
- f.data_sz, f.excl, sizeof(f.excl), NULL, 0,
- false, f.ctx, f.ctx_sz, &ran);
- ASSERT_TRUE(ran, "loader ran");
- ASSERT_EQ(r, -EINVAL, "uncomputed metadata hash rejected");
+ fd = load_loader(f.gopts.insns, f.gopts.insns_sz, -1, junk,
+ sizeof(junk), KEY_SPEC_SESSION_KEYRING, 0);
+ ASSERT_LT(fd, 0, "invalid signature rejected at load");
}
gen_loader_fixture_fini(&f);
}
-static void signature_enforced(void)
+static void signed_nonexcl_fd_array_rejected(void)
{
static const __u8 junk[64] = { 0x30, 0x42, 0x13, 0x37, };
struct gen_loader_fixture f;
- int fd;
+ int map_fd, fd;
if (gen_loader_fixture_init(&f) == 0) {
/*
- * A present-but-invalid signature (the cert bytes are not a
- * PKCS#7 signature) must be rejected at load: the signature
- * path is honored, not ignored. (The valid path is covered by
- * the signed lskels.)
+ * A signed program may only bind exclusive maps through fd_array
+ * (their contents are folded into the signature). Binding a
+ * non-exclusive map is rejected, before the signature is even
+ * examined.
*/
- fd = load_loader(f.gopts.insns, f.gopts.insns_sz, -1, junk,
- sizeof(junk), KEY_SPEC_SESSION_KEYRING);
- ASSERT_LT(fd, 0, "invalid signature rejected at load");
+ map_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, "nonexcl", 4,
+ f.data_sz, 1, NULL);
+ if (ASSERT_OK_FD(map_fd, "nonexcl_map")) {
+ fd = load_loader(f.gopts.insns, f.gopts.insns_sz, map_fd,
+ junk, sizeof(junk),
+ KEY_SPEC_SESSION_KEYRING, 1);
+ ASSERT_EQ(fd, -EINVAL,
+ "non-exclusive map in signed fd_array rejected");
+ if (fd >= 0)
+ close(fd);
+ close(map_fd);
+ }
}
gen_loader_fixture_fini(&f);
}
@@ -495,7 +366,7 @@ static void signature_too_large(void)
* is rejected before the buffer is read.
*/
fd = load_loader(f.gopts.insns, f.gopts.insns_sz, -1, junk,
- 64 << 20, KEY_SPEC_SESSION_KEYRING);
+ 64 << 20, KEY_SPEC_SESSION_KEYRING, 0);
ASSERT_EQ(fd, -EINVAL, "oversized signature rejected");
}
gen_loader_fixture_fini(&f);
@@ -515,7 +386,7 @@ static void signature_bad_keyring(void)
* large positive serial takes the user-keyring path and won't exist.
*/
fd = load_loader(f.gopts.insns, f.gopts.insns_sz, -1, junk,
- sizeof(junk), INT_MAX);
+ sizeof(junk), INT_MAX, 0);
ASSERT_EQ(fd, -EINVAL, "signature with bad keyring_id rejected");
}
gen_loader_fixture_fini(&f);
@@ -575,7 +446,7 @@ static void metadata_ctx_max_entries_ignored(void)
memcpy(blob, gopts.data, data_sz);
r = run_gen_loader(gopts.insns, gopts.insns_sz, blob, data_sz,
- excl, sizeof(excl), NULL, 0, true, ctx, ctx_sz, &ran);
+ excl, sizeof(excl), NULL, 0, ctx, ctx_sz, &ran);
if (!ASSERT_TRUE(ran, "loader ran") ||
!ASSERT_EQ(r, 0, "loader retval"))
goto free_blob;
@@ -661,7 +532,7 @@ static void metadata_ctx_initial_value_ignored(void)
memcpy(blob, gopts.data, data_sz);
r = run_gen_loader(gopts.insns, gopts.insns_sz, blob, data_sz,
- excl, sizeof(excl), NULL, 0, true, ctx, ctx_sz, &ran);
+ excl, sizeof(excl), NULL, 0, ctx, ctx_sz, &ran);
if (!ASSERT_TRUE(ran, "loader ran") ||
!ASSERT_EQ(r, 0, "loader retval"))
goto free_blob;
@@ -714,6 +585,7 @@ static void signature_authenticates_insns(void)
__u8 excl[SHA256_DIGEST_LENGTH], sig[8192];
__u32 sig_sz = sizeof(sig), insns_sz, data_sz, ctx_sz;
unsigned char *insns = NULL, *tampered = NULL, *blob = NULL;
+ unsigned char *signbuf = NULL;
int nr_maps = 0, nr_progs = 0, r;
struct bpf_program *p;
struct bpf_map *m;
@@ -760,13 +632,19 @@ static void signature_authenticates_insns(void)
memcpy(blob, gopts.data, data_sz);
libbpf_sha256(insns, insns_sz, excl);
- if (!ASSERT_OK(sign_buf(dir, insns, insns_sz, sig, &sig_sz), "sign-file"))
+ signbuf = malloc((size_t)insns_sz + data_sz);
+ if (!ASSERT_OK_PTR(signbuf, "signbuf"))
+ goto cleanup;
+ memcpy(signbuf, insns, insns_sz);
+ memcpy(signbuf + insns_sz, blob, data_sz);
+ if (!ASSERT_OK(sign_buf(dir, signbuf, insns_sz + data_sz, sig, &sig_sz),
+ "sign-file"))
goto cleanup;
memset(ctx, 0, ctx_sz);
((struct bpf_loader_ctx *)ctx)->sz = ctx_sz;
r = run_gen_loader(insns, insns_sz, blob, data_sz, excl, sizeof(excl),
- sig, sig_sz, true, ctx, ctx_sz, &ran);
+ sig, sig_sz, ctx, ctx_sz, &ran);
ASSERT_TRUE(ran, "valid signature: loader loaded and ran");
ASSERT_EQ(r, 0, "valid signature accepted");
close_loader_ctx_fds(ctx, nr_maps, nr_progs);
@@ -776,13 +654,14 @@ static void signature_authenticates_insns(void)
memset(ctx, 0, ctx_sz);
((struct bpf_loader_ctx *)ctx)->sz = ctx_sz;
r = run_gen_loader(tampered, insns_sz, blob, data_sz, excl, sizeof(excl),
- sig, sig_sz, true, ctx, ctx_sz, &ran);
+ sig, sig_sz, ctx, ctx_sz, &ran);
ASSERT_FALSE(ran, "tampered loader rejected before run");
ASSERT_EQ(r, -EKEYREJECTED, "signature is bound to the instructions");
cleanup:
free(insns);
free(tampered);
free(blob);
+ free(signbuf);
free(ctx);
test_signed_loader__destroy(skel);
run_setup("cleanup", dir);
@@ -1007,10 +886,11 @@ static void lsm_signature_verdict(void)
{
char dir_tmpl[] = "/tmp/signed_loader_lsmXXXXXX", *dir = NULL;
struct test_signed_loader_lsm *lsm = NULL;
+ __u32 sig_sz = 8192, msig_sz = 8192;
int map_fd = -1, prog_fd = -1;
bool have_fixture = false;
struct gen_loader_fixture f;
- __u32 sig_sz = 8192;
+ unsigned char *buf;
__s32 ses_serial;
__u8 sig[8192];
@@ -1029,7 +909,7 @@ static void lsm_signature_verdict(void)
if (!ASSERT_OK_FD(map_fd, "meta_map_unsigned"))
goto out;
lsm->bss->seen = 0;
- prog_fd = load_loader(f.gopts.insns, f.gopts.insns_sz, map_fd, NULL, 0, 0);
+ prog_fd = load_loader(f.gopts.insns, f.gopts.insns_sz, map_fd, NULL, 0, 0, 0);
close(map_fd);
map_fd = -1;
if (!ASSERT_OK_FD(prog_fd, "unsigned loader load"))
@@ -1062,22 +942,51 @@ static void lsm_signature_verdict(void)
goto out;
lsm->bss->seen = 0;
prog_fd = load_loader(f.gopts.insns, f.gopts.insns_sz, map_fd, sig,
- sig_sz, KEY_SPEC_SESSION_KEYRING);
+ sig_sz, KEY_SPEC_SESSION_KEYRING, 0);
close(map_fd);
map_fd = -1;
- if (!ASSERT_OK_FD(prog_fd, "signed loader load"))
- goto out;
- close(prog_fd);
+ ASSERT_EQ(prog_fd, -EACCES, "unfolded metadata rejected");
+ if (prog_fd >= 0)
+ close(prog_fd);
prog_fd = -1;
ses_serial = syscall(__NR_keyctl, KEYCTL_GET_KEYRING_ID,
KEY_SPEC_SESSION_KEYRING, 0);
ASSERT_EQ(lsm->bss->seen, 1, "signed: one observed load");
- ASSERT_EQ(lsm->bss->sig_verdict, BPF_SIG_VERIFIED, "signed verdict");
+ ASSERT_EQ(lsm->bss->sig_verdict, BPF_SIG_VERIFIED,
+ "admission saw a valid signature");
ASSERT_EQ(lsm->bss->sig_keyring_type, BPF_SIG_KEYRING_USER, "signed keyring type");
ASSERT_GT(ses_serial, 0, "session keyring serial resolved");
ASSERT_EQ(lsm->bss->sig_keyring_serial, ses_serial,
"signed: validated against session keyring");
+
+ buf = malloc((size_t)f.gopts.insns_sz + f.data_sz);
+ if (!ASSERT_OK_PTR(buf, "meta_signbuf"))
+ goto out;
+ memcpy(buf, f.gopts.insns, f.gopts.insns_sz);
+ memcpy(buf + f.gopts.insns_sz, f.blob, f.data_sz);
+ if (!ASSERT_OK(sign_buf(dir, buf, f.gopts.insns_sz + f.data_sz,
+ sig, &msig_sz), "sign insns||metadata")) {
+ free(buf);
+ goto out;
+ }
+ free(buf);
+
+ map_fd = setup_meta_map(&f);
+ if (!ASSERT_OK_FD(map_fd, "meta_map_bound"))
+ goto out;
+ lsm->bss->seen = 0;
+ prog_fd = load_loader(f.gopts.insns, f.gopts.insns_sz, map_fd, sig,
+ msig_sz, KEY_SPEC_SESSION_KEYRING, 1);
+ close(map_fd);
+ map_fd = -1;
+ if (!ASSERT_OK_FD(prog_fd, "metadata-bound loader load"))
+ goto out;
+ close(prog_fd);
+ prog_fd = -1;
+ ASSERT_EQ(lsm->bss->seen, 1, "metadata: one observed load");
+ ASSERT_EQ(lsm->bss->sig_verdict, BPF_SIG_VERIFIED,
+ "metadata-bound verdict");
out:
if (map_fd >= 0)
close(map_fd);
@@ -1090,20 +999,199 @@ static void lsm_signature_verdict(void)
test_signed_loader_lsm__destroy(lsm);
}
+/*
+ * Load-time metadata verification: the kernel folds the frozen metadata map
+ * into the signature (insns || metadata) and checks it at BPF_PROG_LOAD via
+ * fd_array_cnt, rather than the loader checking from within BPF. Sign that
+ * concatenation, hand the kernel the map, and confirm the signed loader loads,
+ * runs, and installs its target.
+ */
+static int loadtime_drive(const char *dir, const void *insns, __u32 insns_sz,
+ const void *data, __u32 data_sz, const __u8 *excl,
+ void *ctx, __u32 ctx_sz, int *load_ret, bool *ran)
+{
+ LIBBPF_OPTS(bpf_map_create_opts, mopts,
+ .excl_prog_hash = excl,
+ .excl_prog_hash_size = SHA256_DIGEST_LENGTH);
+ __u32 sig_sz = 8192, key = 0;
+ unsigned char *buf = NULL;
+ int map_fd, prog_fd, ret = 0;
+ union bpf_attr attr;
+ __u8 sig[8192];
+
+ *ran = false;
+ *load_ret = 0;
+
+ /*
+ * Metadata map, bound to the loader digest and frozen, exactly as
+ * skel_internal.h's bpf_load_and_run() sets it up.
+ */
+ map_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, "__loader.map", 4,
+ data_sz, 1, &mopts);
+ if (map_fd < 0)
+ return -errno;
+ if (bpf_map_update_elem(map_fd, &key, data, 0) || bpf_map_freeze(map_fd)) {
+ ret = -errno;
+ goto out_map;
+ }
+
+ /* Sign insns || metadata, the same bytes the kernel reconstructs. */
+ buf = malloc((size_t)insns_sz + data_sz);
+ if (!buf) {
+ ret = -ENOMEM;
+ goto out_map;
+ }
+ memcpy(buf, insns, insns_sz);
+ memcpy(buf + insns_sz, data, data_sz);
+ ret = sign_buf(dir, buf, insns_sz + data_sz, sig, &sig_sz);
+ if (ret)
+ goto out_map;
+
+ memset(&attr, 0, sizeof(attr));
+ attr.prog_type = BPF_PROG_TYPE_SYSCALL;
+ attr.insns = ptr_to_u64(insns);
+ attr.insn_cnt = insns_sz / sizeof(struct bpf_insn);
+ attr.license = ptr_to_u64("Dual BSD/GPL");
+ attr.prog_flags = BPF_F_SLEEPABLE;
+ attr.fd_array = ptr_to_u64(&map_fd);
+ attr.signature = ptr_to_u64(sig);
+ attr.signature_size = sig_sz;
+ attr.keyring_id = KEY_SPEC_SESSION_KEYRING;
+ attr.fd_array_cnt = 1;
+ memcpy(attr.prog_name, "__loader.prog", sizeof("__loader.prog"));
+ prog_fd = syscall(__NR_bpf, BPF_PROG_LOAD, &attr,
+ offsetofend(union bpf_attr, keyring_id));
+ if (prog_fd < 0) {
+ *load_ret = -errno;
+ ret = -errno;
+ goto out_map;
+ }
+
+ memset(&attr, 0, sizeof(attr));
+ attr.test.prog_fd = prog_fd;
+ attr.test.ctx_in = ptr_to_u64(ctx);
+ attr.test.ctx_size_in = ctx_sz;
+ if (syscall(__NR_bpf, BPF_PROG_RUN, &attr,
+ offsetofend(union bpf_attr, test)) < 0) {
+ ret = -errno;
+ goto out_prog;
+ }
+ *ran = true;
+ ret = (int)attr.test.retval;
+out_prog:
+ close(prog_fd);
+out_map:
+ free(buf);
+ close(map_fd);
+ return ret;
+}
+
+static void loadtime_verify(struct bpf_object *obj, int expect_maps)
+{
+ LIBBPF_OPTS(gen_loader_opts, gopts, .gen_hash = true);
+ char dir_tmpl[] = "/tmp/signed_loader_ltXXXXXX", *dir = NULL;
+ int nr_maps = 0, nr_progs = 0, load_ret = 0, r;
+ __u8 excl[SHA256_DIGEST_LENGTH];
+ struct bpf_prog_desc *pd;
+ struct bpf_map_desc *md;
+ unsigned char *blob = NULL;
+ struct bpf_program *p;
+ struct bpf_map *m;
+ __u32 ctx_sz, data_sz;
+ void *ctx = NULL;
+ bool ran = false;
+
+ syscall(__NR_request_key, "keyring", "_uid.0", NULL,
+ KEY_SPEC_SESSION_KEYRING);
+ dir = mkdtemp(dir_tmpl);
+ if (!ASSERT_OK_PTR(dir, "mkdtemp"))
+ return;
+ if (!ASSERT_OK(run_setup("setup", dir), "verify_sig_setup")) {
+ rmdir(dir);
+ return;
+ }
+
+ if (!ASSERT_OK(bpf_object__gen_loader(obj, &gopts), "gen_loader"))
+ goto out;
+ if (!ASSERT_OK(bpf_object__load(obj), "gen_load"))
+ goto out;
+
+ bpf_object__for_each_program(p, obj)
+ nr_progs++;
+ bpf_object__for_each_map(m, obj)
+ nr_maps++;
+ if (!ASSERT_EQ(nr_maps, expect_maps, "fixture map count"))
+ goto out;
+
+ ctx_sz = sizeof(struct bpf_loader_ctx) +
+ nr_maps * sizeof(struct bpf_map_desc) +
+ nr_progs * sizeof(struct bpf_prog_desc);
+ ctx = calloc(1, ctx_sz);
+ if (!ASSERT_OK_PTR(ctx, "ctx_alloc"))
+ goto out;
+ ((struct bpf_loader_ctx *)ctx)->sz = ctx_sz;
+
+ data_sz = gopts.data_sz;
+ blob = malloc(data_sz);
+ if (!ASSERT_OK_PTR(blob, "blob_alloc"))
+ goto out;
+ memcpy(blob, gopts.data, data_sz);
+
+ /* excl_prog_hash = SHA256(loader insns) == the loader's prog->digest. */
+ libbpf_sha256(gopts.insns, gopts.insns_sz, excl);
+
+ r = loadtime_drive(dir, gopts.insns, gopts.insns_sz, blob, data_sz,
+ excl, ctx, ctx_sz, &load_ret, &ran);
+ ASSERT_OK(load_ret, "signed loader loaded (insns || metadata)");
+ ASSERT_TRUE(ran, "loader ran");
+ ASSERT_EQ(r, 0, "loader installed its target");
+
+ md = (struct bpf_map_desc *)((char *)ctx + sizeof(struct bpf_loader_ctx));
+ pd = (struct bpf_prog_desc *)(md + nr_maps);
+ ASSERT_GT(pd[0].prog_fd, 0, "target program installed");
+ if (nr_maps)
+ ASSERT_GT(md[0].map_fd, 0, "target map installed");
+
+ close_loader_ctx_fds(ctx, nr_maps, nr_progs);
+out:
+ free(blob);
+ free(ctx);
+ if (dir)
+ run_setup("cleanup", dir);
+}
+
+static void loadtime_no_map(void)
+{
+ struct test_signed_loader *skel = test_signed_loader__open();
+
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
+ loadtime_verify(skel->obj, 0);
+ test_signed_loader__destroy(skel);
+}
+
+static void loadtime_with_map(void)
+{
+ struct test_signed_loader_map *skel = test_signed_loader_map__open();
+
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
+ loadtime_verify(skel->obj, 1);
+ test_signed_loader_map__destroy(skel);
+}
+
void test_signed_loader(void)
{
- if (test__start_subtest("metadata_check_shape"))
- metadata_check_shape();
+ if (test__start_subtest("loadtime_no_map"))
+ loadtime_no_map();
+ if (test__start_subtest("loadtime_with_map"))
+ loadtime_with_map();
if (test__start_subtest("metadata_match"))
metadata_match();
- if (test__start_subtest("metadata_sha_mismatch"))
- metadata_sha_mismatch();
- if (test__start_subtest("metadata_not_exclusive"))
- metadata_not_exclusive();
- if (test__start_subtest("metadata_hash_not_computed"))
- metadata_hash_not_computed();
if (test__start_subtest("signature_enforced"))
signature_enforced();
+ if (test__start_subtest("signed_nonexcl_fd_array_rejected"))
+ signed_nonexcl_fd_array_rejected();
if (test__start_subtest("signature_too_large"))
signature_too_large();
if (test__start_subtest("signature_bad_keyring"))
--
2.43.0
^ permalink raw reply related
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox