* [PATCH v2 03/17] landlock: Split struct landlock_domain from struct landlock_ruleset
From: Mickaël Salaün @ 2026-04-06 14:37 UTC (permalink / raw)
To: Christian Brauner, Günther Noack, Steven Rostedt
Cc: Mickaël Salaün, Jann Horn, Jeff Xu, Justin Suess,
Kees Cook, Masami Hiramatsu, Mathieu Desnoyers, Matthieu Buffet,
Mikhail Ivanov, Tingmao Wang, kernel-team, linux-fsdevel,
linux-security-module, linux-trace-kernel
In-Reply-To: <20260406143717.1815792-1-mic@digikod.net>
Switch all domain users to the new struct landlock_domain type
introduced by a previous commit. This eliminates the conflation between
mutable rulesets and immutable domains.
Change the credential domain field to struct landlock_domain *, and
update all consumer functions. Move the merge and inherit chain from
ruleset.c to domain.c; landlock_merge_ruleset() now returns struct
landlock_domain * and uses create_domain(). Locking of, and lock
assertions on, the destination and parent domains are removed because
domains have no lock; only the source ruleset's lock is still taken.
Rename the per-layer FAM from access_masks to layers, and the single
ruleset field from access_masks to layer, to prepare for future
per-layer extensions beyond handled-access bitfields.
Clean up struct landlock_ruleset by removing domain-only fields
(hierarchy, work_free, num_layers) and replacing the access_masks[] FAM
with a single struct access_masks layer field.
Break the circular include between audit.h and cred.h by replacing the
cred.h include in audit.h with forward declarations.
Cc: Günther Noack <gnoack@google.com>
Cc: Tingmao Wang <m@maowtm.org>
Signed-off-by: Mickaël Salaün <mic@digikod.net>
---
Changes since v1:
- New patch.
---
security/landlock/access.h | 4 +-
security/landlock/audit.c | 12 +-
security/landlock/audit.h | 4 +-
security/landlock/cred.c | 6 +-
security/landlock/cred.h | 21 ++-
security/landlock/domain.c | 252 ++++++++++++++++++++++++++-
security/landlock/domain.h | 43 ++++-
security/landlock/fs.c | 28 ++-
security/landlock/net.c | 3 +-
security/landlock/ruleset.c | 329 ++++-------------------------------
security/landlock/ruleset.h | 129 ++------------
security/landlock/syscalls.c | 10 +-
security/landlock/task.c | 20 +--
13 files changed, 386 insertions(+), 475 deletions(-)
diff --git a/security/landlock/access.h b/security/landlock/access.h
index c19d5bc13944..76ab447dfcf7 100644
--- a/security/landlock/access.h
+++ b/security/landlock/access.h
@@ -19,8 +19,8 @@
/*
* All access rights that are denied by default whether they are handled or not
- * by a ruleset/layer. This must be ORed with all ruleset->access_masks[]
- * entries when we need to get the absolute handled access masks, see
+ * by a ruleset/layer. This must be ORed with all domain->layers[] entries when
+ * we need to get the absolute handled access masks, see
* landlock_upgrade_handled_access_masks().
*/
/* clang-format off */
diff --git a/security/landlock/audit.c b/security/landlock/audit.c
index 8d0edf94037d..75438b3cc887 100644
--- a/security/landlock/audit.c
+++ b/security/landlock/audit.c
@@ -135,7 +135,7 @@ static void log_domain(struct landlock_hierarchy *const hierarchy)
}
static struct landlock_hierarchy *
-get_hierarchy(const struct landlock_ruleset *const domain, const size_t layer)
+get_hierarchy(const struct landlock_domain *const domain, const size_t layer)
{
struct landlock_hierarchy *hierarchy = domain->hierarchy;
ssize_t i;
@@ -168,7 +168,7 @@ static void test_get_hierarchy(struct kunit *const test)
.parent = &dom1_hierarchy,
.id = 30,
};
- struct landlock_ruleset dom2 = {
+ struct landlock_domain dom2 = {
.hierarchy = &dom2_hierarchy,
.num_layers = 3,
};
@@ -182,7 +182,7 @@ static void test_get_hierarchy(struct kunit *const test)
#endif /* CONFIG_SECURITY_LANDLOCK_KUNIT_TEST */
/* Get the youngest layer that denied the access_request. */
-static size_t get_denied_layer(const struct landlock_ruleset *const domain,
+static size_t get_denied_layer(const struct landlock_domain *const domain,
access_mask_t *const access_request,
const struct layer_access_masks *masks)
{
@@ -202,7 +202,7 @@ static size_t get_denied_layer(const struct landlock_ruleset *const domain,
static void test_get_denied_layer(struct kunit *const test)
{
- const struct landlock_ruleset dom = {
+ const struct landlock_domain dom = {
.num_layers = 5,
};
const struct layer_access_masks masks = {
@@ -440,8 +440,8 @@ void landlock_log_denial(const struct landlock_cred_security *const subject,
* Only domains which previously appeared in the audit logs are logged again.
* This is useful to know when a domain will never show again in the audit log.
*
- * Called in a work queue scheduled by landlock_put_ruleset_deferred() called
- * by hook_cred_free().
+ * Called in a work queue scheduled by landlock_put_domain_deferred() called by
+ * hook_cred_free().
*/
void landlock_log_drop_domain(const struct landlock_hierarchy *const hierarchy)
{
diff --git a/security/landlock/audit.h b/security/landlock/audit.h
index 56778331b58c..50452a791656 100644
--- a/security/landlock/audit.h
+++ b/security/landlock/audit.h
@@ -12,7 +12,9 @@
#include <linux/lsm_audit.h>
#include "access.h"
-#include "cred.h"
+
+struct landlock_cred_security;
+struct landlock_hierarchy;
enum landlock_request_type {
LANDLOCK_REQUEST_PTRACE = 1,
diff --git a/security/landlock/cred.c b/security/landlock/cred.c
index cc419de75cd6..58b544993db4 100644
--- a/security/landlock/cred.c
+++ b/security/landlock/cred.c
@@ -22,7 +22,7 @@ static void hook_cred_transfer(struct cred *const new,
const struct landlock_cred_security *const old_llcred =
landlock_cred(old);
- landlock_get_ruleset(old_llcred->domain);
+ landlock_get_domain(old_llcred->domain);
*landlock_cred(new) = *old_llcred;
}
@@ -35,10 +35,10 @@ static int hook_cred_prepare(struct cred *const new,
static void hook_cred_free(struct cred *const cred)
{
- struct landlock_ruleset *const dom = landlock_cred(cred)->domain;
+ struct landlock_domain *const dom = landlock_cred(cred)->domain;
if (dom)
- landlock_put_ruleset_deferred(dom);
+ landlock_put_domain_deferred(dom);
}
#ifdef CONFIG_AUDIT
diff --git a/security/landlock/cred.h b/security/landlock/cred.h
index f287c56b5fd4..c42b0d3ecec8 100644
--- a/security/landlock/cred.h
+++ b/security/landlock/cred.h
@@ -16,6 +16,7 @@
#include <linux/rcupdate.h>
#include "access.h"
+#include "domain.h"
#include "limits.h"
#include "ruleset.h"
#include "setup.h"
@@ -31,9 +32,9 @@
*/
struct landlock_cred_security {
/**
- * @domain: Immutable ruleset enforced on a task.
+ * @domain: Immutable domain enforced on a task.
*/
- struct landlock_ruleset *domain;
+ struct landlock_domain *domain;
#ifdef CONFIG_AUDIT
/**
@@ -70,22 +71,20 @@ landlock_cred(const struct cred *cred)
static inline void landlock_cred_copy(struct landlock_cred_security *dst,
const struct landlock_cred_security *src)
{
- landlock_put_ruleset(dst->domain);
+ landlock_put_domain(dst->domain);
*dst = *src;
- landlock_get_ruleset(src->domain);
+ landlock_get_domain(src->domain);
}
-static inline struct landlock_ruleset *landlock_get_current_domain(void)
+static inline struct landlock_domain *landlock_get_current_domain(void)
{
return landlock_cred(current_cred())->domain;
}
-/*
- * The call needs to come from an RCU read-side critical section.
- */
-static inline const struct landlock_ruleset *
+/* The call needs to come from an RCU read-side critical section. */
+static inline const struct landlock_domain *
landlock_get_task_domain(const struct task_struct *const task)
{
return landlock_cred(__task_cred(task))->domain;
@@ -126,7 +125,7 @@ landlock_get_applicable_subject(const struct cred *const cred,
const union access_masks_all masks_all = {
.masks = masks,
};
- const struct landlock_ruleset *domain;
+ const struct landlock_domain *domain;
ssize_t layer_level;
if (!cred)
@@ -139,7 +138,7 @@ landlock_get_applicable_subject(const struct cred *const cred,
for (layer_level = domain->num_layers - 1; layer_level >= 0;
layer_level--) {
union access_masks_all layer = {
- .masks = domain->access_masks[layer_level],
+ .masks = domain->layers[layer_level],
};
if (layer.all & masks_all.all) {
diff --git a/security/landlock/domain.c b/security/landlock/domain.c
index cb79edf5df02..317fd94d3ccd 100644
--- a/security/landlock/domain.c
+++ b/security/landlock/domain.c
@@ -36,6 +36,36 @@
#include "object.h"
#include "ruleset.h"
+static void build_check_domain(void)
+{
+ const struct landlock_domain domain = {
+ .num_layers = ~0,
+ };
+
+ BUILD_BUG_ON(domain.num_layers < LANDLOCK_MAX_NUM_LAYERS);
+}
+
+static struct landlock_domain *create_domain(const u32 num_layers)
+{
+ struct landlock_domain *new_domain;
+
+ build_check_domain();
+ new_domain = kzalloc_flex(*new_domain, layers, num_layers,
+ GFP_KERNEL_ACCOUNT);
+ if (!new_domain)
+ return ERR_PTR(-ENOMEM);
+
+ refcount_set(&new_domain->usage, 1);
+ new_domain->rules.root_inode = RB_ROOT;
+
+#if IS_ENABLED(CONFIG_INET)
+ new_domain->rules.root_net_port = RB_ROOT;
+#endif /* IS_ENABLED(CONFIG_INET) */
+
+ new_domain->num_layers = num_layers;
+ return new_domain;
+}
+
static void free_domain(struct landlock_domain *const domain)
{
might_sleep();
@@ -67,15 +97,15 @@ void landlock_put_domain_deferred(struct landlock_domain *const domain)
}
}
-/* The returned access has the same lifetime as @ruleset. */
+/* The returned access has the same lifetime as @domain. */
const struct landlock_rule *
-landlock_find_rule(const struct landlock_ruleset *const ruleset,
+landlock_find_rule(const struct landlock_domain *const domain,
const struct landlock_id id)
{
const struct rb_root *root;
const struct rb_node *node;
- root = landlock_get_rule_root((struct landlock_rules *)&ruleset->rules,
+ root = landlock_get_rule_root((struct landlock_rules *)&domain->rules,
id.type);
if (IS_ERR(root))
return NULL;
@@ -151,7 +181,7 @@ bool landlock_unmask_layers(const struct landlock_rule *const rule,
}
typedef access_mask_t
-get_access_mask_t(const struct landlock_ruleset *const ruleset,
+get_access_mask_t(const struct landlock_domain *const domain,
const u16 layer_level);
/**
@@ -169,7 +199,7 @@ get_access_mask_t(const struct landlock_ruleset *const ruleset,
* any of the active layers in @domain.
*/
access_mask_t
-landlock_init_layer_masks(const struct landlock_ruleset *const domain,
+landlock_init_layer_masks(const struct landlock_domain *const domain,
const access_mask_t access_request,
struct layer_access_masks *const masks,
const enum landlock_key_type key_type)
@@ -209,6 +239,218 @@ landlock_init_layer_masks(const struct landlock_ruleset *const domain,
return handled_accesses;
}
+static int merge_tree(struct landlock_domain *const dst,
+ struct landlock_ruleset *const src,
+ const enum landlock_key_type key_type)
+{
+ struct landlock_rule *walker_rule, *next_rule;
+ struct rb_root *src_root;
+ int err = 0;
+
+ might_sleep();
+ lockdep_assert_held(&src->lock);
+
+ src_root = landlock_get_rule_root(&src->rules, key_type);
+ if (IS_ERR(src_root))
+ return PTR_ERR(src_root);
+
+ /* Merges the @src tree. */
+ rbtree_postorder_for_each_entry_safe(walker_rule, next_rule, src_root,
+ node) {
+ struct landlock_layer layers[] = { {
+ .level = dst->num_layers,
+ } };
+ const struct landlock_id id = {
+ .key = walker_rule->key,
+ .type = key_type,
+ };
+
+ if (WARN_ON_ONCE(walker_rule->num_layers != 1))
+ return -EINVAL;
+
+ if (WARN_ON_ONCE(walker_rule->layers[0].level != 0))
+ return -EINVAL;
+
+ layers[0].access = walker_rule->layers[0].access;
+
+ err = landlock_rule_insert(&dst->rules, id, &layers,
+ ARRAY_SIZE(layers));
+ if (err)
+ return err;
+ }
+ return err;
+}
+
+static int merge_ruleset(struct landlock_domain *const dst,
+ struct landlock_ruleset *const src)
+{
+ int err = 0;
+
+ might_sleep();
+ /* Should already be checked by landlock_merge_ruleset() */
+ if (WARN_ON_ONCE(!src))
+ return 0;
+ /* Only merge into a domain. */
+ if (WARN_ON_ONCE(!dst || !dst->hierarchy))
+ return -EINVAL;
+
+ mutex_lock(&src->lock);
+
+ /* Stacks the new layer. */
+ if (WARN_ON_ONCE(dst->num_layers < 1)) {
+ err = -EINVAL;
+ goto out_unlock;
+ }
+ dst->layers[dst->num_layers - 1] =
+ landlock_upgrade_handled_access_masks(src->layer);
+
+ /* Merges the @src inode tree. */
+ err = merge_tree(dst, src, LANDLOCK_KEY_INODE);
+ if (err)
+ goto out_unlock;
+
+#if IS_ENABLED(CONFIG_INET)
+ /* Merges the @src network port tree. */
+ err = merge_tree(dst, src, LANDLOCK_KEY_NET_PORT);
+ if (err)
+ goto out_unlock;
+#endif /* IS_ENABLED(CONFIG_INET) */
+
+out_unlock:
+ mutex_unlock(&src->lock);
+ return err;
+}
+
+static int inherit_tree(struct landlock_domain *const parent,
+ struct landlock_domain *const child,
+ const enum landlock_key_type key_type)
+{
+ struct landlock_rule *walker_rule, *next_rule;
+ struct rb_root *parent_root;
+ int err = 0;
+
+ might_sleep();
+
+ parent_root = landlock_get_rule_root(
+ (struct landlock_rules *)&parent->rules, key_type);
+ if (IS_ERR(parent_root))
+ return PTR_ERR(parent_root);
+
+ /* Copies the @parent inode or network tree. */
+ rbtree_postorder_for_each_entry_safe(walker_rule, next_rule,
+ parent_root, node) {
+ const struct landlock_id id = {
+ .key = walker_rule->key,
+ .type = key_type,
+ };
+
+ err = landlock_rule_insert(&child->rules, id,
+ &walker_rule->layers,
+ walker_rule->num_layers);
+ if (err)
+ return err;
+ }
+ return err;
+}
+
+static int inherit_ruleset(struct landlock_domain *const parent,
+ struct landlock_domain *const child)
+{
+ int err = 0;
+
+ might_sleep();
+ if (!parent)
+ return 0;
+
+ /* Copies the @parent inode tree. */
+ err = inherit_tree(parent, child, LANDLOCK_KEY_INODE);
+ if (err)
+ return err;
+
+#if IS_ENABLED(CONFIG_INET)
+ /* Copies the @parent network port tree. */
+ err = inherit_tree(parent, child, LANDLOCK_KEY_NET_PORT);
+ if (err)
+ return err;
+#endif /* IS_ENABLED(CONFIG_INET) */
+
+ if (WARN_ON_ONCE(child->num_layers <= parent->num_layers))
+ return -EINVAL;
+
+ /* Copies the parent layer stack and leaves a space for the new layer. */
+ memcpy(child->layers, parent->layers,
+ flex_array_size(parent, layers, parent->num_layers));
+
+ if (WARN_ON_ONCE(!parent->hierarchy))
+ return -EINVAL;
+
+ landlock_get_hierarchy(parent->hierarchy);
+ child->hierarchy->parent = parent->hierarchy;
+
+ return 0;
+}
+
+/**
+ * landlock_merge_ruleset - Merge a ruleset with a domain
+ *
+ * @parent: Parent domain.
+ * @ruleset: New ruleset to be merged.
+ *
+ * The current task is requesting to be restricted. The subjective credentials
+ * must not be in an overridden state. cf. landlock_init_hierarchy_log().
+ *
+ * Return: A new domain merging @parent and @ruleset on success, or ERR_PTR() on
+ * failure. If @parent is NULL, the new domain duplicates @ruleset.
+ */
+struct landlock_domain *
+landlock_merge_ruleset(struct landlock_domain *const parent,
+ struct landlock_ruleset *const ruleset)
+{
+ struct landlock_domain *new_dom __free(landlock_put_domain) = NULL;
+ u32 num_layers;
+ int err;
+
+ might_sleep();
+ if (WARN_ON_ONCE(!ruleset))
+ return ERR_PTR(-EINVAL);
+
+ if (parent) {
+ if (parent->num_layers >= LANDLOCK_MAX_NUM_LAYERS)
+ return ERR_PTR(-E2BIG);
+ num_layers = parent->num_layers + 1;
+ } else {
+ num_layers = 1;
+ }
+
+ /* Creates a new domain... */
+ new_dom = create_domain(num_layers);
+ if (IS_ERR(new_dom))
+ return new_dom;
+
+ new_dom->hierarchy =
+ kzalloc_obj(*new_dom->hierarchy, GFP_KERNEL_ACCOUNT);
+ if (!new_dom->hierarchy)
+ return ERR_PTR(-ENOMEM);
+
+ refcount_set(&new_dom->hierarchy->usage, 1);
+
+ /* ...as a child of @parent... */
+ err = inherit_ruleset(parent, new_dom);
+ if (err)
+ return ERR_PTR(err);
+
+ /* ...and including @ruleset. */
+ err = merge_ruleset(new_dom, ruleset);
+ if (err)
+ return ERR_PTR(err);
+
+ err = landlock_init_hierarchy_log(new_dom->hierarchy);
+ if (err)
+ return ERR_PTR(err);
+
+ return no_free_ptr(new_dom);
+}
+
#ifdef CONFIG_AUDIT
/**
diff --git a/security/landlock/domain.h b/security/landlock/domain.h
index afa97011ecd2..df11cb7d4f2b 100644
--- a/security/landlock/domain.h
+++ b/security/landlock/domain.h
@@ -196,7 +196,7 @@ struct landlock_domain {
* @work_free: Enables to free a domain within a lockless
* section. This is only used by landlock_put_domain_deferred()
* when @usage reaches zero. The fields @usage, @num_layers and
- * @access_masks are then unused.
+ * @layers are then unused.
*/
struct work_struct work_free;
struct {
@@ -212,7 +212,7 @@ struct landlock_domain {
*/
u32 num_layers;
/**
- * @access_masks: Contains the subset of filesystem and
+ * @layers: Contains the subset of filesystem and
* network actions that are restricted by a domain. A
* domain saves all layers of merged rulesets in a stack
* (FAM), starting from the first layer to the last one.
@@ -222,28 +222,51 @@ struct landlock_domain {
* overlapping access rights. These layers are set once
* and never changed for the lifetime of the domain.
*/
- struct access_masks access_masks[];
+ struct access_masks layers[];
};
};
};
+static inline access_mask_t
+landlock_get_fs_access_mask(const struct landlock_domain *const domain,
+ const u16 layer_level)
+{
+ /* Handles all initially denied by default access rights. */
+ return domain->layers[layer_level].fs |
+ _LANDLOCK_ACCESS_FS_INITIALLY_DENIED;
+}
+
+static inline access_mask_t
+landlock_get_net_access_mask(const struct landlock_domain *const domain,
+ const u16 layer_level)
+{
+ return domain->layers[layer_level].net;
+}
+
+static inline access_mask_t
+landlock_get_scope_mask(const struct landlock_domain *const domain,
+ const u16 layer_level)
+{
+ return domain->layers[layer_level].scope;
+}
+
/**
* landlock_union_access_masks - Return all access rights handled in the
* domain
*
- * @domain: Landlock ruleset (used as a domain)
+ * @domain: Landlock domain
*
* Return: An access_masks result of the OR of all the domain's access masks.
*/
static inline struct access_masks
-landlock_union_access_masks(const struct landlock_ruleset *const domain)
+landlock_union_access_masks(const struct landlock_domain *const domain)
{
union access_masks_all matches = {};
size_t layer_level;
for (layer_level = 0; layer_level < domain->num_layers; layer_level++) {
union access_masks_all layer = {
- .masks = domain->access_masks[layer_level],
+ .masks = domain->layers[layer_level],
};
matches.all |= layer.all;
@@ -258,15 +281,19 @@ void landlock_put_domain_deferred(struct landlock_domain *const domain);
DEFINE_FREE(landlock_put_domain, struct landlock_domain *,
if (!IS_ERR_OR_NULL(_T)) landlock_put_domain(_T))
+struct landlock_domain *
+landlock_merge_ruleset(struct landlock_domain *const parent,
+ struct landlock_ruleset *const ruleset);
+
const struct landlock_rule *
-landlock_find_rule(const struct landlock_ruleset *const ruleset,
+landlock_find_rule(const struct landlock_domain *const domain,
const struct landlock_id id);
bool landlock_unmask_layers(const struct landlock_rule *const rule,
struct layer_access_masks *masks);
access_mask_t
-landlock_init_layer_masks(const struct landlock_ruleset *const domain,
+landlock_init_layer_masks(const struct landlock_domain *const domain,
const access_mask_t access_request,
struct layer_access_masks *masks,
const enum landlock_key_type key_type);
diff --git a/security/landlock/fs.c b/security/landlock/fs.c
index c1ecfe239032..3ef453fc14a6 100644
--- a/security/landlock/fs.c
+++ b/security/landlock/fs.c
@@ -336,12 +336,10 @@ int landlock_append_fs_rule(struct landlock_ruleset *const ruleset,
if (!d_is_dir(path->dentry) &&
!access_mask_subset(access_rights, ACCESS_FILE))
return -EINVAL;
- if (WARN_ON_ONCE(ruleset->num_layers != 1))
- return -EINVAL;
-
/* Transforms relative access rights to absolute ones. */
- access_rights |= LANDLOCK_MASK_ACCESS_FS &
- ~landlock_get_fs_access_mask(ruleset, 0);
+ access_rights |=
+ LANDLOCK_MASK_ACCESS_FS &
+ ~(ruleset->layer.fs | _LANDLOCK_ACCESS_FS_INITIALLY_DENIED);
id.key.object = get_inode_object(d_backing_inode(path->dentry));
if (IS_ERR(id.key.object))
return PTR_ERR(id.key.object);
@@ -364,7 +362,7 @@ int landlock_append_fs_rule(struct landlock_ruleset *const ruleset,
* Returns NULL if no rule is found or if @dentry is negative.
*/
static const struct landlock_rule *
-find_rule(const struct landlock_ruleset *const domain,
+find_rule(const struct landlock_domain *const domain,
const struct dentry *const dentry)
{
const struct landlock_rule *rule;
@@ -740,7 +738,7 @@ static void test_is_eacces_with_write(struct kunit *const test)
* Return: True if the access request is granted, false otherwise.
*/
static bool
-is_access_to_paths_allowed(const struct landlock_ruleset *const domain,
+is_access_to_paths_allowed(const struct landlock_domain *const domain,
const struct path *const path,
const access_mask_t access_request_parent1,
struct layer_access_masks *layer_masks_parent1,
@@ -1026,7 +1024,7 @@ static access_mask_t maybe_remove(const struct dentry *const dentry)
* Return: True if all the domain access rights are allowed for @dir, false if
* the walk reached @mnt_root.
*/
-static bool collect_domain_accesses(const struct landlock_ruleset *const domain,
+static bool collect_domain_accesses(const struct landlock_domain *const domain,
const struct dentry *const mnt_root,
struct dentry *dir,
struct layer_access_masks *layer_masks_dom)
@@ -1578,8 +1576,8 @@ static int hook_path_truncate(const struct path *const path)
* @masks: Layer access masks to unmask
* @access: Access bits that control scoping
*/
-static void unmask_scoped_access(const struct landlock_ruleset *const client,
- const struct landlock_ruleset *const server,
+static void unmask_scoped_access(const struct landlock_domain *const client,
+ const struct landlock_domain *const server,
struct layer_access_masks *const masks,
const access_mask_t access)
{
@@ -1633,7 +1631,7 @@ static void unmask_scoped_access(const struct landlock_ruleset *const client,
static int hook_unix_find(const struct path *const path, struct sock *other,
int flags)
{
- const struct landlock_ruleset *dom_other;
+ const struct landlock_domain *dom_other;
const struct landlock_cred_security *subject;
struct layer_access_masks layer_masks;
struct landlock_request request = {};
@@ -1914,7 +1912,7 @@ static bool control_current_fowner(struct fown_struct *const fown)
static void hook_file_set_fowner(struct file *file)
{
- struct landlock_ruleset *prev_dom;
+ struct landlock_domain *prev_dom;
struct landlock_cred_security fown_subject = {};
size_t fown_layer = 0;
@@ -1926,7 +1924,7 @@ static void hook_file_set_fowner(struct file *file)
landlock_get_applicable_subject(
current_cred(), signal_scope, &fown_layer);
if (new_subject) {
- landlock_get_ruleset(new_subject->domain);
+ landlock_get_domain(new_subject->domain);
fown_subject = *new_subject;
}
}
@@ -1938,12 +1936,12 @@ static void hook_file_set_fowner(struct file *file)
#endif /* CONFIG_AUDIT*/
/* May be called in an RCU read-side critical section. */
- landlock_put_ruleset_deferred(prev_dom);
+ landlock_put_domain_deferred(prev_dom);
}
static void hook_file_free_security(struct file *file)
{
- landlock_put_ruleset_deferred(landlock_file(file)->fown_subject.domain);
+ landlock_put_domain_deferred(landlock_file(file)->fown_subject.domain);
}
static struct security_hook_list landlock_hooks[] __ro_after_init = {
diff --git a/security/landlock/net.c b/security/landlock/net.c
index 34a72a4f833d..de108b3277bc 100644
--- a/security/landlock/net.c
+++ b/security/landlock/net.c
@@ -32,8 +32,7 @@ int landlock_append_net_rule(struct landlock_ruleset *const ruleset,
BUILD_BUG_ON(sizeof(port) > sizeof(id.key.data));
/* Transforms relative access rights to absolute ones. */
- access_rights |= LANDLOCK_MASK_ACCESS_NET &
- ~landlock_get_net_access_mask(ruleset, 0);
+ access_rights |= LANDLOCK_MASK_ACCESS_NET & ~ruleset->layer.net;
mutex_lock(&ruleset->lock);
err = landlock_insert_rule(ruleset, id, access_rights);
diff --git a/security/landlock/ruleset.c b/security/landlock/ruleset.c
index 0cf31a7e4c7b..c220e0f9cf5f 100644
--- a/security/landlock/ruleset.c
+++ b/security/landlock/ruleset.c
@@ -20,22 +20,27 @@
#include <linux/refcount.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
-#include <linux/workqueue.h>
#include "access.h"
-#include "domain.h"
#include "limits.h"
#include "object.h"
#include "ruleset.h"
-static struct landlock_ruleset *create_ruleset(const u32 num_layers)
+struct landlock_ruleset *
+landlock_create_ruleset(const access_mask_t fs_access_mask,
+ const access_mask_t net_access_mask,
+ const access_mask_t scope_mask)
{
struct landlock_ruleset *new_ruleset;
- new_ruleset = kzalloc_flex(*new_ruleset, access_masks, num_layers,
- GFP_KERNEL_ACCOUNT);
+ /* Informs about useless ruleset. */
+ if (!fs_access_mask && !net_access_mask && !scope_mask)
+ return ERR_PTR(-ENOMSG);
+
+ new_ruleset = kzalloc(sizeof(*new_ruleset), GFP_KERNEL_ACCOUNT);
if (!new_ruleset)
return ERR_PTR(-ENOMEM);
+
refcount_set(&new_ruleset->usage, 1);
mutex_init(&new_ruleset->lock);
new_ruleset->rules.root_inode = RB_ROOT;
@@ -44,34 +49,21 @@ static struct landlock_ruleset *create_ruleset(const u32 num_layers)
new_ruleset->rules.root_net_port = RB_ROOT;
#endif /* IS_ENABLED(CONFIG_INET) */
- new_ruleset->num_layers = num_layers;
- /*
- * hierarchy = NULL
- * rules.num_rules = 0
- * access_masks[] = 0
- */
- return new_ruleset;
-}
-
-struct landlock_ruleset *
-landlock_create_ruleset(const access_mask_t fs_access_mask,
- const access_mask_t net_access_mask,
- const access_mask_t scope_mask)
-{
- struct landlock_ruleset *new_ruleset;
-
- /* Informs about useless ruleset. */
- if (!fs_access_mask && !net_access_mask && !scope_mask)
- return ERR_PTR(-ENOMSG);
- new_ruleset = create_ruleset(1);
- if (IS_ERR(new_ruleset))
- return new_ruleset;
- if (fs_access_mask)
- landlock_add_fs_access_mask(new_ruleset, fs_access_mask, 0);
- if (net_access_mask)
- landlock_add_net_access_mask(new_ruleset, net_access_mask, 0);
- if (scope_mask)
- landlock_add_scope_mask(new_ruleset, scope_mask, 0);
+ /* Should already be checked in sys_landlock_create_ruleset(). */
+ if (fs_access_mask) {
+ WARN_ON_ONCE(fs_access_mask !=
+ (fs_access_mask & LANDLOCK_MASK_ACCESS_FS));
+ new_ruleset->layer.fs |= fs_access_mask;
+ }
+ if (net_access_mask) {
+ WARN_ON_ONCE(net_access_mask !=
+ (net_access_mask & LANDLOCK_MASK_ACCESS_NET));
+ new_ruleset->layer.net |= net_access_mask;
+ }
+ if (scope_mask) {
+ WARN_ON_ONCE(scope_mask != (scope_mask & LANDLOCK_MASK_SCOPE));
+ new_ruleset->layer.scope |= scope_mask;
+ }
return new_ruleset;
}
@@ -128,7 +120,7 @@ create_rule(const struct landlock_id id,
return ERR_PTR(-ENOMEM);
RB_CLEAR_NODE(&new_rule->node);
if (is_object_pointer(id.type)) {
- /* This should have been caught by insert_rule(). */
+ /* This should have been caught by landlock_rule_insert(). */
WARN_ON_ONCE(!id.key.object);
landlock_get_object(id.key.object);
}
@@ -144,12 +136,6 @@ create_rule(const struct landlock_id id,
return new_rule;
}
-static struct rb_root *get_root(struct landlock_ruleset *const ruleset,
- const enum landlock_key_type key_type)
-{
- return landlock_get_rule_root(&ruleset->rules, key_type);
-}
-
static void free_rule(struct landlock_rule *const rule,
const enum landlock_key_type key_type)
{
@@ -166,16 +152,12 @@ static void build_check_ruleset(void)
const struct landlock_rules rules = {
.num_rules = ~0,
};
- const struct landlock_ruleset ruleset = {
- .num_layers = ~0,
- };
BUILD_BUG_ON(rules.num_rules < LANDLOCK_MAX_NUM_RULES);
- BUILD_BUG_ON(ruleset.num_layers < LANDLOCK_MAX_NUM_LAYERS);
}
/**
- * insert_rule - Create and insert a rule in a rule set
+ * landlock_rule_insert - Create and insert a rule in a rule set
*
* @rules: The rule storage to be updated. The caller is responsible for
* any required locking. For rulesets, this means holding
@@ -197,10 +179,10 @@ static void build_check_ruleset(void)
*
* Return: 0 on success, -errno on failure.
*/
-static int insert_rule(struct landlock_rules *const rules,
- const struct landlock_id id,
- const struct landlock_layer (*layers)[],
- const size_t num_layers)
+int landlock_rule_insert(struct landlock_rules *const rules,
+ const struct landlock_id id,
+ const struct landlock_layer (*layers)[],
+ const size_t num_layers)
{
struct rb_node **walker_node;
struct rb_node *parent_node = NULL;
@@ -240,7 +222,7 @@ static int insert_rule(struct landlock_rules *const rules,
if ((*layers)[0].level == 0) {
/*
* Extends access rights when the request comes from
- * landlock_add_rule(2), i.e. contained by a ruleset.
+ * landlock_add_rule(2), i.e. @rules is not a domain.
*/
if (WARN_ON_ONCE(this->num_layers != 1))
return -EINVAL;
@@ -301,176 +283,14 @@ int landlock_insert_rule(struct landlock_ruleset *const ruleset,
{
struct landlock_layer layers[] = { {
.access = access,
- /* When @level is zero, insert_rule() extends @ruleset. */
+ /* When @level is zero, landlock_rule_insert() extends @ruleset. */
.level = 0,
} };
build_check_layer();
lockdep_assert_held(&ruleset->lock);
- return insert_rule(&ruleset->rules, id, &layers, ARRAY_SIZE(layers));
-}
-
-static int merge_tree(struct landlock_ruleset *const dst,
- struct landlock_ruleset *const src,
- const enum landlock_key_type key_type)
-{
- struct landlock_rule *walker_rule, *next_rule;
- struct rb_root *src_root;
- int err = 0;
-
- might_sleep();
- lockdep_assert_held(&dst->lock);
- lockdep_assert_held(&src->lock);
-
- src_root = get_root(src, key_type);
- if (IS_ERR(src_root))
- return PTR_ERR(src_root);
-
- /* Merges the @src tree. */
- rbtree_postorder_for_each_entry_safe(walker_rule, next_rule, src_root,
- node) {
- struct landlock_layer layers[] = { {
- .level = dst->num_layers,
- } };
- const struct landlock_id id = {
- .key = walker_rule->key,
- .type = key_type,
- };
-
- if (WARN_ON_ONCE(walker_rule->num_layers != 1))
- return -EINVAL;
-
- if (WARN_ON_ONCE(walker_rule->layers[0].level != 0))
- return -EINVAL;
-
- layers[0].access = walker_rule->layers[0].access;
-
- err = insert_rule(&dst->rules, id, &layers, ARRAY_SIZE(layers));
- if (err)
- return err;
- }
- return err;
-}
-
-static int merge_ruleset(struct landlock_ruleset *const dst,
- struct landlock_ruleset *const src)
-{
- int err = 0;
-
- might_sleep();
- /* Should already be checked by landlock_merge_ruleset() */
- if (WARN_ON_ONCE(!src))
- return 0;
- /* Only merge into a domain. */
- if (WARN_ON_ONCE(!dst || !dst->hierarchy))
- return -EINVAL;
-
- /* Locks @dst first because we are its only owner. */
- mutex_lock(&dst->lock);
- mutex_lock_nested(&src->lock, SINGLE_DEPTH_NESTING);
-
- /* Stacks the new layer. */
- if (WARN_ON_ONCE(src->num_layers != 1 || dst->num_layers < 1)) {
- err = -EINVAL;
- goto out_unlock;
- }
- dst->access_masks[dst->num_layers - 1] =
- landlock_upgrade_handled_access_masks(src->access_masks[0]);
-
- /* Merges the @src inode tree. */
- err = merge_tree(dst, src, LANDLOCK_KEY_INODE);
- if (err)
- goto out_unlock;
-
-#if IS_ENABLED(CONFIG_INET)
- /* Merges the @src network port tree. */
- err = merge_tree(dst, src, LANDLOCK_KEY_NET_PORT);
- if (err)
- goto out_unlock;
-#endif /* IS_ENABLED(CONFIG_INET) */
-
-out_unlock:
- mutex_unlock(&src->lock);
- mutex_unlock(&dst->lock);
- return err;
-}
-
-static int inherit_tree(struct landlock_ruleset *const parent,
- struct landlock_ruleset *const child,
- const enum landlock_key_type key_type)
-{
- struct landlock_rule *walker_rule, *next_rule;
- struct rb_root *parent_root;
- int err = 0;
-
- might_sleep();
- lockdep_assert_held(&parent->lock);
- lockdep_assert_held(&child->lock);
-
- parent_root = get_root(parent, key_type);
- if (IS_ERR(parent_root))
- return PTR_ERR(parent_root);
-
- /* Copies the @parent inode or network tree. */
- rbtree_postorder_for_each_entry_safe(walker_rule, next_rule,
- parent_root, node) {
- const struct landlock_id id = {
- .key = walker_rule->key,
- .type = key_type,
- };
-
- err = insert_rule(&child->rules, id, &walker_rule->layers,
- walker_rule->num_layers);
- if (err)
- return err;
- }
- return err;
-}
-
-static int inherit_ruleset(struct landlock_ruleset *const parent,
- struct landlock_ruleset *const child)
-{
- int err = 0;
-
- might_sleep();
- if (!parent)
- return 0;
-
- /* Locks @child first because we are its only owner. */
- mutex_lock(&child->lock);
- mutex_lock_nested(&parent->lock, SINGLE_DEPTH_NESTING);
-
- /* Copies the @parent inode tree. */
- err = inherit_tree(parent, child, LANDLOCK_KEY_INODE);
- if (err)
- goto out_unlock;
-
-#if IS_ENABLED(CONFIG_INET)
- /* Copies the @parent network port tree. */
- err = inherit_tree(parent, child, LANDLOCK_KEY_NET_PORT);
- if (err)
- goto out_unlock;
-#endif /* IS_ENABLED(CONFIG_INET) */
-
- if (WARN_ON_ONCE(child->num_layers <= parent->num_layers)) {
- err = -EINVAL;
- goto out_unlock;
- }
- /* Copies the parent layer stack and leaves a space for the new layer. */
- memcpy(child->access_masks, parent->access_masks,
- flex_array_size(parent, access_masks, parent->num_layers));
-
- if (WARN_ON_ONCE(!parent->hierarchy)) {
- err = -EINVAL;
- goto out_unlock;
- }
- landlock_get_hierarchy(parent->hierarchy);
- child->hierarchy->parent = parent->hierarchy;
-
-out_unlock:
- mutex_unlock(&parent->lock);
- mutex_unlock(&child->lock);
- return err;
+ return landlock_rule_insert(&ruleset->rules, id, &layers,
+ ARRAY_SIZE(layers));
}
void landlock_free_rules(struct landlock_rules *const rules)
@@ -493,7 +313,6 @@ static void free_ruleset(struct landlock_ruleset *const ruleset)
{
might_sleep();
landlock_free_rules(&ruleset->rules);
- landlock_put_hierarchy(ruleset->hierarchy);
kfree(ruleset);
}
@@ -503,81 +322,3 @@ void landlock_put_ruleset(struct landlock_ruleset *const ruleset)
if (ruleset && refcount_dec_and_test(&ruleset->usage))
free_ruleset(ruleset);
}
-
-static void free_ruleset_work(struct work_struct *const work)
-{
- struct landlock_ruleset *ruleset;
-
- ruleset = container_of(work, struct landlock_ruleset, work_free);
- free_ruleset(ruleset);
-}
-
-/* Only called by hook_cred_free(). */
-void landlock_put_ruleset_deferred(struct landlock_ruleset *const ruleset)
-{
- if (ruleset && refcount_dec_and_test(&ruleset->usage)) {
- INIT_WORK(&ruleset->work_free, free_ruleset_work);
- schedule_work(&ruleset->work_free);
- }
-}
-
-/**
- * landlock_merge_ruleset - Merge a ruleset with a domain
- *
- * @parent: Parent domain.
- * @ruleset: New ruleset to be merged.
- *
- * The current task is requesting to be restricted. The subjective credentials
- * must not be in an overridden state. cf. landlock_init_hierarchy_log().
- *
- * Return: A new domain merging @parent and @ruleset on success, or ERR_PTR()
- * on failure. If @parent is NULL, the new domain duplicates @ruleset.
- */
-struct landlock_ruleset *
-landlock_merge_ruleset(struct landlock_ruleset *const parent,
- struct landlock_ruleset *const ruleset)
-{
- struct landlock_ruleset *new_dom __free(landlock_put_ruleset) = NULL;
- u32 num_layers;
- int err;
-
- might_sleep();
- if (WARN_ON_ONCE(!ruleset || parent == ruleset))
- return ERR_PTR(-EINVAL);
-
- if (parent) {
- if (parent->num_layers >= LANDLOCK_MAX_NUM_LAYERS)
- return ERR_PTR(-E2BIG);
- num_layers = parent->num_layers + 1;
- } else {
- num_layers = 1;
- }
-
- /* Creates a new domain... */
- new_dom = create_ruleset(num_layers);
- if (IS_ERR(new_dom))
- return new_dom;
-
- new_dom->hierarchy =
- kzalloc_obj(*new_dom->hierarchy, GFP_KERNEL_ACCOUNT);
- if (!new_dom->hierarchy)
- return ERR_PTR(-ENOMEM);
-
- refcount_set(&new_dom->hierarchy->usage, 1);
-
- /* ...as a child of @parent... */
- err = inherit_ruleset(parent, new_dom);
- if (err)
- return ERR_PTR(err);
-
- /* ...and including @ruleset. */
- err = merge_ruleset(new_dom, ruleset);
- if (err)
- return ERR_PTR(err);
-
- err = landlock_init_hierarchy_log(new_dom->hierarchy);
- if (err)
- return ERR_PTR(err);
-
- return no_free_ptr(new_dom);
-}
diff --git a/security/landlock/ruleset.h b/security/landlock/ruleset.h
index 1d3a9c36eb74..bf127ff7496e 100644
--- a/security/landlock/ruleset.h
+++ b/security/landlock/ruleset.h
@@ -14,14 +14,11 @@
#include <linux/mutex.h>
#include <linux/rbtree.h>
#include <linux/refcount.h>
-#include <linux/workqueue.h>
#include "access.h"
#include "limits.h"
#include "object.h"
-struct landlock_hierarchy;
-
/**
* struct landlock_layer - Access rights for a given layer
*/
@@ -147,54 +144,20 @@ struct landlock_ruleset {
* @rules: Red-black tree storage for rules.
*/
struct landlock_rules rules;
-
/**
- * @hierarchy: Enables hierarchy identification even when a parent
- * domain vanishes. This is needed for the ptrace protection.
+ * @lock: Protects against concurrent modifications of @rules, if @usage
+ * is greater than zero.
+ */
+ struct mutex lock;
+ /**
+ * @usage: Number of file descriptors referencing this ruleset.
*/
- struct landlock_hierarchy *hierarchy;
- union {
- /**
- * @work_free: Enables to free a ruleset within a lockless
- * section. This is only used by
- * landlock_put_ruleset_deferred() when @usage reaches zero. The
- * fields @lock, @usage, @num_layers and @access_masks are then
- * unused.
- */
- struct work_struct work_free;
- struct {
- /**
- * @lock: Protects against concurrent modifications of
- * @root, if @usage is greater than zero.
- */
- struct mutex lock;
- /**
- * @usage: Number of processes (i.e. domains) or file
- * descriptors referencing this ruleset.
- */
- refcount_t usage;
- /**
- * @num_layers: Number of layers that are used in this
- * ruleset. This enables to check that all the layers
- * allow an access request. A value of 0 identifies a
- * non-merged ruleset (i.e. not a domain).
- */
- u32 num_layers;
- /**
- * @access_masks: Contains the subset of filesystem and
- * network actions that are restricted by a ruleset.
- * A domain saves all layers of merged rulesets in a
- * stack (FAM), starting from the first layer to the
- * last one. These layers are used when merging
- * rulesets, for user space backward compatibility
- * (i.e. future-proof), and to properly handle merged
- * rulesets without overlapping access rights. These
- * layers are set once and never changed for the
- * lifetime of the ruleset.
- */
- struct access_masks access_masks[];
- };
- };
+ refcount_t usage;
+ /**
+ * @layer: Contains the subset of filesystem and network actions that
+ * are handled by this ruleset.
+ */
+ struct access_masks layer;
};
struct landlock_ruleset *
@@ -203,7 +166,6 @@ landlock_create_ruleset(const access_mask_t access_mask_fs,
const access_mask_t scope_mask);
void landlock_put_ruleset(struct landlock_ruleset *const ruleset);
-void landlock_put_ruleset_deferred(struct landlock_ruleset *const ruleset);
DEFINE_FREE(landlock_put_ruleset, struct landlock_ruleset *,
if (!IS_ERR_OR_NULL(_T)) landlock_put_ruleset(_T))
@@ -212,11 +174,12 @@ int landlock_insert_rule(struct landlock_ruleset *const ruleset,
const struct landlock_id id,
const access_mask_t access);
-void landlock_free_rules(struct landlock_rules *const rules);
+int landlock_rule_insert(struct landlock_rules *const rules,
+ const struct landlock_id id,
+ const struct landlock_layer (*layers)[],
+ const size_t num_layers);
-struct landlock_ruleset *
-landlock_merge_ruleset(struct landlock_ruleset *const parent,
- struct landlock_ruleset *const ruleset);
+void landlock_free_rules(struct landlock_rules *const rules);
/**
* landlock_get_rule_root - Get the root of a rule tree by key type
@@ -251,62 +214,4 @@ static inline void landlock_get_ruleset(struct landlock_ruleset *const ruleset)
refcount_inc(&ruleset->usage);
}
-static inline void
-landlock_add_fs_access_mask(struct landlock_ruleset *const ruleset,
- const access_mask_t fs_access_mask,
- const u16 layer_level)
-{
- access_mask_t fs_mask = fs_access_mask & LANDLOCK_MASK_ACCESS_FS;
-
- /* Should already be checked in sys_landlock_create_ruleset(). */
- WARN_ON_ONCE(fs_access_mask != fs_mask);
- ruleset->access_masks[layer_level].fs |= fs_mask;
-}
-
-static inline void
-landlock_add_net_access_mask(struct landlock_ruleset *const ruleset,
- const access_mask_t net_access_mask,
- const u16 layer_level)
-{
- access_mask_t net_mask = net_access_mask & LANDLOCK_MASK_ACCESS_NET;
-
- /* Should already be checked in sys_landlock_create_ruleset(). */
- WARN_ON_ONCE(net_access_mask != net_mask);
- ruleset->access_masks[layer_level].net |= net_mask;
-}
-
-static inline void
-landlock_add_scope_mask(struct landlock_ruleset *const ruleset,
- const access_mask_t scope_mask, const u16 layer_level)
-{
- access_mask_t mask = scope_mask & LANDLOCK_MASK_SCOPE;
-
- /* Should already be checked in sys_landlock_create_ruleset(). */
- WARN_ON_ONCE(scope_mask != mask);
- ruleset->access_masks[layer_level].scope |= mask;
-}
-
-static inline access_mask_t
-landlock_get_fs_access_mask(const struct landlock_ruleset *const ruleset,
- const u16 layer_level)
-{
- /* Handles all initially denied by default access rights. */
- return ruleset->access_masks[layer_level].fs |
- _LANDLOCK_ACCESS_FS_INITIALLY_DENIED;
-}
-
-static inline access_mask_t
-landlock_get_net_access_mask(const struct landlock_ruleset *const ruleset,
- const u16 layer_level)
-{
- return ruleset->access_masks[layer_level].net;
-}
-
-static inline access_mask_t
-landlock_get_scope_mask(const struct landlock_ruleset *const ruleset,
- const u16 layer_level)
-{
- return ruleset->access_masks[layer_level].scope;
-}
-
#endif /* _SECURITY_LANDLOCK_RULESET_H */
diff --git a/security/landlock/syscalls.c b/security/landlock/syscalls.c
index accfd2e5a0cd..73ccc32d0afd 100644
--- a/security/landlock/syscalls.c
+++ b/security/landlock/syscalls.c
@@ -283,8 +283,6 @@ static struct landlock_ruleset *get_ruleset_from_fd(const int fd,
if (!(fd_file(ruleset_f)->f_mode & mode))
return ERR_PTR(-EPERM);
ruleset = fd_file(ruleset_f)->private_data;
- if (WARN_ON_ONCE(ruleset->num_layers != 1))
- return ERR_PTR(-EINVAL);
landlock_get_ruleset(ruleset);
return ruleset;
}
@@ -341,7 +339,7 @@ static int add_rule_path_beneath(struct landlock_ruleset *const ruleset,
return -ENOMSG;
/* Checks that allowed_access matches the @ruleset constraints. */
- mask = ruleset->access_masks[0].fs;
+ mask = ruleset->layer.fs;
if ((path_beneath_attr.allowed_access | mask) != mask)
return -EINVAL;
@@ -377,7 +375,7 @@ static int add_rule_net_port(struct landlock_ruleset *ruleset,
return -ENOMSG;
/* Checks that allowed_access matches the @ruleset constraints. */
- mask = landlock_get_net_access_mask(ruleset, 0);
+ mask = ruleset->layer.net;
if ((net_port_attr.allowed_access | mask) != mask)
return -EINVAL;
@@ -556,7 +554,7 @@ SYSCALL_DEFINE2(landlock_restrict_self, const int, ruleset_fd, const __u32,
* manipulating the current credentials because they are
* dedicated per thread.
*/
- struct landlock_ruleset *const new_dom =
+ struct landlock_domain *const new_dom =
landlock_merge_ruleset(new_llcred->domain, ruleset);
if (IS_ERR(new_dom)) {
abort_creds(new_cred);
@@ -571,7 +569,7 @@ SYSCALL_DEFINE2(landlock_restrict_self, const int, ruleset_fd, const __u32,
#endif /* CONFIG_AUDIT */
/* Replaces the old (prepared) domain. */
- landlock_put_ruleset(new_llcred->domain);
+ landlock_put_domain(new_llcred->domain);
new_llcred->domain = new_dom;
#ifdef CONFIG_AUDIT
diff --git a/security/landlock/task.c b/security/landlock/task.c
index 6d46042132ce..2e7ee62958b2 100644
--- a/security/landlock/task.c
+++ b/security/landlock/task.c
@@ -41,8 +41,8 @@
* Return: True if @parent is an ancestor of or equal to @child, false
* otherwise.
*/
-static bool domain_scope_le(const struct landlock_ruleset *const parent,
- const struct landlock_ruleset *const child)
+static bool domain_scope_le(const struct landlock_domain *const parent,
+ const struct landlock_domain *const child)
{
const struct landlock_hierarchy *walker;
@@ -63,8 +63,8 @@ static bool domain_scope_le(const struct landlock_ruleset *const parent,
return false;
}
-static int domain_ptrace(const struct landlock_ruleset *const parent,
- const struct landlock_ruleset *const child)
+static int domain_ptrace(const struct landlock_domain *const parent,
+ const struct landlock_domain *const child)
{
if (domain_scope_le(parent, child))
return 0;
@@ -97,7 +97,7 @@ static int hook_ptrace_access_check(struct task_struct *const child,
scoped_guard(rcu)
{
- const struct landlock_ruleset *const child_dom =
+ const struct landlock_domain *const child_dom =
landlock_get_task_domain(child);
err = domain_ptrace(parent_subject->domain, child_dom);
}
@@ -136,7 +136,7 @@ static int hook_ptrace_access_check(struct task_struct *const child,
static int hook_ptrace_traceme(struct task_struct *const parent)
{
const struct landlock_cred_security *parent_subject;
- const struct landlock_ruleset *child_dom;
+ const struct landlock_domain *child_dom;
int err;
child_dom = landlock_get_current_domain();
@@ -177,8 +177,8 @@ static int hook_ptrace_traceme(struct task_struct *const parent)
* Return: True if @server is in a different domain from @client and @client
* is scoped to access @server (i.e. access should be denied), false otherwise.
*/
-static bool domain_is_scoped(const struct landlock_ruleset *const client,
- const struct landlock_ruleset *const server,
+static bool domain_is_scoped(const struct landlock_domain *const client,
+ const struct landlock_domain *const server,
access_mask_t scope)
{
int client_layer, server_layer;
@@ -237,9 +237,9 @@ static bool domain_is_scoped(const struct landlock_ruleset *const client,
}
static bool sock_is_scoped(struct sock *const other,
- const struct landlock_ruleset *const domain)
+ const struct landlock_domain *const domain)
{
- const struct landlock_ruleset *dom_other;
+ const struct landlock_domain *dom_other;
/* The credentials will not change. */
lockdep_assert_held(&unix_sk(other)->lock);
--
2.53.0
^ permalink raw reply related
* [PATCH v2 02/17] landlock: Move domain query functions to domain.c
From: Mickaël Salaün @ 2026-04-06 14:37 UTC (permalink / raw)
To: Christian Brauner, Günther Noack, Steven Rostedt
Cc: Mickaël Salaün, Jann Horn, Jeff Xu, Justin Suess,
Kees Cook, Masami Hiramatsu, Mathieu Desnoyers, Matthieu Buffet,
Mikhail Ivanov, Tingmao Wang, kernel-team, linux-fsdevel,
linux-security-module, linux-trace-kernel
In-Reply-To: <20260406143717.1815792-1-mic@digikod.net>
Grouping domain-specific code in one compilation unit reduces coupling
between domain and ruleset implementations.
Move the access-check functions that only operate on domains:
- landlock_find_rule() (from ruleset.c to domain.c)
- landlock_unmask_layers() (from ruleset.c to domain.c)
- landlock_init_layer_masks() (from ruleset.c to domain.c)
- landlock_union_access_masks() (from ruleset.h to domain.h)
These functions are called during the pathwalk and network access checks
to evaluate whether a domain grants the requested access. They do not
modify the domain or its rules.
The merge and inherit chain (merge_tree, merge_ruleset, inherit_tree,
inherit_ruleset, landlock_merge_ruleset) stays in ruleset.c for now
because it calls the static create_ruleset() allocator. A following
commit moves it when the domain type switch eliminates the dependency on
create_ruleset().
Expand the landlock_unmask_layers() comment to document the per-layer
composition semantics.
No behavioral change. Function signatures are unchanged; the only
modifications are mechanical adjustments for the struct landlock_rules
embedding introduced by the previous commit.
Cc: Günther Noack <gnoack@google.com>
Cc: Tingmao Wang <m@maowtm.org>
Signed-off-by: Mickaël Salaün <mic@digikod.net>
---
Changes since v1:
- New patch.
---
security/landlock/domain.c | 150 ++++++++++++++++++++++++++++++++++++
security/landlock/domain.h | 38 +++++++++
security/landlock/net.c | 1 +
security/landlock/ruleset.c | 135 --------------------------------
security/landlock/ruleset.h | 38 ---------
5 files changed, 189 insertions(+), 173 deletions(-)
diff --git a/security/landlock/domain.c b/security/landlock/domain.c
index 378d86974ffb..cb79edf5df02 100644
--- a/security/landlock/domain.c
+++ b/security/landlock/domain.c
@@ -10,11 +10,17 @@
#include <kunit/test.h>
#include <linux/bitops.h>
#include <linux/bits.h>
+#include <linux/cleanup.h>
#include <linux/cred.h>
+#include <linux/err.h>
#include <linux/file.h>
+#include <linux/lockdep.h>
#include <linux/mm.h>
+#include <linux/mutex.h>
+#include <linux/overflow.h>
#include <linux/path.h>
#include <linux/pid.h>
+#include <linux/rbtree.h>
#include <linux/refcount.h>
#include <linux/sched.h>
#include <linux/signal.h>
@@ -26,6 +32,8 @@
#include "common.h"
#include "domain.h"
#include "id.h"
+#include "limits.h"
+#include "object.h"
#include "ruleset.h"
static void free_domain(struct landlock_domain *const domain)
@@ -59,6 +67,148 @@ void landlock_put_domain_deferred(struct landlock_domain *const domain)
}
}
+/* The returned rule has the same lifetime as @ruleset. */
+const struct landlock_rule *
+landlock_find_rule(const struct landlock_ruleset *const ruleset,
+ const struct landlock_id id)
+{
+ const struct rb_root *root;
+ const struct rb_node *node;
+
+ root = landlock_get_rule_root((struct landlock_rules *)&ruleset->rules,
+ id.type);
+ if (IS_ERR(root))
+ return NULL;
+ node = root->rb_node;
+
+ while (node) {
+ struct landlock_rule *this =
+ rb_entry(node, struct landlock_rule, node);
+
+ if (this->key.data == id.key.data)
+ return this;
+ if (this->key.data < id.key.data)
+ node = node->rb_right;
+ else
+ node = node->rb_left;
+ }
+ return NULL;
+}
+
+/**
+ * landlock_unmask_layers - Remove the access rights in @masks which are
+ * granted in @rule
+ *
+ * Updates the set of (per-layer) unfulfilled access rights @masks so that all
+ * the access rights granted in @rule are removed from it (because they are now
+ * fulfilled).
+ *
+ * @rule: A rule that grants a set of access rights for each layer.
+ * @masks: A matrix of unfulfilled access rights for each layer.
+ *
+ * Return: True if the request is allowed (i.e. @rule grants all the remaining
+ * unfulfilled access rights, leaving no set bits in @masks).
+ */
+bool landlock_unmask_layers(const struct landlock_rule *const rule,
+ struct layer_access_masks *masks)
+{
+ if (!masks)
+ return true;
+ if (!rule)
+ return false;
+
+ /*
+ * An access is granted if, for each policy layer, at least one rule
+ * encountered on the pathwalk grants the requested access, regardless
+ * of its position in the layer stack. We must then check the remaining
+ * layers for each inode, from the first added layer to the last one.
+ * When there are multiple requested accesses, for each policy layer,
+ * the full set of requested accesses may not be granted by only one
+ * rule, but by the union (binary OR) of multiple rules. For example,
+ * /a/b <execute> + /a <read> grants /a/b <execute + read>.
+ *
+ * This function is called once per matching rule during the pathwalk,
+ * progressively clearing bits in @masks. The overall access decision
+ * is: access is granted iff FOR-ALL layers l, masks->access[l] == 0.
+ * When two independent mechanisms can each grant access within a layer
+ * (e.g. a path rule OR a scope exception), the composition must
+ * evaluate per-layer: FOR-ALL l (A(l) OR B(l)), not (FOR-ALL l A(l)) OR
+ * (FOR-ALL l B(l)), to prevent bypass when different layers grant via
+ * different mechanisms.
+ */
+ for (size_t i = 0; i < rule->num_layers; i++) {
+ const struct landlock_layer *const layer = &rule->layers[i];
+
+ /* Clear the bits where the layer in the rule grants access. */
+ masks->access[layer->level - 1] &= ~layer->access;
+ }
+
+ for (size_t i = 0; i < ARRAY_SIZE(masks->access); i++) {
+ if (masks->access[i])
+ return false;
+ }
+ return true;
+}
+
+typedef access_mask_t
+get_access_mask_t(const struct landlock_ruleset *const ruleset,
+ const u16 layer_level);
+
+/**
+ * landlock_init_layer_masks - Initialize layer masks from an access request
+ *
+ * Populates @masks such that for each access right in @access_request, the bits
+ * for all the layers are set where this access right is handled.
+ *
+ * @domain: The domain that defines the current restrictions.
+ * @access_request: The requested access rights to check.
+ * @masks: Layer access masks to populate.
+ * @key_type: The key type to switch between access masks of different types.
+ *
+ * Return: An access mask where each access right bit is set which is handled in
+ * any of the active layers in @domain.
+ */
+access_mask_t
+landlock_init_layer_masks(const struct landlock_ruleset *const domain,
+ const access_mask_t access_request,
+ struct layer_access_masks *const masks,
+ const enum landlock_key_type key_type)
+{
+ access_mask_t handled_accesses = 0;
+ get_access_mask_t *get_access_mask;
+
+ switch (key_type) {
+ case LANDLOCK_KEY_INODE:
+ get_access_mask = landlock_get_fs_access_mask;
+ break;
+
+#if IS_ENABLED(CONFIG_INET)
+ case LANDLOCK_KEY_NET_PORT:
+ get_access_mask = landlock_get_net_access_mask;
+ break;
+#endif /* IS_ENABLED(CONFIG_INET) */
+
+ default:
+ WARN_ON_ONCE(1);
+ return 0;
+ }
+
+ /* An empty access request can happen because of O_WRONLY | O_RDWR. */
+ if (!access_request)
+ return 0;
+
+ for (size_t i = 0; i < domain->num_layers; i++) {
+ const access_mask_t handled = get_access_mask(domain, i);
+
+ masks->access[i] = access_request & handled;
+ handled_accesses |= masks->access[i];
+ }
+ for (size_t i = domain->num_layers; i < ARRAY_SIZE(masks->access); i++)
+ masks->access[i] = 0;
+
+ return handled_accesses;
+}
+
#ifdef CONFIG_AUDIT
/**
diff --git a/security/landlock/domain.h b/security/landlock/domain.h
index 66333b6122a9..afa97011ecd2 100644
--- a/security/landlock/domain.h
+++ b/security/landlock/domain.h
@@ -227,12 +227,50 @@ struct landlock_domain {
};
};
+/**
+ * landlock_union_access_masks - Return all access rights handled in the
+ * domain
+ *
+ * @domain: Landlock ruleset (used as a domain)
+ *
+ * Return: An access_masks result of the OR of all the domain's access masks.
+ */
+static inline struct access_masks
+landlock_union_access_masks(const struct landlock_ruleset *const domain)
+{
+ union access_masks_all matches = {};
+ size_t layer_level;
+
+ for (layer_level = 0; layer_level < domain->num_layers; layer_level++) {
+ union access_masks_all layer = {
+ .masks = domain->access_masks[layer_level],
+ };
+
+ matches.all |= layer.all;
+ }
+
+ return matches.masks;
+}
+
void landlock_put_domain(struct landlock_domain *const domain);
void landlock_put_domain_deferred(struct landlock_domain *const domain);
DEFINE_FREE(landlock_put_domain, struct landlock_domain *,
if (!IS_ERR_OR_NULL(_T)) landlock_put_domain(_T))
+const struct landlock_rule *
+landlock_find_rule(const struct landlock_ruleset *const ruleset,
+ const struct landlock_id id);
+
+bool landlock_unmask_layers(const struct landlock_rule *const rule,
+ struct layer_access_masks *masks);
+
+access_mask_t
+landlock_init_layer_masks(const struct landlock_ruleset *const domain,
+ const access_mask_t access_request,
+ struct layer_access_masks *masks,
+ const enum landlock_key_type key_type);
+
static inline void landlock_get_domain(struct landlock_domain *const domain)
{
if (domain)
diff --git a/security/landlock/net.c b/security/landlock/net.c
index c368649985c5..34a72a4f833d 100644
--- a/security/landlock/net.c
+++ b/security/landlock/net.c
@@ -15,6 +15,7 @@
#include "audit.h"
#include "common.h"
#include "cred.h"
+#include "domain.h"
#include "limits.h"
#include "net.h"
#include "ruleset.h"
diff --git a/security/landlock/ruleset.c b/security/landlock/ruleset.c
index a6835011af2b..0cf31a7e4c7b 100644
--- a/security/landlock/ruleset.c
+++ b/security/landlock/ruleset.c
@@ -581,138 +581,3 @@ landlock_merge_ruleset(struct landlock_ruleset *const parent,
return no_free_ptr(new_dom);
}
-
-/*
- * The returned access has the same lifetime as @ruleset.
- */
-const struct landlock_rule *
-landlock_find_rule(const struct landlock_ruleset *const ruleset,
- const struct landlock_id id)
-{
- const struct rb_root *root;
- const struct rb_node *node;
-
- root = landlock_get_rule_root((struct landlock_rules *)&ruleset->rules,
- id.type);
- if (IS_ERR(root))
- return NULL;
- node = root->rb_node;
-
- while (node) {
- struct landlock_rule *this =
- rb_entry(node, struct landlock_rule, node);
-
- if (this->key.data == id.key.data)
- return this;
- if (this->key.data < id.key.data)
- node = node->rb_right;
- else
- node = node->rb_left;
- }
- return NULL;
-}
-
-/**
- * landlock_unmask_layers - Remove the access rights in @masks
- * which are granted in @rule
- *
- * Updates the set of (per-layer) unfulfilled access rights @masks
- * so that all the access rights granted in @rule are removed from it
- * (because they are now fulfilled).
- *
- * @rule: A rule that grants a set of access rights for each layer
- * @masks: A matrix of unfulfilled access rights for each layer
- *
- * Return: True if the request is allowed (i.e. the access rights granted all
- * remaining unfulfilled access rights and masks has no leftover set bits).
- */
-bool landlock_unmask_layers(const struct landlock_rule *const rule,
- struct layer_access_masks *masks)
-{
- if (!masks)
- return true;
- if (!rule)
- return false;
-
- /*
- * An access is granted if, for each policy layer, at least one rule
- * encountered on the pathwalk grants the requested access,
- * regardless of its position in the layer stack. We must then check
- * the remaining layers for each inode, from the first added layer to
- * the last one. When there is multiple requested accesses, for each
- * policy layer, the full set of requested accesses may not be granted
- * by only one rule, but by the union (binary OR) of multiple rules.
- * E.g. /a/b <execute> + /a <read> => /a/b <execute + read>
- */
- for (size_t i = 0; i < rule->num_layers; i++) {
- const struct landlock_layer *const layer = &rule->layers[i];
-
- /* Clear the bits where the layer in the rule grants access. */
- masks->access[layer->level - 1] &= ~layer->access;
- }
-
- for (size_t i = 0; i < ARRAY_SIZE(masks->access); i++) {
- if (masks->access[i])
- return false;
- }
- return true;
-}
-
-typedef access_mask_t
-get_access_mask_t(const struct landlock_ruleset *const ruleset,
- const u16 layer_level);
-
-/**
- * landlock_init_layer_masks - Initialize layer masks from an access request
- *
- * Populates @masks such that for each access right in @access_request,
- * the bits for all the layers are set where this access right is handled.
- *
- * @domain: The domain that defines the current restrictions.
- * @access_request: The requested access rights to check.
- * @masks: Layer access masks to populate.
- * @key_type: The key type to switch between access masks of different types.
- *
- * Return: An access mask where each access right bit is set which is handled
- * in any of the active layers in @domain.
- */
-access_mask_t
-landlock_init_layer_masks(const struct landlock_ruleset *const domain,
- const access_mask_t access_request,
- struct layer_access_masks *const masks,
- const enum landlock_key_type key_type)
-{
- access_mask_t handled_accesses = 0;
- get_access_mask_t *get_access_mask;
-
- switch (key_type) {
- case LANDLOCK_KEY_INODE:
- get_access_mask = landlock_get_fs_access_mask;
- break;
-
-#if IS_ENABLED(CONFIG_INET)
- case LANDLOCK_KEY_NET_PORT:
- get_access_mask = landlock_get_net_access_mask;
- break;
-#endif /* IS_ENABLED(CONFIG_INET) */
-
- default:
- WARN_ON_ONCE(1);
- return 0;
- }
-
- /* An empty access request can happen because of O_WRONLY | O_RDWR. */
- if (!access_request)
- return 0;
-
- for (size_t i = 0; i < domain->num_layers; i++) {
- const access_mask_t handled = get_access_mask(domain, i);
-
- masks->access[i] = access_request & handled;
- handled_accesses |= masks->access[i];
- }
- for (size_t i = domain->num_layers; i < ARRAY_SIZE(masks->access); i++)
- masks->access[i] = 0;
-
- return handled_accesses;
-}
diff --git a/security/landlock/ruleset.h b/security/landlock/ruleset.h
index e7875a8b15df..1d3a9c36eb74 100644
--- a/security/landlock/ruleset.h
+++ b/security/landlock/ruleset.h
@@ -218,10 +218,6 @@ struct landlock_ruleset *
landlock_merge_ruleset(struct landlock_ruleset *const parent,
struct landlock_ruleset *const ruleset);
-const struct landlock_rule *
-landlock_find_rule(const struct landlock_ruleset *const ruleset,
- const struct landlock_id id);
-
/**
* landlock_get_rule_root - Get the root of a rule tree by key type
*
@@ -255,31 +251,6 @@ static inline void landlock_get_ruleset(struct landlock_ruleset *const ruleset)
refcount_inc(&ruleset->usage);
}
-/**
- * landlock_union_access_masks - Return all access rights handled in the
- * domain
- *
- * @domain: Landlock ruleset (used as a domain)
- *
- * Return: An access_masks result of the OR of all the domain's access masks.
- */
-static inline struct access_masks
-landlock_union_access_masks(const struct landlock_ruleset *const domain)
-{
- union access_masks_all matches = {};
- size_t layer_level;
-
- for (layer_level = 0; layer_level < domain->num_layers; layer_level++) {
- union access_masks_all layer = {
- .masks = domain->access_masks[layer_level],
- };
-
- matches.all |= layer.all;
- }
-
- return matches.masks;
-}
-
static inline void
landlock_add_fs_access_mask(struct landlock_ruleset *const ruleset,
const access_mask_t fs_access_mask,
@@ -338,13 +309,4 @@ landlock_get_scope_mask(const struct landlock_ruleset *const ruleset,
return ruleset->access_masks[layer_level].scope;
}
-bool landlock_unmask_layers(const struct landlock_rule *const rule,
- struct layer_access_masks *masks);
-
-access_mask_t
-landlock_init_layer_masks(const struct landlock_ruleset *const domain,
- const access_mask_t access_request,
- struct layer_access_masks *masks,
- const enum landlock_key_type key_type);
-
#endif /* _SECURITY_LANDLOCK_RULESET_H */
--
2.53.0
^ permalink raw reply related
* [PATCH v2 05/17] tracing: Add __print_untrusted_str()
From: Mickaël Salaün @ 2026-04-06 14:37 UTC (permalink / raw)
To: Christian Brauner, Günther Noack, Steven Rostedt
Cc: Mickaël Salaün, Jann Horn, Jeff Xu, Justin Suess,
Kees Cook, Masami Hiramatsu, Mathieu Desnoyers, Matthieu Buffet,
Mikhail Ivanov, Tingmao Wang, kernel-team, linux-fsdevel,
linux-security-module, linux-trace-kernel
In-Reply-To: <20260406143717.1815792-1-mic@digikod.net>
Landlock tracepoints expose filesystem paths and process names
that may contain spaces, equal signs, or other characters that
break ftrace field parsing.
Add a new __print_untrusted_str() helper to safely print strings after
escaping all special characters, including common separators (space,
equal sign), quotes, and backslashes. This transforms a string from an
untrusted source (e.g. user space) to make it:
- safe to parse,
- easy to read (for simple strings),
- easy to get back the original.
Cc: Günther Noack <gnoack@google.com>
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Tingmao Wang <m@maowtm.org>
Signed-off-by: Mickaël Salaün <mic@digikod.net>
---
Changes since v1:
https://lore.kernel.org/r/20250523165741.693976-4-mic@digikod.net
- Remove WARN_ON() (pointed out by Steven Rostedt).
---
include/linux/trace_events.h | 2 ++
include/trace/stages/stage3_trace_output.h | 4 +++
include/trace/stages/stage7_class_define.h | 1 +
kernel/trace/trace_output.c | 41 ++++++++++++++++++++++
4 files changed, 48 insertions(+)
diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
index 37eb2f0f3dd8..7f4325d327ee 100644
--- a/include/linux/trace_events.h
+++ b/include/linux/trace_events.h
@@ -57,6 +57,8 @@ trace_print_hex_dump_seq(struct trace_seq *p, const char *prefix_str,
int prefix_type, int rowsize, int groupsize,
const void *buf, size_t len, bool ascii);
+const char *trace_print_untrusted_str_seq(struct trace_seq *s, const char *str);
+
int trace_raw_output_prep(struct trace_iterator *iter,
struct trace_event *event);
extern __printf(2, 3)
diff --git a/include/trace/stages/stage3_trace_output.h b/include/trace/stages/stage3_trace_output.h
index fce85ea2df1c..62e98babb969 100644
--- a/include/trace/stages/stage3_trace_output.h
+++ b/include/trace/stages/stage3_trace_output.h
@@ -133,6 +133,10 @@
trace_print_hex_dump_seq(p, prefix_str, prefix_type, \
rowsize, groupsize, buf, len, ascii)
+#undef __print_untrusted_str
+#define __print_untrusted_str(str) \
+ trace_print_untrusted_str_seq(p, __get_str(str))
+
#undef __print_ns_to_secs
#define __print_ns_to_secs(value) \
({ \
diff --git a/include/trace/stages/stage7_class_define.h b/include/trace/stages/stage7_class_define.h
index fcd564a590f4..1164aacd550f 100644
--- a/include/trace/stages/stage7_class_define.h
+++ b/include/trace/stages/stage7_class_define.h
@@ -24,6 +24,7 @@
#undef __print_array
#undef __print_dynamic_array
#undef __print_hex_dump
+#undef __print_untrusted_str
#undef __get_buf
/*
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index 1996d7aba038..9d14c7cc654d 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -16,6 +16,7 @@
#include <linux/btf.h>
#include <linux/bpf.h>
#include <linux/hashtable.h>
+#include <linux/string_helpers.h>
#include "trace_output.h"
#include "trace_btf.h"
@@ -321,6 +322,46 @@ trace_print_hex_dump_seq(struct trace_seq *p, const char *prefix_str,
}
EXPORT_SYMBOL(trace_print_hex_dump_seq);
+/**
+ * trace_print_untrusted_str_seq - print a string after escaping characters
+ * @s: trace seq struct to write to
+ * @src: The string to print
+ *
+ * Prints a string to a trace seq after escaping all special characters,
+ * including common separators (space, equal sign), quotes, and backslashes.
+ * This transforms a string from an untrusted source (e.g. user space) to make
+ * it:
+ * - safe to parse,
+ * - easy to read (for simple strings),
+ * - easy to convert back to the original.
+ */
+const char *trace_print_untrusted_str_seq(struct trace_seq *s,
+ const char *src)
+{
+ int escaped_size;
+ char *buf;
+ size_t buf_size = seq_buf_get_buf(&s->seq, &buf);
+ const char *ret = trace_seq_buffer_ptr(s);
+
+ /* Buffer exhaustion is normal when the trace buffer is full. */
+ if (!src || buf_size == 0)
+ return NULL;
+
+ escaped_size = string_escape_mem(src, strlen(src), buf, buf_size,
+ ESCAPE_SPACE | ESCAPE_SPECIAL | ESCAPE_NAP | ESCAPE_APPEND |
+ ESCAPE_OCTAL, " ='\"\\");
+ if (unlikely(escaped_size >= buf_size)) {
+ /* We need some room for the final '\0'. */
+ seq_buf_set_overflow(&s->seq);
+ s->full = 1;
+ return NULL;
+ }
+ seq_buf_commit(&s->seq, escaped_size);
+ trace_seq_putc(s, 0);
+ return ret;
+}
+EXPORT_SYMBOL(trace_print_untrusted_str_seq);
+
int trace_raw_output_prep(struct trace_iterator *iter,
struct trace_event *trace_event)
{
--
2.53.0
^ permalink raw reply related
* [PATCH v2 01/17] landlock: Prepare ruleset and domain type split
From: Mickaël Salaün @ 2026-04-06 14:36 UTC (permalink / raw)
To: Christian Brauner, Günther Noack, Steven Rostedt
Cc: Mickaël Salaün, Jann Horn, Jeff Xu, Justin Suess,
Kees Cook, Masami Hiramatsu, Mathieu Desnoyers, Matthieu Buffet,
Mikhail Ivanov, Tingmao Wang, kernel-team, linux-fsdevel,
linux-security-module, linux-trace-kernel
In-Reply-To: <20260406143717.1815792-1-mic@digikod.net>
Rulesets and domains serve fundamentally different purposes: a ruleset
is mutable and user-facing, created by landlock_create_ruleset(), while
a domain is immutable after construction and enforced on tasks via
landlock_restrict_self(). Today both are represented by struct
landlock_ruleset, which conflates mutable and immutable state in a
single type: the lock field is unused by domains, the hierarchy field
is unused by rulesets, and lifecycle functions must handle both cases.
Prepare for a clean type split by introducing two new structures and
the helpers needed to construct domains from a separate compilation
unit:
- struct landlock_rules: holds the red-black tree roots and the rule
count. This storage type is shared by both rulesets and domains.
This decouples rule storage from the domain API; the backing data
structure could be changed independently (e.g. to a hash table,
cf. [1]).
- struct landlock_domain: the immutable domain enforced on tasks. It
has no lock field because its rules and access masks are immutable
once construction is complete. The name reflects the role, not the
internal data structure, to decouple the API from the
implementation.
Embed struct landlock_rules in struct landlock_ruleset, replacing the
individual root_inode, root_net_port, and num_rules fields. All field
accesses are updated mechanically.
Add landlock_get_rule_root() as a static inline helper in the header,
enabling constant propagation when the key type is known at compile
time. Extract landlock_free_rules() so that free_domain() can reuse
the rule-freeing logic without duplicating it.
Add domain lifecycle functions: landlock_get_domain(),
landlock_put_domain(), and landlock_put_domain_deferred(). Move
domain.o from landlock-$(CONFIG_AUDIT) to landlock-y because these
lifecycle functions are needed unconditionally, not just for audit
logging.
No behavioral change. The new types and lifecycle functions are not
yet used by any caller.
Cc: Günther Noack <gnoack@google.com>
Cc: Tingmao Wang <m@maowtm.org>
Link: https://lore.kernel.org/r/20250523165741.693976-1-mic@digikod.net [1]
Signed-off-by: Mickaël Salaün <mic@digikod.net>
---
Changes since v1:
- New patch.
---
security/landlock/Makefile | 6 +--
security/landlock/domain.c | 35 ++++++++++++++++
security/landlock/domain.h | 69 +++++++++++++++++++++++++++++++
security/landlock/ruleset.c | 71 ++++++++++++++++----------------
security/landlock/ruleset.h | 81 +++++++++++++++++++++++++++----------
5 files changed, 201 insertions(+), 61 deletions(-)
diff --git a/security/landlock/Makefile b/security/landlock/Makefile
index ffa7646d99f3..23e13644916f 100644
--- a/security/landlock/Makefile
+++ b/security/landlock/Makefile
@@ -8,11 +8,11 @@ landlock-y := \
cred.o \
task.o \
fs.o \
- tsync.o
+ tsync.o \
+ domain.o
landlock-$(CONFIG_INET) += net.o
landlock-$(CONFIG_AUDIT) += \
id.o \
- audit.o \
- domain.o
+ audit.o
diff --git a/security/landlock/domain.c b/security/landlock/domain.c
index 06b6bd845060..378d86974ffb 100644
--- a/security/landlock/domain.c
+++ b/security/landlock/domain.c
@@ -15,14 +15,49 @@
#include <linux/mm.h>
#include <linux/path.h>
#include <linux/pid.h>
+#include <linux/refcount.h>
#include <linux/sched.h>
#include <linux/signal.h>
+#include <linux/slab.h>
#include <linux/uidgid.h>
+#include <linux/workqueue.h>
#include "access.h"
#include "common.h"
#include "domain.h"
#include "id.h"
+#include "ruleset.h"
+
+static void free_domain(struct landlock_domain *const domain)
+{
+ might_sleep();
+ landlock_free_rules(&domain->rules);
+ landlock_put_hierarchy(domain->hierarchy);
+ kfree(domain);
+}
+
+void landlock_put_domain(struct landlock_domain *const domain)
+{
+ might_sleep();
+ if (domain && refcount_dec_and_test(&domain->usage))
+ free_domain(domain);
+}
+
+static void free_domain_work(struct work_struct *const work)
+{
+ struct landlock_domain *domain;
+
+ domain = container_of(work, struct landlock_domain, work_free);
+ free_domain(domain);
+}
+
+void landlock_put_domain_deferred(struct landlock_domain *const domain)
+{
+ if (domain && refcount_dec_and_test(&domain->usage)) {
+ INIT_WORK(&domain->work_free, free_domain_work);
+ schedule_work(&domain->work_free);
+ }
+}
#ifdef CONFIG_AUDIT
diff --git a/security/landlock/domain.h b/security/landlock/domain.h
index a9d57db0120d..66333b6122a9 100644
--- a/security/landlock/domain.h
+++ b/security/landlock/domain.h
@@ -10,6 +10,7 @@
#ifndef _SECURITY_LANDLOCK_DOMAIN_H
#define _SECURITY_LANDLOCK_DOMAIN_H
+#include <linux/cleanup.h>
#include <linux/limits.h>
#include <linux/mm.h>
#include <linux/path.h>
@@ -17,9 +18,11 @@
#include <linux/refcount.h>
#include <linux/sched.h>
#include <linux/slab.h>
+#include <linux/workqueue.h>
#include "access.h"
#include "audit.h"
+#include "ruleset.h"
enum landlock_log_status {
LANDLOCK_LOG_PENDING = 0,
@@ -170,4 +173,70 @@ static inline void landlock_put_hierarchy(struct landlock_hierarchy *hierarchy)
}
}
+/**
+ * struct landlock_domain - Immutable Landlock domain
+ *
+ * A domain is created from a ruleset by landlock_merge_ruleset() and enforced
+ * on a task. Once created, its rules and access masks are immutable. Unlike
+ * &struct landlock_ruleset, a domain has no lock field.
+ */
+struct landlock_domain {
+ /**
+ * @rules: Red-black tree storage for rules.
+ */
+ struct landlock_rules rules;
+ /**
+ * @hierarchy: Enables hierarchy identification even when a parent
+ * domain vanishes. This is needed for the ptrace and scope
+ * restrictions.
+ */
+ struct landlock_hierarchy *hierarchy;
+ union {
+ /**
+ * @work_free: Enables to free a domain within a lockless
+ * section. This is only used by landlock_put_domain_deferred()
+ * when @usage reaches zero. The fields @usage, @num_layers and
+ * @access_masks are then unused.
+ */
+ struct work_struct work_free;
+ struct {
+ /**
+ * @usage: Number of credentials referencing this
+ * domain.
+ */
+ refcount_t usage;
+ /**
+ * @num_layers: Number of layers that are used in this
+ * domain. This enables to check that all the layers
+ * allow an access request.
+ */
+ u32 num_layers;
+ /**
+ * @access_masks: Contains the subset of filesystem and
+ * network actions that are restricted by a domain. A
+ * domain saves all layers of merged rulesets in a stack
+ * (FAM), starting from the first layer to the last one.
+ * These layers are used when merging rulesets, for user
+ * space backward compatibility (i.e. future-proof), and
+ * to properly handle merged rulesets without
+ * overlapping access rights. These layers are set once
+ * and never changed for the lifetime of the domain.
+ */
+ struct access_masks access_masks[];
+ };
+ };
+};
+
+void landlock_put_domain(struct landlock_domain *const domain);
+void landlock_put_domain_deferred(struct landlock_domain *const domain);
+
+DEFINE_FREE(landlock_put_domain, struct landlock_domain *,
+ if (!IS_ERR_OR_NULL(_T)) landlock_put_domain(_T))
+
+static inline void landlock_get_domain(struct landlock_domain *const domain)
+{
+ if (domain)
+ refcount_inc(&domain->usage);
+}
+
#endif /* _SECURITY_LANDLOCK_DOMAIN_H */
diff --git a/security/landlock/ruleset.c b/security/landlock/ruleset.c
index 181df7736bb9..a6835011af2b 100644
--- a/security/landlock/ruleset.c
+++ b/security/landlock/ruleset.c
@@ -38,16 +38,16 @@ static struct landlock_ruleset *create_ruleset(const u32 num_layers)
return ERR_PTR(-ENOMEM);
refcount_set(&new_ruleset->usage, 1);
mutex_init(&new_ruleset->lock);
- new_ruleset->root_inode = RB_ROOT;
+ new_ruleset->rules.root_inode = RB_ROOT;
#if IS_ENABLED(CONFIG_INET)
- new_ruleset->root_net_port = RB_ROOT;
+ new_ruleset->rules.root_net_port = RB_ROOT;
#endif /* IS_ENABLED(CONFIG_INET) */
new_ruleset->num_layers = num_layers;
/*
* hierarchy = NULL
- * num_rules = 0
+ * rules.num_rules = 0
* access_masks[] = 0
*/
return new_ruleset;
@@ -147,19 +147,7 @@ create_rule(const struct landlock_id id,
static struct rb_root *get_root(struct landlock_ruleset *const ruleset,
const enum landlock_key_type key_type)
{
- switch (key_type) {
- case LANDLOCK_KEY_INODE:
- return &ruleset->root_inode;
-
-#if IS_ENABLED(CONFIG_INET)
- case LANDLOCK_KEY_NET_PORT:
- return &ruleset->root_net_port;
-#endif /* IS_ENABLED(CONFIG_INET) */
-
- default:
- WARN_ON_ONCE(1);
- return ERR_PTR(-EINVAL);
- }
+ return landlock_get_rule_root(&ruleset->rules, key_type);
}
static void free_rule(struct landlock_rule *const rule,
@@ -175,19 +163,24 @@ static void free_rule(struct landlock_rule *const rule,
static void build_check_ruleset(void)
{
- const struct landlock_ruleset ruleset = {
+ const struct landlock_rules rules = {
.num_rules = ~0,
+ };
+ const struct landlock_ruleset ruleset = {
.num_layers = ~0,
};
- BUILD_BUG_ON(ruleset.num_rules < LANDLOCK_MAX_NUM_RULES);
+ BUILD_BUG_ON(rules.num_rules < LANDLOCK_MAX_NUM_RULES);
BUILD_BUG_ON(ruleset.num_layers < LANDLOCK_MAX_NUM_LAYERS);
}
/**
- * insert_rule - Create and insert a rule in a ruleset
+ * insert_rule - Create and insert a rule in a rule set
*
- * @ruleset: The ruleset to be updated.
+ * @rules: The rule storage to be updated. The caller is responsible for
+ * any required locking. For rulesets, this means holding
+ * landlock_ruleset.lock. For domains under construction, no lock is
+ * needed because the domain is not yet visible to other tasks.
* @id: The ID to build the new rule with. The underlying kernel object, if
* any, must be held by the caller.
* @layers: One or multiple layers to be copied into the new rule.
@@ -195,16 +188,16 @@ static void build_check_ruleset(void)
*
* When user space requests to add a new rule to a ruleset, @layers only
* contains one entry and this entry is not assigned to any level. In this
- * case, the new rule will extend @ruleset, similarly to a boolean OR between
+ * case, the new rule will extend @rules, similarly to a boolean OR between
* access rights.
*
* When merging a ruleset in a domain, or copying a domain, @layers will be
- * added to @ruleset as new constraints, similarly to a boolean AND between
- * access rights.
+ * added to @rules as new constraints, similarly to a boolean AND between access
+ * rights.
*
* Return: 0 on success, -errno on failure.
*/
-static int insert_rule(struct landlock_ruleset *const ruleset,
+static int insert_rule(struct landlock_rules *const rules,
const struct landlock_id id,
const struct landlock_layer (*layers)[],
const size_t num_layers)
@@ -215,14 +208,13 @@ static int insert_rule(struct landlock_ruleset *const ruleset,
struct rb_root *root;
might_sleep();
- lockdep_assert_held(&ruleset->lock);
if (WARN_ON_ONCE(!layers))
return -ENOENT;
if (is_object_pointer(id.type) && WARN_ON_ONCE(!id.key.object))
return -ENOENT;
- root = get_root(ruleset, id.type);
+ root = landlock_get_rule_root(rules, id.type);
if (IS_ERR(root))
return PTR_ERR(root);
@@ -248,7 +240,7 @@ static int insert_rule(struct landlock_ruleset *const ruleset,
if ((*layers)[0].level == 0) {
/*
* Extends access rights when the request comes from
- * landlock_add_rule(2), i.e. @ruleset is not a domain.
+ * landlock_add_rule(2), i.e. @rules is contained by a ruleset.
*/
if (WARN_ON_ONCE(this->num_layers != 1))
return -EINVAL;
@@ -276,14 +268,14 @@ static int insert_rule(struct landlock_ruleset *const ruleset,
/* There is no match for @id. */
build_check_ruleset();
- if (ruleset->num_rules >= LANDLOCK_MAX_NUM_RULES)
+ if (rules->num_rules >= LANDLOCK_MAX_NUM_RULES)
return -E2BIG;
new_rule = create_rule(id, layers, num_layers, NULL);
if (IS_ERR(new_rule))
return PTR_ERR(new_rule);
rb_link_node(&new_rule->node, parent_node, walker_node);
rb_insert_color(&new_rule->node, root);
- ruleset->num_rules++;
+ rules->num_rules++;
return 0;
}
@@ -314,7 +306,8 @@ int landlock_insert_rule(struct landlock_ruleset *const ruleset,
} };
build_check_layer();
- return insert_rule(ruleset, id, &layers, ARRAY_SIZE(layers));
+ lockdep_assert_held(&ruleset->lock);
+ return insert_rule(&ruleset->rules, id, &layers, ARRAY_SIZE(layers));
}
static int merge_tree(struct landlock_ruleset *const dst,
@@ -352,7 +345,7 @@ static int merge_tree(struct landlock_ruleset *const dst,
layers[0].access = walker_rule->layers[0].access;
- err = insert_rule(dst, id, &layers, ARRAY_SIZE(layers));
+ err = insert_rule(&dst->rules, id, &layers, ARRAY_SIZE(layers));
if (err)
return err;
}
@@ -426,7 +419,7 @@ static int inherit_tree(struct landlock_ruleset *const parent,
.type = key_type,
};
- err = insert_rule(child, id, &walker_rule->layers,
+ err = insert_rule(&child->rules, id, &walker_rule->layers,
walker_rule->num_layers);
if (err)
return err;
@@ -480,21 +473,26 @@ static int inherit_ruleset(struct landlock_ruleset *const parent,
return err;
}
-static void free_ruleset(struct landlock_ruleset *const ruleset)
+void landlock_free_rules(struct landlock_rules *const rules)
{
struct landlock_rule *freeme, *next;
might_sleep();
- rbtree_postorder_for_each_entry_safe(freeme, next, &ruleset->root_inode,
+ rbtree_postorder_for_each_entry_safe(freeme, next, &rules->root_inode,
node)
free_rule(freeme, LANDLOCK_KEY_INODE);
#if IS_ENABLED(CONFIG_INET)
rbtree_postorder_for_each_entry_safe(freeme, next,
- &ruleset->root_net_port, node)
+ &rules->root_net_port, node)
free_rule(freeme, LANDLOCK_KEY_NET_PORT);
#endif /* IS_ENABLED(CONFIG_INET) */
+}
+static void free_ruleset(struct landlock_ruleset *const ruleset)
+{
+ might_sleep();
+ landlock_free_rules(&ruleset->rules);
landlock_put_hierarchy(ruleset->hierarchy);
kfree(ruleset);
}
@@ -594,7 +592,8 @@ landlock_find_rule(const struct landlock_ruleset *const ruleset,
const struct rb_root *root;
const struct rb_node *node;
- root = get_root((struct landlock_ruleset *)ruleset, id.type);
+ root = landlock_get_rule_root((struct landlock_rules *)&ruleset->rules,
+ id.type);
if (IS_ERR(root))
return NULL;
node = root->rb_node;
diff --git a/security/landlock/ruleset.h b/security/landlock/ruleset.h
index 889f4b30301a..e7875a8b15df 100644
--- a/security/landlock/ruleset.h
+++ b/security/landlock/ruleset.h
@@ -57,13 +57,12 @@ union landlock_key {
*/
enum landlock_key_type {
/**
- * @LANDLOCK_KEY_INODE: Type of &landlock_ruleset.root_inode's node
- * keys.
+ * @LANDLOCK_KEY_INODE: Type of &landlock_rules.root_inode's node keys.
*/
LANDLOCK_KEY_INODE = 1,
/**
- * @LANDLOCK_KEY_NET_PORT: Type of &landlock_ruleset.root_net_port's
- * node keys.
+ * @LANDLOCK_KEY_NET_PORT: Type of &landlock_rules.root_net_port's node
+ * keys.
*/
LANDLOCK_KEY_NET_PORT,
};
@@ -111,30 +110,44 @@ struct landlock_rule {
};
/**
- * struct landlock_ruleset - Landlock ruleset
+ * struct landlock_rules - Red-black tree storage for Landlock rules
*
- * This data structure must contain unique entries, be updatable, and quick to
- * match an object.
+ * This structure holds the rule trees shared by both rulesets and domains.
*/
-struct landlock_ruleset {
+struct landlock_rules {
/**
* @root_inode: Root of a red-black tree containing &struct
- * landlock_rule nodes with inode object. Once a ruleset is tied to a
- * process (i.e. as a domain), this tree is immutable until @usage
- * reaches zero.
+ * landlock_rule nodes with inode object. Immutable for domains.
*/
struct rb_root root_inode;
#if IS_ENABLED(CONFIG_INET)
/**
* @root_net_port: Root of a red-black tree containing &struct
- * landlock_rule nodes with network port. Once a ruleset is tied to a
- * process (i.e. as a domain), this tree is immutable until @usage
- * reaches zero.
+ * landlock_rule nodes with network port. Immutable for domains.
*/
struct rb_root root_net_port;
#endif /* IS_ENABLED(CONFIG_INET) */
+ /**
+ * @num_rules: Number of non-overlapping (i.e. not for the same object)
+ * rules in this tree storage.
+ */
+ u32 num_rules;
+};
+
+/**
+ * struct landlock_ruleset - Landlock ruleset
+ *
+ * This data structure must contain unique entries, be updatable, and quick to
+ * match an object.
+ */
+struct landlock_ruleset {
+ /**
+ * @rules: Red-black tree storage for rules.
+ */
+ struct landlock_rules rules;
+
/**
* @hierarchy: Enables hierarchy identification even when a parent
* domain vanishes. This is needed for the ptrace protection.
@@ -144,9 +157,9 @@ struct landlock_ruleset {
/**
* @work_free: Enables to free a ruleset within a lockless
* section. This is only used by
- * landlock_put_ruleset_deferred() when @usage reaches zero.
- * The fields @lock, @usage, @num_rules, @num_layers and
- * @access_masks are then unused.
+ * landlock_put_ruleset_deferred() when @usage reaches zero. The
+ * fields @lock, @usage, @num_layers and @access_masks are then
+ * unused.
*/
struct work_struct work_free;
struct {
@@ -160,11 +173,6 @@ struct landlock_ruleset {
* descriptors referencing this ruleset.
*/
refcount_t usage;
- /**
- * @num_rules: Number of non-overlapping (i.e. not for
- * the same object) rules in this ruleset.
- */
- u32 num_rules;
/**
* @num_layers: Number of layers that are used in this
* ruleset. This enables to check that all the layers
@@ -204,6 +212,8 @@ int landlock_insert_rule(struct landlock_ruleset *const ruleset,
const struct landlock_id id,
const access_mask_t access);
+void landlock_free_rules(struct landlock_rules *const rules);
+
struct landlock_ruleset *
landlock_merge_ruleset(struct landlock_ruleset *const parent,
struct landlock_ruleset *const ruleset);
@@ -212,6 +222,33 @@ const struct landlock_rule *
landlock_find_rule(const struct landlock_ruleset *const ruleset,
const struct landlock_id id);
+/**
+ * landlock_get_rule_root - Get the root of a rule tree by key type
+ *
+ * @rules: The rules storage to look up.
+ * @key_type: The type of key to select the tree for.
+ *
+ * Return: A pointer to the rb_root, or ERR_PTR(-EINVAL) on unknown type.
+ */
+static inline struct rb_root *
+landlock_get_rule_root(struct landlock_rules *const rules,
+ const enum landlock_key_type key_type)
+{
+ switch (key_type) {
+ case LANDLOCK_KEY_INODE:
+ return &rules->root_inode;
+
+#if IS_ENABLED(CONFIG_INET)
+ case LANDLOCK_KEY_NET_PORT:
+ return &rules->root_net_port;
+#endif /* IS_ENABLED(CONFIG_INET) */
+
+ default:
+ WARN_ON_ONCE(1);
+ return ERR_PTR(-EINVAL);
+ }
+}
+
static inline void landlock_get_ruleset(struct landlock_ruleset *const ruleset)
{
if (ruleset)
--
2.53.0
^ permalink raw reply related
* [PATCH v2 00/17] Landlock tracepoints
From: Mickaël Salaün @ 2026-04-06 14:36 UTC (permalink / raw)
To: Christian Brauner, Günther Noack, Steven Rostedt
Cc: Mickaël Salaün, Jann Horn, Jeff Xu, Justin Suess,
Kees Cook, Masami Hiramatsu, Mathieu Desnoyers, Matthieu Buffet,
Mikhail Ivanov, Tingmao Wang, kernel-team, linux-fsdevel,
linux-security-module, linux-trace-kernel
Hi,
This series adds 13 tracepoints that cover the full Landlock lifecycle,
from ruleset creation to domain destruction. They can be used directly
via /sys/kernel/tracing/events/landlock/* or attached by eBPF programs
for richer introspection.
Patches 1-4 refactor Landlock internals: they split struct
landlock_domain from struct landlock_ruleset and move denial logging
into a common framework shared by audit and tracing. Patch 5 adds
__print_untrusted_str() to the tracing core. Patches 6-9 add
lifecycle tracepoints: ruleset creation and destruction, rule addition
for filesystem and network, domain enforcement and destruction, and
per-rule access checks. Patch 10 sets audit_net.sk for socket access
checks. Patches 11-12 add denial tracepoints for filesystem, network,
and scope operations. Patches 13-16 add selftests and patch 17 adds
documentation.
Each rule type has a dedicated tracepoint with strongly-typed fields
(dev/ino for filesystem, port for network), following the same approach
as the audit logs.
This feature is useful to troubleshoot policy issues and should limit
the need for custom debugging kernel code when developing new Landlock
features.
Landlock already has audit support for logging denied access requests,
which is useful to identify security issues or sandbox misconfiguration.
However, audit might not be enough to debug Landlock policies. The
main differences from audit events are that traces are disabled by
default, can be very verbose, and can be filtered according to process
and Landlock properties (e.g. domain ID).
As with audit, tracing may expose sensitive information about all
sandboxed processes on the system, and must only be accessible to the
system administrator. For unprivileged monitoring scoped to a single
sandbox (e.g., interactive permission prompts), Tingmao Wang's
"Landlock supervise" RFC [1] proposes a dedicated userspace API. The
infrastructure changes in this series (the domain type split, the
denial framework, and the tracepoint consistency guarantees) benefit
that approach.
I will release a companion tool that leverages these tracepoints to
monitor Landlock events in real time.
This series applies on top of my next branch [2].
Changes since RFC v1:
https://lore.kernel.org/r/20250523165741.693976-1-mic@digikod.net
- New patches 1-4: split struct landlock_domain from struct
landlock_ruleset; split denial logging from audit into common
framework with CONFIG_SECURITY_LANDLOCK_LOG.
- Patch 5 (was v1 3/5): removed WARN_ON() (pointed out by Steven
Rostedt).
- New patch 6: added create_ruleset and free_ruleset tracepoints
(split from the v1 add_rule_fs tracepoint patch).
- Patch 7 (was v1 4/5): added add_rule_net tracepoint, used
ruleset Landlock ID instead of kernel pointer, added version
field to struct landlock_ruleset, differentiated d_absolute_path()
error cases (suggested by Tingmao Wang), moved
DEFINE_FREE(__putname) to include/linux/fs.h (noticed by Tingmao
Wang).
- New patch 8: added restrict_self and free_domain tracepoints.
- Patch 9 (was v1 5/5): merged find-rule consolidation, added
check_rule_net tracepoint.
- New patch 10: split audit_net.sk fix with Fixes: tag.
- New patches 11-12: added denial tracepoints for filesystem,
network, ptrace, and scope operations.
- New patches 13-17: split selftests into per-feature commits with
documentation.
Regards,
Mickaël Salaün (17):
landlock: Prepare ruleset and domain type split
landlock: Move domain query functions to domain.c
landlock: Split struct landlock_domain from struct landlock_ruleset
landlock: Split denial logging from audit into common framework
tracing: Add __print_untrusted_str()
landlock: Add create_ruleset and free_ruleset tracepoints
landlock: Add landlock_add_rule_fs and landlock_add_rule_net
tracepoints
landlock: Add restrict_self and free_domain tracepoints
landlock: Add tracepoints for rule checking
landlock: Set audit_net.sk for socket access checks
landlock: Add landlock_deny_access_fs and landlock_deny_access_net
landlock: Add tracepoints for ptrace and scope denials
selftests/landlock: Add trace event test infrastructure and tests
selftests/landlock: Add filesystem tracepoint tests
selftests/landlock: Add network tracepoint tests
selftests/landlock: Add scope and ptrace tracepoint tests
landlock: Document tracepoints
Documentation/admin-guide/LSM/landlock.rst | 210 ++-
Documentation/security/landlock.rst | 35 +-
Documentation/trace/events-landlock.rst | 160 +++
Documentation/trace/index.rst | 1 +
Documentation/userspace-api/landlock.rst | 11 +-
MAINTAINERS | 1 +
include/linux/fs.h | 1 +
include/linux/trace_events.h | 2 +
include/trace/events/landlock.h | 574 ++++++++
include/trace/stages/stage3_trace_output.h | 4 +
include/trace/stages/stage7_class_define.h | 1 +
kernel/trace/trace_output.c | 41 +
security/landlock/Kconfig | 5 +
security/landlock/Makefile | 10 +-
security/landlock/access.h | 4 +-
security/landlock/cred.c | 6 +-
security/landlock/cred.h | 29 +-
security/landlock/domain.c | 445 ++++++-
security/landlock/domain.h | 148 ++-
security/landlock/fs.c | 201 ++-
security/landlock/fs.h | 30 +
security/landlock/id.h | 6 +-
security/landlock/{audit.c => log.c} | 261 +++-
security/landlock/{audit.h => log.h} | 25 +-
security/landlock/net.c | 40 +-
security/landlock/ruleset.c | 528 ++------
security/landlock/ruleset.h | 237 ++--
security/landlock/syscalls.c | 36 +-
security/landlock/task.c | 22 +-
tools/testing/selftests/landlock/audit.h | 35 +-
tools/testing/selftests/landlock/audit_test.c | 187 +++
tools/testing/selftests/landlock/common.h | 47 +
tools/testing/selftests/landlock/config | 2 +
tools/testing/selftests/landlock/fs_test.c | 218 +++
tools/testing/selftests/landlock/net_test.c | 547 +++++++-
.../testing/selftests/landlock/ptrace_test.c | 164 +++
.../landlock/scoped_abstract_unix_test.c | 195 +++
.../selftests/landlock/scoped_signal_test.c | 150 +++
tools/testing/selftests/landlock/trace.h | 640 +++++++++
.../selftests/landlock/trace_fs_test.c | 390 ++++++
tools/testing/selftests/landlock/trace_test.c | 1168 +++++++++++++++++
tools/testing/selftests/landlock/true.c | 10 +
42 files changed, 5991 insertions(+), 836 deletions(-)
create mode 100644 Documentation/trace/events-landlock.rst
create mode 100644 include/trace/events/landlock.h
rename security/landlock/{audit.c => log.c} (73%)
rename security/landlock/{audit.h => log.h} (74%)
create mode 100644 tools/testing/selftests/landlock/trace.h
create mode 100644 tools/testing/selftests/landlock/trace_fs_test.c
create mode 100644 tools/testing/selftests/landlock/trace_test.c
base-commit: 8c6a27e02bc55ab110d1828610048b19f903aaec
--
2.53.0
^ permalink raw reply
* [PATCH 1/3] crypto: public_key: Remove check for valid hash_algo for ML-DSA keys
From: Stefan Berger @ 2026-04-05 23:12 UTC (permalink / raw)
To: linux-integrity, linux-security-module
Cc: linux-kernel, zohar, roberto.sassu, ebiggers, Stefan Berger,
David Howells, Lukas Wunner, Ignat Korchagin, keyrings,
linux-crypto
In-Reply-To: <20260405231224.4008298-1-stefanb@linux.ibm.com>
Remove the check for the hash_algo since ML-DSA is only used in pure mode,
where a hash_algo has no relevance for the input data.
Cc: David Howells <dhowells@redhat.com>
Cc: Lukas Wunner <lukas@wunner.de>
Cc: Ignat Korchagin <ignat@linux.win>
Cc: keyrings@vger.kernel.org
Cc: linux-crypto@vger.kernel.org
Signed-off-by: Stefan Berger <stefanb@linux.ibm.com>
---
crypto/asymmetric_keys/public_key.c | 5 -----
1 file changed, 5 deletions(-)
diff --git a/crypto/asymmetric_keys/public_key.c b/crypto/asymmetric_keys/public_key.c
index 09a0b83d5d77..df6918a77ab8 100644
--- a/crypto/asymmetric_keys/public_key.c
+++ b/crypto/asymmetric_keys/public_key.c
@@ -147,11 +147,6 @@ software_key_determine_akcipher(const struct public_key *pkey,
strcmp(pkey->pkey_algo, "mldsa87") == 0) {
if (strcmp(encoding, "raw") != 0)
return -EINVAL;
- if (!hash_algo)
- return -EINVAL;
- if (strcmp(hash_algo, "none") != 0 &&
- strcmp(hash_algo, "sha512") != 0)
- return -EINVAL;
} else {
/* Unknown public key algorithm */
return -ENOPKG;
--
2.53.0
^ permalink raw reply related
* [PATCH 3/3] integrity: Add support for sigv3 verification using ML-DSA keys
From: Stefan Berger @ 2026-04-05 23:12 UTC (permalink / raw)
To: linux-integrity, linux-security-module
Cc: linux-kernel, zohar, roberto.sassu, ebiggers, Stefan Berger
In-Reply-To: <20260405231224.4008298-1-stefanb@linux.ibm.com>
Add support for sigv3 signature verification using ML-DSA in pure mode.
When a sigv3 signature is verified, first check whether the key to use
for verification is an ML-DSA key and therefore uses a hashless signature
verification scheme. The hashless signature verification method uses the
ima_file_id structure directly for signature verification rather than
its digest.
Suggested-by: Eric Biggers <ebiggers@kernel.org>
Signed-off-by: Stefan Berger <stefanb@linux.ibm.com>
---
security/integrity/digsig_asymmetric.c | 84 ++++++++++++++++++++++++--
1 file changed, 79 insertions(+), 5 deletions(-)
diff --git a/security/integrity/digsig_asymmetric.c b/security/integrity/digsig_asymmetric.c
index e29ed73f15cd..e25534117c16 100644
--- a/security/integrity/digsig_asymmetric.c
+++ b/security/integrity/digsig_asymmetric.c
@@ -190,17 +190,91 @@ static int calc_file_id_hash(enum evm_ima_xattr_type type,
return rc;
}
+/*
+ * asymmetric_verify_v3_hashless - Use hashless signature verification on sigv3
+ * @key: The key to use for signature verification
+ * @pk: The associated public key
+ * @encoding: The encoding the key type uses
+ * @sig: The signature
+ * @siglen: The length of the xattr signature
+ * @algo: The hash algorithm
+ * @digest: The file digest
+ *
+ * Create an ima_file_id structure and use it for signature verification
+ * directly. This can be used for ML-DSA in pure mode for example.
+ */
+static int asymmetric_verify_v3_hashless(struct key *key,
+ const struct public_key *pk,
+ const char *encoding,
+ const char *sig, int siglen,
+ u8 algo,
+ const u8 *digest)
+{
+ struct signature_v2_hdr *hdr = (struct signature_v2_hdr *)sig;
+ struct ima_file_id file_id = {
+ .hash_type = hdr->type,
+ .hash_algorithm = algo,
+ };
+ size_t digest_size = hash_digest_size[algo];
+ struct public_key_signature pks = {
+ .m = (u8 *)&file_id,
+ .m_size = sizeof(file_id) - (HASH_MAX_DIGESTSIZE - digest_size),
+ .s = hdr->sig,
+ .s_size = siglen - sizeof(*hdr),
+ .pkey_algo = pk->pkey_algo,
+ .hash_algo = hash_algo_name[hdr->hash_algo],
+ .encoding = encoding,
+ };
+ int ret;
+
+ if (hdr->type != IMA_VERITY_DIGSIG &&
+ hdr->type != EVM_IMA_XATTR_DIGSIG &&
+ hdr->type != EVM_XATTR_PORTABLE_DIGSIG)
+ return -EINVAL;
+
+ if (pks.s_size != be16_to_cpu(hdr->sig_size))
+ return -EBADMSG;
+
+ memcpy(file_id.hash, digest, digest_size);
+
+ ret = verify_signature(key, &pks);
+ pr_debug("%s() = %d\n", __func__, ret);
+ return ret;
+}
+
int asymmetric_verify_v3(struct key *keyring, const char *sig, int siglen,
const char *data, int datalen, u8 algo)
{
struct signature_v2_hdr *hdr = (struct signature_v2_hdr *)sig;
struct ima_max_digest_data hash;
+ const struct public_key *pk;
+ struct key *key;
int rc;
- rc = calc_file_id_hash(hdr->type, algo, data, &hash);
- if (rc)
- return -EINVAL;
+ if (siglen <= sizeof(*hdr))
+ return -EBADMSG;
+
+ key = request_asymmetric_key(keyring, be32_to_cpu(hdr->keyid));
+ if (IS_ERR(key))
+ return PTR_ERR(key);
- return asymmetric_verify(keyring, sig, siglen, hash.digest,
- hash.hdr.length);
+ pk = asymmetric_key_public_key(key);
+ if (!strncmp(pk->pkey_algo, "mldsa", 5)) {
+ rc = asymmetric_verify_v3_hashless(key, pk, "raw",
+ sig, siglen, algo, data);
+ } else {
+ rc = calc_file_id_hash(hdr->type, algo, data, &hash);
+ if (rc) {
+ rc = -EINVAL;
+ goto err_exit;
+ }
+
+ rc = asymmetric_verify_common(key, pk, sig, siglen, hash.digest,
+ hash.hdr.length);
+ }
+
+err_exit:
+ key_put(key);
+
+ return rc;
}
--
2.53.0
^ permalink raw reply related
* [PATCH 2/3] integrity: Refactor asymmetric_verify for reusability
From: Stefan Berger @ 2026-04-05 23:12 UTC (permalink / raw)
To: linux-integrity, linux-security-module
Cc: linux-kernel, zohar, roberto.sassu, ebiggers, Stefan Berger
In-Reply-To: <20260405231224.4008298-1-stefanb@linux.ibm.com>
Refactor asymmetric_verify for reusability. Have it call
asymmetric_verify_common with the signature verification key and the
public_key structure as parameters. sigv3 support for ML-DSA will need to
check the public key type first to decide how to do the signature
verification and therefore will have these parameters available for
calling asymmetric_verify_common.
Signed-off-by: Stefan Berger <stefanb@linux.ibm.com>
---
security/integrity/digsig_asymmetric.c | 42 +++++++++++++++++---------
1 file changed, 28 insertions(+), 14 deletions(-)
diff --git a/security/integrity/digsig_asymmetric.c b/security/integrity/digsig_asymmetric.c
index 6e68ec3becbd..e29ed73f15cd 100644
--- a/security/integrity/digsig_asymmetric.c
+++ b/security/integrity/digsig_asymmetric.c
@@ -79,18 +79,15 @@ static struct key *request_asymmetric_key(struct key *keyring, uint32_t keyid)
return key;
}
-int asymmetric_verify(struct key *keyring, const char *sig,
- int siglen, const char *data, int datalen)
+static int asymmetric_verify_common(const struct key *key,
+ const struct public_key *pk,
+ const char *sig, int siglen,
+ const char *data, int datalen)
{
- struct public_key_signature pks;
struct signature_v2_hdr *hdr = (struct signature_v2_hdr *)sig;
- const struct public_key *pk;
- struct key *key;
+ struct public_key_signature pks;
int ret;
- if (siglen <= sizeof(*hdr))
- return -EBADMSG;
-
siglen -= sizeof(*hdr);
if (siglen != be16_to_cpu(hdr->sig_size))
@@ -99,15 +96,10 @@ int asymmetric_verify(struct key *keyring, const char *sig,
if (hdr->hash_algo >= HASH_ALGO__LAST)
return -ENOPKG;
- key = request_asymmetric_key(keyring, be32_to_cpu(hdr->keyid));
- if (IS_ERR(key))
- return PTR_ERR(key);
-
memset(&pks, 0, sizeof(pks));
pks.hash_algo = hash_algo_name[hdr->hash_algo];
- pk = asymmetric_key_public_key(key);
pks.pkey_algo = pk->pkey_algo;
if (!strcmp(pk->pkey_algo, "rsa")) {
pks.encoding = "pkcs1";
@@ -127,11 +119,33 @@ int asymmetric_verify(struct key *keyring, const char *sig,
pks.s_size = siglen;
ret = verify_signature(key, &pks);
out:
- key_put(key);
pr_debug("%s() = %d\n", __func__, ret);
return ret;
}
+int asymmetric_verify(struct key *keyring, const char *sig,
+ int siglen, const char *data, int datalen)
+{
+ struct signature_v2_hdr *hdr = (struct signature_v2_hdr *)sig;
+ const struct public_key *pk;
+ struct key *key;
+ int ret;
+
+ if (siglen <= sizeof(*hdr))
+ return -EBADMSG;
+
+ key = request_asymmetric_key(keyring, be32_to_cpu(hdr->keyid));
+ if (IS_ERR(key))
+ return PTR_ERR(key);
+ pk = asymmetric_key_public_key(key);
+
+ ret = asymmetric_verify_common(key, pk, sig, siglen, data, datalen);
+
+ key_put(key);
+
+ return ret;
+}
+
/*
* calc_file_id_hash - calculate the hash of the ima_file_id struct data
* @type: xattr type [enum evm_ima_xattr_type]
--
2.53.0
^ permalink raw reply related
* [PATCH 0/3] Add support for ML-DSA signature for EVM and IMA
From: Stefan Berger @ 2026-04-05 23:12 UTC (permalink / raw)
To: linux-integrity, linux-security-module
Cc: linux-kernel, zohar, roberto.sassu, ebiggers, Stefan Berger
Based on the IMA sigv3 signature type, add support for ML-DSA signatures
for EVM and IMA. Use the existing ML-DSA hashless signing mode (pure mode).
Stefan
Stefan Berger (3):
crypto: public_key: Remove check for valid hash_algo for ML-DSA keys
integrity: Refactor asymmetric_verify for reusability
integrity: Add support for sigv3 verification using ML-DSA keys
crypto/asymmetric_keys/public_key.c | 5 -
security/integrity/digsig_asymmetric.c | 126 +++++++++++++++++++++----
2 files changed, 107 insertions(+), 24 deletions(-)
base-commit: 82bbd447199ff1441031d2eaf9afe041550cf525
--
2.53.0
^ permalink raw reply
* Re: [PATCH v4 2/3] lsm: add backing_file LSM hooks
From: Serge E. Hallyn @ 2026-04-05 3:12 UTC (permalink / raw)
To: Paul Moore
Cc: linux-security-module, selinux, linux-fsdevel, linux-unionfs,
linux-erofs, Amir Goldstein, Gao Xiang, Christian Brauner
In-Reply-To: <20260403030848.731867-7-paul@paul-moore.com>
On Thu, Apr 02, 2026 at 11:08:34PM -0400, Paul Moore wrote:
> Stacked filesystems such as overlayfs do not currently provide the
> necessary mechanisms for LSMs to properly enforce access controls on the
> mmap() and mprotect() operations. In order to resolve this gap, a LSM
> security blob is being added to the backing_file struct and the following
> new LSM hooks are being created:
>
> security_backing_file_alloc()
> security_backing_file_free()
> security_mmap_backing_file()
>
> The first two hooks are to manage the lifecycle of the LSM security blob
> in the backing_file struct, while the third provides a new mmap() access
> control point for the underlying backing file. It is also expected that
> LSMs will likely want to update their security_file_mprotect() callback
> to address issues with their mprotect() controls, but that does not
> require a change to the security_file_mprotect() LSM hook.
>
> There are three other small changes to support these new LSM hooks:
> * Pass the user file associated with a backing file down to
> alloc_empty_backing_file() so it can be included in the
> security_backing_file_alloc() hook.
> * Add getter and setter functions for the backing_file struct LSM blob
> as the backing_file struct remains private to fs/file_table.c.
> * Constify the file struct field in the LSM common_audit_data struct to
> better support LSMs that need to pass a const file struct pointer into
> the common LSM audit code.
>
> Thanks to Arnd Bergmann for identifying the missing EXPORT_SYMBOL_GPL()
> and supplying a fixup.
>
> Cc: stable@vger.kernel.org
> Cc: linux-fsdevel@vger.kernel.org
> Cc: linux-unionfs@vger.kernel.org
> Cc: linux-erofs@lists.ozlabs.org
> Signed-off-by: Paul Moore <paul@paul-moore.com>
Reviewed-by: Serge Hallyn <serge@hallyn.com>
> ---
> fs/backing-file.c | 18 ++++--
> fs/erofs/ishare.c | 10 +++-
> fs/file_table.c | 27 +++++++--
> fs/fuse/passthrough.c | 2 +-
> fs/internal.h | 3 +-
> fs/overlayfs/dir.c | 2 +-
> fs/overlayfs/file.c | 2 +-
> include/linux/backing-file.h | 4 +-
> include/linux/fs.h | 13 +++++
> include/linux/lsm_audit.h | 2 +-
> include/linux/lsm_hook_defs.h | 5 ++
> include/linux/lsm_hooks.h | 1 +
> include/linux/security.h | 22 ++++++++
> security/lsm.h | 1 +
> security/lsm_init.c | 9 +++
> security/security.c | 102 ++++++++++++++++++++++++++++++++++
> 16 files changed, 206 insertions(+), 17 deletions(-)
>
> diff --git a/fs/backing-file.c b/fs/backing-file.c
> index 45da8600d564..1f3bbfc75882 100644
> --- a/fs/backing-file.c
> +++ b/fs/backing-file.c
> @@ -12,6 +12,7 @@
> #include <linux/backing-file.h>
> #include <linux/splice.h>
> #include <linux/mm.h>
> +#include <linux/security.h>
>
> #include "internal.h"
>
> @@ -29,14 +30,15 @@
> * returned file into a container structure that also stores the stacked
> * file's path, which can be retrieved using backing_file_user_path().
> */
> -struct file *backing_file_open(const struct path *user_path, int flags,
> +struct file *backing_file_open(const struct file *user_file, int flags,
> const struct path *real_path,
> const struct cred *cred)
> {
> + const struct path *user_path = &user_file->f_path;
> struct file *f;
> int error;
>
> - f = alloc_empty_backing_file(flags, cred);
> + f = alloc_empty_backing_file(flags, cred, user_file);
> if (IS_ERR(f))
> return f;
>
> @@ -52,15 +54,16 @@ struct file *backing_file_open(const struct path *user_path, int flags,
> }
> EXPORT_SYMBOL_GPL(backing_file_open);
>
> -struct file *backing_tmpfile_open(const struct path *user_path, int flags,
> +struct file *backing_tmpfile_open(const struct file *user_file, int flags,
> const struct path *real_parentpath,
> umode_t mode, const struct cred *cred)
> {
> struct mnt_idmap *real_idmap = mnt_idmap(real_parentpath->mnt);
> + const struct path *user_path = &user_file->f_path;
> struct file *f;
> int error;
>
> - f = alloc_empty_backing_file(flags, cred);
> + f = alloc_empty_backing_file(flags, cred, user_file);
> if (IS_ERR(f))
> return f;
>
> @@ -336,8 +339,13 @@ int backing_file_mmap(struct file *file, struct vm_area_struct *vma,
>
> vma_set_file(vma, file);
>
> - scoped_with_creds(ctx->cred)
> + scoped_with_creds(ctx->cred) {
> + ret = security_mmap_backing_file(vma, file, user_file);
> + if (ret)
> + return ret;
> +
> ret = vfs_mmap(vma->vm_file, vma);
> + }
>
> if (ctx->accessed)
> ctx->accessed(user_file);
> diff --git a/fs/erofs/ishare.c b/fs/erofs/ishare.c
> index ec433bacc592..6ed66b17359b 100644
> --- a/fs/erofs/ishare.c
> +++ b/fs/erofs/ishare.c
> @@ -4,6 +4,7 @@
> */
> #include <linux/xxhash.h>
> #include <linux/mount.h>
> +#include <linux/security.h>
> #include "internal.h"
> #include "xattr.h"
>
> @@ -106,7 +107,8 @@ static int erofs_ishare_file_open(struct inode *inode, struct file *file)
>
> if (file->f_flags & O_DIRECT)
> return -EINVAL;
> - realfile = alloc_empty_backing_file(O_RDONLY|O_NOATIME, current_cred());
> + realfile = alloc_empty_backing_file(O_RDONLY|O_NOATIME, current_cred(),
> + file);
> if (IS_ERR(realfile))
> return PTR_ERR(realfile);
> ihold(sharedinode);
> @@ -150,8 +152,14 @@ static ssize_t erofs_ishare_file_read_iter(struct kiocb *iocb,
> static int erofs_ishare_mmap(struct file *file, struct vm_area_struct *vma)
> {
> struct file *realfile = file->private_data;
> + int err;
>
> vma_set_file(vma, realfile);
> +
> + err = security_mmap_backing_file(vma, realfile, file);
> + if (err)
> + return err;
> +
> return generic_file_readonly_mmap(file, vma);
> }
>
> diff --git a/fs/file_table.c b/fs/file_table.c
> index 3b3792903185..d19d879b6efc 100644
> --- a/fs/file_table.c
> +++ b/fs/file_table.c
> @@ -50,6 +50,9 @@ struct backing_file {
> struct path user_path;
> freeptr_t bf_freeptr;
> };
> +#ifdef CONFIG_SECURITY
> + void *security;
> +#endif
> };
>
> #define backing_file(f) container_of(f, struct backing_file, file)
> @@ -66,8 +69,21 @@ void backing_file_set_user_path(struct file *f, const struct path *path)
> }
> EXPORT_SYMBOL_GPL(backing_file_set_user_path);
>
> +#ifdef CONFIG_SECURITY
> +void *backing_file_security(const struct file *f)
> +{
> + return backing_file(f)->security;
> +}
> +
> +void backing_file_set_security(struct file *f, void *security)
> +{
> + backing_file(f)->security = security;
> +}
> +#endif /* CONFIG_SECURITY */
> +
> static inline void backing_file_free(struct backing_file *ff)
> {
> + security_backing_file_free(&ff->file);
> path_put(&ff->user_path);
> kmem_cache_free(bfilp_cachep, ff);
> }
> @@ -288,10 +304,12 @@ struct file *alloc_empty_file_noaccount(int flags, const struct cred *cred)
> return f;
> }
>
> -static int init_backing_file(struct backing_file *ff)
> +static int init_backing_file(struct backing_file *ff,
> + const struct file *user_file)
> {
> memset(&ff->user_path, 0, sizeof(ff->user_path));
> - return 0;
> + backing_file_set_security(&ff->file, NULL);
> + return security_backing_file_alloc(&ff->file, user_file);
> }
>
> /*
> @@ -301,7 +319,8 @@ static int init_backing_file(struct backing_file *ff)
> * This is only for kernel internal use, and the allocate file must not be
> * installed into file tables or such.
> */
> -struct file *alloc_empty_backing_file(int flags, const struct cred *cred)
> +struct file *alloc_empty_backing_file(int flags, const struct cred *cred,
> + const struct file *user_file)
> {
> struct backing_file *ff;
> int error;
> @@ -318,7 +337,7 @@ struct file *alloc_empty_backing_file(int flags, const struct cred *cred)
>
> /* The f_mode flags must be set before fput(). */
> ff->file.f_mode |= FMODE_BACKING | FMODE_NOACCOUNT;
> - error = init_backing_file(ff);
> + error = init_backing_file(ff, user_file);
> if (unlikely(error)) {
> fput(&ff->file);
> return ERR_PTR(error);
> diff --git a/fs/fuse/passthrough.c b/fs/fuse/passthrough.c
> index 72de97c03d0e..f2d08ac2459b 100644
> --- a/fs/fuse/passthrough.c
> +++ b/fs/fuse/passthrough.c
> @@ -167,7 +167,7 @@ struct fuse_backing *fuse_passthrough_open(struct file *file, int backing_id)
> goto out;
>
> /* Allocate backing file per fuse file to store fuse path */
> - backing_file = backing_file_open(&file->f_path, file->f_flags,
> + backing_file = backing_file_open(file, file->f_flags,
> &fb->file->f_path, fb->cred);
> err = PTR_ERR(backing_file);
> if (IS_ERR(backing_file)) {
> diff --git a/fs/internal.h b/fs/internal.h
> index cbc384a1aa09..77e90e4124e0 100644
> --- a/fs/internal.h
> +++ b/fs/internal.h
> @@ -106,7 +106,8 @@ extern void chroot_fs_refs(const struct path *, const struct path *);
> */
> struct file *alloc_empty_file(int flags, const struct cred *cred);
> struct file *alloc_empty_file_noaccount(int flags, const struct cred *cred);
> -struct file *alloc_empty_backing_file(int flags, const struct cred *cred);
> +struct file *alloc_empty_backing_file(int flags, const struct cred *cred,
> + const struct file *user_file);
> void backing_file_set_user_path(struct file *f, const struct path *path);
>
> static inline void file_put_write_access(struct file *file)
> diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c
> index ff3dbd1ca61f..f2f20a611af3 100644
> --- a/fs/overlayfs/dir.c
> +++ b/fs/overlayfs/dir.c
> @@ -1374,7 +1374,7 @@ static int ovl_create_tmpfile(struct file *file, struct dentry *dentry,
> return PTR_ERR(cred);
>
> ovl_path_upper(dentry->d_parent, &realparentpath);
> - realfile = backing_tmpfile_open(&file->f_path, flags, &realparentpath,
> + realfile = backing_tmpfile_open(file, flags, &realparentpath,
> mode, current_cred());
> err = PTR_ERR_OR_ZERO(realfile);
> pr_debug("tmpfile/open(%pd2, 0%o) = %i\n", realparentpath.dentry, mode, err);
> diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
> index 97bed2286030..27cc07738f33 100644
> --- a/fs/overlayfs/file.c
> +++ b/fs/overlayfs/file.c
> @@ -48,7 +48,7 @@ static struct file *ovl_open_realfile(const struct file *file,
> if (!inode_owner_or_capable(real_idmap, realinode))
> flags &= ~O_NOATIME;
>
> - realfile = backing_file_open(file_user_path(file),
> + realfile = backing_file_open(file,
> flags, realpath, current_cred());
> }
> }
> diff --git a/include/linux/backing-file.h b/include/linux/backing-file.h
> index 1476a6ed1bfd..c939cd222730 100644
> --- a/include/linux/backing-file.h
> +++ b/include/linux/backing-file.h
> @@ -18,10 +18,10 @@ struct backing_file_ctx {
> void (*end_write)(struct kiocb *iocb, ssize_t);
> };
>
> -struct file *backing_file_open(const struct path *user_path, int flags,
> +struct file *backing_file_open(const struct file *user_file, int flags,
> const struct path *real_path,
> const struct cred *cred);
> -struct file *backing_tmpfile_open(const struct path *user_path, int flags,
> +struct file *backing_tmpfile_open(const struct file *user_file, int flags,
> const struct path *real_parentpath,
> umode_t mode, const struct cred *cred);
> ssize_t backing_file_read_iter(struct file *file, struct iov_iter *iter,
> diff --git a/include/linux/fs.h b/include/linux/fs.h
> index 8b3dd145b25e..d0d0e8f55589 100644
> --- a/include/linux/fs.h
> +++ b/include/linux/fs.h
> @@ -2475,6 +2475,19 @@ struct file *dentry_create(struct path *path, int flags, umode_t mode,
> const struct cred *cred);
> const struct path *backing_file_user_path(const struct file *f);
>
> +#ifdef CONFIG_SECURITY
> +void *backing_file_security(const struct file *f);
> +void backing_file_set_security(struct file *f, void *security);
> +#else
> +static inline void *backing_file_security(const struct file *f)
> +{
> + return NULL;
> +}
> +static inline void backing_file_set_security(struct file *f, void *security)
> +{
> +}
> +#endif /* CONFIG_SECURITY */
> +
> /*
> * When mmapping a file on a stackable filesystem (e.g., overlayfs), the file
> * stored in ->vm_file is a backing file whose f_inode is on the underlying
> diff --git a/include/linux/lsm_audit.h b/include/linux/lsm_audit.h
> index 382c56a97bba..584db296e43b 100644
> --- a/include/linux/lsm_audit.h
> +++ b/include/linux/lsm_audit.h
> @@ -94,7 +94,7 @@ struct common_audit_data {
> #endif
> char *kmod_name;
> struct lsm_ioctlop_audit *op;
> - struct file *file;
> + const struct file *file;
> struct lsm_ibpkey_audit *ibpkey;
> struct lsm_ibendport_audit *ibendport;
> int reason;
> diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h
> index 8c42b4bde09c..b4958167e381 100644
> --- a/include/linux/lsm_hook_defs.h
> +++ b/include/linux/lsm_hook_defs.h
> @@ -191,6 +191,9 @@ LSM_HOOK(int, 0, file_permission, struct file *file, int mask)
> LSM_HOOK(int, 0, file_alloc_security, struct file *file)
> LSM_HOOK(void, LSM_RET_VOID, file_release, struct file *file)
> LSM_HOOK(void, LSM_RET_VOID, file_free_security, struct file *file)
> +LSM_HOOK(int, 0, backing_file_alloc, struct file *backing_file,
> + const struct file *user_file)
> +LSM_HOOK(void, LSM_RET_VOID, backing_file_free, struct file *backing_file)
> LSM_HOOK(int, 0, file_ioctl, struct file *file, unsigned int cmd,
> unsigned long arg)
> LSM_HOOK(int, 0, file_ioctl_compat, struct file *file, unsigned int cmd,
> @@ -198,6 +201,8 @@ LSM_HOOK(int, 0, file_ioctl_compat, struct file *file, unsigned int cmd,
> LSM_HOOK(int, 0, mmap_addr, unsigned long addr)
> LSM_HOOK(int, 0, mmap_file, struct file *file, unsigned long reqprot,
> unsigned long prot, unsigned long flags)
> +LSM_HOOK(int, 0, mmap_backing_file, struct vm_area_struct *vma,
> + struct file *backing_file, struct file *user_file)
> LSM_HOOK(int, 0, file_mprotect, struct vm_area_struct *vma,
> unsigned long reqprot, unsigned long prot)
> LSM_HOOK(int, 0, file_lock, struct file *file, unsigned int cmd)
> diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h
> index d48bf0ad26f4..b4f8cad53ddb 100644
> --- a/include/linux/lsm_hooks.h
> +++ b/include/linux/lsm_hooks.h
> @@ -104,6 +104,7 @@ struct security_hook_list {
> struct lsm_blob_sizes {
> unsigned int lbs_cred;
> unsigned int lbs_file;
> + unsigned int lbs_backing_file;
> unsigned int lbs_ib;
> unsigned int lbs_inode;
> unsigned int lbs_sock;
> diff --git a/include/linux/security.h b/include/linux/security.h
> index ee88dd2d2d1f..8d2d4856934e 100644
> --- a/include/linux/security.h
> +++ b/include/linux/security.h
> @@ -472,11 +472,17 @@ int security_file_permission(struct file *file, int mask);
> int security_file_alloc(struct file *file);
> void security_file_release(struct file *file);
> void security_file_free(struct file *file);
> +int security_backing_file_alloc(struct file *backing_file,
> + const struct file *user_file);
> +void security_backing_file_free(struct file *backing_file);
> int security_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
> int security_file_ioctl_compat(struct file *file, unsigned int cmd,
> unsigned long arg);
> int security_mmap_file(struct file *file, unsigned long prot,
> unsigned long flags);
> +int security_mmap_backing_file(struct vm_area_struct *vma,
> + struct file *backing_file,
> + struct file *user_file);
> int security_mmap_addr(unsigned long addr);
> int security_file_mprotect(struct vm_area_struct *vma, unsigned long reqprot,
> unsigned long prot);
> @@ -1141,6 +1147,15 @@ static inline void security_file_release(struct file *file)
> static inline void security_file_free(struct file *file)
> { }
>
> +static inline int security_backing_file_alloc(struct file *backing_file,
> + const struct file *user_file)
> +{
> + return 0;
> +}
> +
> +static inline void security_backing_file_free(struct file *backing_file)
> +{ }
> +
> static inline int security_file_ioctl(struct file *file, unsigned int cmd,
> unsigned long arg)
> {
> @@ -1160,6 +1175,13 @@ static inline int security_mmap_file(struct file *file, unsigned long prot,
> return 0;
> }
>
> +static inline int security_mmap_backing_file(struct vm_area_struct *vma,
> + struct file *backing_file,
> + struct file *user_file)
> +{
> + return 0;
> +}
> +
> static inline int security_mmap_addr(unsigned long addr)
> {
> return cap_mmap_addr(addr);
> diff --git a/security/lsm.h b/security/lsm.h
> index db77cc83e158..32f808ad4335 100644
> --- a/security/lsm.h
> +++ b/security/lsm.h
> @@ -29,6 +29,7 @@ extern struct lsm_blob_sizes blob_sizes;
>
> /* LSM blob caches */
> extern struct kmem_cache *lsm_file_cache;
> +extern struct kmem_cache *lsm_backing_file_cache;
> extern struct kmem_cache *lsm_inode_cache;
>
> /* LSM blob allocators */
> diff --git a/security/lsm_init.c b/security/lsm_init.c
> index 573e2a7250c4..7c0fd17f1601 100644
> --- a/security/lsm_init.c
> +++ b/security/lsm_init.c
> @@ -293,6 +293,8 @@ static void __init lsm_prepare(struct lsm_info *lsm)
> blobs = lsm->blobs;
> lsm_blob_size_update(&blobs->lbs_cred, &blob_sizes.lbs_cred);
> lsm_blob_size_update(&blobs->lbs_file, &blob_sizes.lbs_file);
> + lsm_blob_size_update(&blobs->lbs_backing_file,
> + &blob_sizes.lbs_backing_file);
> lsm_blob_size_update(&blobs->lbs_ib, &blob_sizes.lbs_ib);
> /* inode blob gets an rcu_head in addition to LSM blobs. */
> if (blobs->lbs_inode && blob_sizes.lbs_inode == 0)
> @@ -441,6 +443,8 @@ int __init security_init(void)
> if (lsm_debug) {
> lsm_pr("blob(cred) size %d\n", blob_sizes.lbs_cred);
> lsm_pr("blob(file) size %d\n", blob_sizes.lbs_file);
> + lsm_pr("blob(backing_file) size %d\n",
> + blob_sizes.lbs_backing_file);
> lsm_pr("blob(ib) size %d\n", blob_sizes.lbs_ib);
> lsm_pr("blob(inode) size %d\n", blob_sizes.lbs_inode);
> lsm_pr("blob(ipc) size %d\n", blob_sizes.lbs_ipc);
> @@ -462,6 +466,11 @@ int __init security_init(void)
> lsm_file_cache = kmem_cache_create("lsm_file_cache",
> blob_sizes.lbs_file, 0,
> SLAB_PANIC, NULL);
> + if (blob_sizes.lbs_backing_file)
> + lsm_backing_file_cache = kmem_cache_create(
> + "lsm_backing_file_cache",
> + blob_sizes.lbs_backing_file,
> + 0, SLAB_PANIC, NULL);
> if (blob_sizes.lbs_inode)
> lsm_inode_cache = kmem_cache_create("lsm_inode_cache",
> blob_sizes.lbs_inode, 0,
> diff --git a/security/security.c b/security/security.c
> index a26c1474e2e4..048560ef6a1a 100644
> --- a/security/security.c
> +++ b/security/security.c
> @@ -82,6 +82,7 @@ const struct lsm_id *lsm_idlist[MAX_LSM_COUNT];
> struct lsm_blob_sizes blob_sizes;
>
> struct kmem_cache *lsm_file_cache;
> +struct kmem_cache *lsm_backing_file_cache;
> struct kmem_cache *lsm_inode_cache;
>
> #define SECURITY_HOOK_ACTIVE_KEY(HOOK, IDX) security_hook_active_##HOOK##_##IDX
> @@ -173,6 +174,30 @@ static int lsm_file_alloc(struct file *file)
> return 0;
> }
>
> +/**
> + * lsm_backing_file_alloc - allocate a composite backing file blob
> + * @backing_file: the backing file
> + *
> + * Allocate the backing file blob for all the modules.
> + *
> + * Returns 0, or -ENOMEM if memory can't be allocated.
> + */
> +static int lsm_backing_file_alloc(struct file *backing_file)
> +{
> + void *blob;
> +
> + if (!lsm_backing_file_cache) {
> + backing_file_set_security(backing_file, NULL);
> + return 0;
> + }
> +
> + blob = kmem_cache_zalloc(lsm_backing_file_cache, GFP_KERNEL);
> + backing_file_set_security(backing_file, blob);
> + if (!blob)
> + return -ENOMEM;
> + return 0;
> +}
> +
> /**
> * lsm_blob_alloc - allocate a composite blob
> * @dest: the destination for the blob
> @@ -2418,6 +2443,57 @@ void security_file_free(struct file *file)
> }
> }
>
> +/**
> + * security_backing_file_alloc() - Allocate and setup a backing file blob
> + * @backing_file: the backing file
> + * @user_file: the associated user visible file
> + *
> + * Allocate a backing file LSM blob and perform any necessary initialization of
> + * the LSM blob. There will be some operations where the LSM will not have
> + * access to @user_file after this point, so any state associated with
> + * @user_file that is important to the LSM should be captured in the
> + * backing file's LSM blob.
> + *
> + * LSMs should avoid taking a reference to @user_file in this hook as it will
> + * result in problems later when the system attempts to drop/put the file
> + * references due to a circular dependency.
> + *
> + * Return: Return 0 if the hook is successful, negative values otherwise.
> + */
> +int security_backing_file_alloc(struct file *backing_file,
> + const struct file *user_file)
> +{
> + int rc;
> +
> + rc = lsm_backing_file_alloc(backing_file);
> + if (rc)
> + return rc;
> + rc = call_int_hook(backing_file_alloc, backing_file, user_file);
> + if (unlikely(rc))
> + security_backing_file_free(backing_file);
> +
> + return rc;
> +}
> +
> +/**
> + * security_backing_file_free() - Free a backing file blob
> + * @backing_file: the backing file
> + *
> + * Free any LSM state associated with a backing file's LSM blob, including the
> + * blob itself.
> + */
> +void security_backing_file_free(struct file *backing_file)
> +{
> + void *blob = backing_file_security(backing_file);
> +
> + call_void_hook(backing_file_free, backing_file);
> +
> + if (blob) {
> + backing_file_set_security(backing_file, NULL);
> + kmem_cache_free(lsm_backing_file_cache, blob);
> + }
> +}
> +
> /**
> * security_file_ioctl() - Check if an ioctl is allowed
> * @file: associated file
> @@ -2506,6 +2582,32 @@ int security_mmap_file(struct file *file, unsigned long prot,
> flags);
> }
>
> +/**
> + * security_mmap_backing_file - Check if mmap'ing a backing file is allowed
> + * @vma: the vm_area_struct for the mmap'd region
> + * @backing_file: the backing file being mmap'd
> + * @user_file: the user file being mmap'd
> + *
> + * Check permissions for a mmap operation on a stacked filesystem. This hook
> + * is called after security_mmap_file() and is responsible for authorizing
> + * the mmap on @backing_file. It is important to note that the mmap operation
> + * on @user_file has already been authorized and the @vma->vm_file has been
> + * set to @backing_file.
> + *
> + * Return: Returns 0 if permission is granted.
> + */
> +int security_mmap_backing_file(struct vm_area_struct *vma,
> + struct file *backing_file,
> + struct file *user_file)
> +{
> + /* recommended by the stackable filesystem devs */
> + if (WARN_ON_ONCE(!(backing_file->f_mode & FMODE_BACKING)))
> + return -EIO;
> +
> + return call_int_hook(mmap_backing_file, vma, backing_file, user_file);
> +}
> +EXPORT_SYMBOL_GPL(security_mmap_backing_file);
> +
> /**
> * security_mmap_addr() - Check if mmap'ing an address is allowed
> * @addr: address
> --
> 2.53.0
>
^ permalink raw reply
* Re: [PATCH v4 1/3] fs: prepare for adding LSM blob to backing_file
From: Serge E. Hallyn @ 2026-04-05 0:14 UTC (permalink / raw)
To: Paul Moore
Cc: linux-security-module, selinux, linux-fsdevel, linux-unionfs,
linux-erofs, Amir Goldstein, Gao Xiang, Christian Brauner
In-Reply-To: <20260403030848.731867-6-paul@paul-moore.com>
On Thu, Apr 02, 2026 at 11:08:33PM -0400, Paul Moore wrote:
> From: Amir Goldstein <amir73il@gmail.com>
>
> In preparation for adding an LSM blob to the backing_file struct, factor out
> the helpers init_backing_file() and backing_file_free().
>
> Cc: stable@vger.kernel.org
> Cc: linux-fsdevel@vger.kernel.org
> Cc: linux-unionfs@vger.kernel.org
> Cc: linux-erofs@lists.ozlabs.org
> Signed-off-by: Amir Goldstein <amir73il@gmail.com>
> [PM: use the term "LSM blob", fix comment style to match file]
> Signed-off-by: Paul Moore <paul@paul-moore.com>
Reviewed-by: Serge Hallyn <serge@hallyn.com>
> ---
> fs/file_table.c | 22 ++++++++++++++++++++--
> 1 file changed, 20 insertions(+), 2 deletions(-)
>
> diff --git a/fs/file_table.c b/fs/file_table.c
> index aaa5faaace1e..3b3792903185 100644
> --- a/fs/file_table.c
> +++ b/fs/file_table.c
> @@ -66,6 +66,12 @@ void backing_file_set_user_path(struct file *f, const struct path *path)
> }
> EXPORT_SYMBOL_GPL(backing_file_set_user_path);
>
> +static inline void backing_file_free(struct backing_file *ff)
> +{
> + path_put(&ff->user_path);
> + kmem_cache_free(bfilp_cachep, ff);
> +}
> +
> static inline void file_free(struct file *f)
> {
> security_file_free(f);
> @@ -73,8 +79,7 @@ static inline void file_free(struct file *f)
> percpu_counter_dec(&nr_files);
> put_cred(f->f_cred);
> if (unlikely(f->f_mode & FMODE_BACKING)) {
> - path_put(backing_file_user_path(f));
> - kmem_cache_free(bfilp_cachep, backing_file(f));
> + backing_file_free(backing_file(f));
> } else {
> kmem_cache_free(filp_cachep, f);
> }
> @@ -283,6 +288,12 @@ struct file *alloc_empty_file_noaccount(int flags, const struct cred *cred)
> return f;
> }
>
> +static int init_backing_file(struct backing_file *ff)
> +{
> + memset(&ff->user_path, 0, sizeof(ff->user_path));
> + return 0;
> +}
> +
> /*
> * Variant of alloc_empty_file() that allocates a backing_file container
> * and doesn't check and modify nr_files.
> @@ -305,7 +316,14 @@ struct file *alloc_empty_backing_file(int flags, const struct cred *cred)
> return ERR_PTR(error);
> }
>
> + /* The f_mode flags must be set before fput(). */
> ff->file.f_mode |= FMODE_BACKING | FMODE_NOACCOUNT;
> + error = init_backing_file(ff);
> + if (unlikely(error)) {
> + fput(&ff->file);
> + return ERR_PTR(error);
> + }
> +
> return &ff->file;
> }
> EXPORT_SYMBOL_GPL(alloc_empty_backing_file);
> --
> 2.53.0
>
^ permalink raw reply
* [PATCH v1 2/2] landlock: Allow TSYNC with LOG_SUBDOMAINS_OFF and fd=-1
From: Mickaël Salaün @ 2026-04-04 8:49 UTC (permalink / raw)
To: Günther Noack
Cc: Mickaël Salaün, linux-security-module, stable
In-Reply-To: <20260404085001.1604405-1-mic@digikod.net>
LANDLOCK_RESTRICT_SELF_TSYNC does not allow
LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF with ruleset_fd=-1, preventing
a multithreaded process from atomically propagating subdomain log muting
to all threads without creating a domain layer. Relax the fd=-1
condition to accept TSYNC alongside LOG_SUBDOMAINS_OFF, and update the
documentation accordingly.
Add flag validation tests for all TSYNC combinations with ruleset_fd=-1,
and audit tests verifying both transition directions: muting via TSYNC
(logged to not logged) and override via TSYNC (not logged to logged).
Cc: Günther Noack <gnoack@google.com>
Cc: stable@vger.kernel.org
Fixes: 42fc7e6543f6 ("landlock: Multithreading support for landlock_restrict_self()")
Signed-off-by: Mickaël Salaün <mic@digikod.net>
---
include/uapi/linux/landlock.h | 4 +-
security/landlock/syscalls.c | 14 +-
tools/testing/selftests/landlock/audit_test.c | 233 ++++++++++++++++++
tools/testing/selftests/landlock/tsync_test.c | 74 ++++++
4 files changed, 319 insertions(+), 6 deletions(-)
diff --git a/include/uapi/linux/landlock.h b/include/uapi/linux/landlock.h
index f88fa1f68b77..d37603efc273 100644
--- a/include/uapi/linux/landlock.h
+++ b/include/uapi/linux/landlock.h
@@ -116,7 +116,9 @@ struct landlock_ruleset_attr {
* ``LANDLOCK_RESTRICT_SELF_LOG_SAME_EXEC_OFF``, this flag only affects
* future nested domains, not the one being created. It can also be used
* with a @ruleset_fd value of -1 to mute subdomain logs without creating a
- * domain.
+ * domain. When combined with %LANDLOCK_RESTRICT_SELF_TSYNC and a
+ * @ruleset_fd value of -1, this configuration is propagated to all threads
+ * of the current process.
*
* The following flag supports policy enforcement in multithreaded processes:
*
diff --git a/security/landlock/syscalls.c b/security/landlock/syscalls.c
index 0d66a68677b7..a0bb664e0d31 100644
--- a/security/landlock/syscalls.c
+++ b/security/landlock/syscalls.c
@@ -512,10 +512,13 @@ SYSCALL_DEFINE2(landlock_restrict_self, const int, ruleset_fd, const __u32,
/*
* It is allowed to set LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF with
- * -1 as ruleset_fd, but no other flag must be set.
+ * -1 as ruleset_fd, optionally combined with
+ * LANDLOCK_RESTRICT_SELF_TSYNC to propagate this configuration to all
+ * threads. No other flag must be set.
*/
if (!(ruleset_fd == -1 &&
- flags == LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF)) {
+ (flags & ~LANDLOCK_RESTRICT_SELF_TSYNC) ==
+ LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF)) {
/* Gets and checks the ruleset. */
ruleset = get_ruleset_from_fd(ruleset_fd, FMODE_CAN_READ);
if (IS_ERR(ruleset))
@@ -537,9 +540,10 @@ SYSCALL_DEFINE2(landlock_restrict_self, const int, ruleset_fd, const __u32,
/*
* The only case when a ruleset may not be set is if
- * LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF is set and ruleset_fd is -1.
- * We could optimize this case by not calling commit_creds() if this flag
- * was already set, but it is not worth the complexity.
+ * LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF is set (optionally with
+ * LANDLOCK_RESTRICT_SELF_TSYNC) and ruleset_fd is -1. We could
+ * optimize this case by not calling commit_creds() if this flag was
+ * already set, but it is not worth the complexity.
*/
if (ruleset) {
/*
diff --git a/tools/testing/selftests/landlock/audit_test.c b/tools/testing/selftests/landlock/audit_test.c
index 20099b8667e7..a193d8a97560 100644
--- a/tools/testing/selftests/landlock/audit_test.c
+++ b/tools/testing/selftests/landlock/audit_test.c
@@ -162,6 +162,7 @@ TEST_F(audit, layers)
struct thread_data {
pid_t parent_pid;
int ruleset_fd, pipe_child, pipe_parent;
+ bool mute_subdomains;
};
static void *thread_audit_test(void *arg)
@@ -367,6 +368,238 @@ TEST_F(audit, log_subdomains_off_fork)
EXPECT_EQ(0, close(ruleset_fd));
}
+/*
+ * Thread function: runs two rounds of (create domain, trigger denial, signal
+ * back), waiting for the main thread before each round. When mute_subdomains
+ * is set, phase 1 also mutes subdomain logs via the fd=-1 path before creating
+ * the domain. The ruleset_fd is kept open across both rounds so each
+ * restrict_self call stacks a new domain layer.
+ */
+static void *thread_sandbox_deny_twice(void *arg)
+{
+ const struct thread_data *data = (struct thread_data *)arg;
+ uintptr_t err = 0;
+ char buffer;
+
+ /* Phase 1: optionally mutes, creates a domain, and triggers a denial. */
+ if (read(data->pipe_parent, &buffer, 1) != 1) {
+ err = 1;
+ goto out;
+ }
+
+ if (data->mute_subdomains &&
+ landlock_restrict_self(-1,
+ LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF)) {
+ err = 2;
+ goto out;
+ }
+
+ if (landlock_restrict_self(data->ruleset_fd, 0)) {
+ err = 3;
+ goto out;
+ }
+
+ if (kill(data->parent_pid, 0) != -1 || errno != EPERM) {
+ err = 4;
+ goto out;
+ }
+
+ if (write(data->pipe_child, ".", 1) != 1) {
+ err = 5;
+ goto out;
+ }
+
+ /* Phase 2: stacks another domain and triggers a denial. */
+ if (read(data->pipe_parent, &buffer, 1) != 1) {
+ err = 6;
+ goto out;
+ }
+
+ if (landlock_restrict_self(data->ruleset_fd, 0)) {
+ err = 7;
+ goto out;
+ }
+
+ if (kill(data->parent_pid, 0) != -1 || errno != EPERM) {
+ err = 8;
+ goto out;
+ }
+
+ if (write(data->pipe_child, ".", 1) != 1) {
+ err = 9;
+ goto out;
+ }
+
+out:
+ close(data->ruleset_fd);
+ close(data->pipe_child);
+ close(data->pipe_parent);
+ return (void *)err;
+}
+
+/*
+ * Verifies that LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF with
+ * LANDLOCK_RESTRICT_SELF_TSYNC and ruleset_fd=-1 propagates log_subdomains_off
+ * to a sibling thread, suppressing audit logging on domains it subsequently
+ * creates.
+ *
+ * Phase 1 (before TSYNC) acts as an inline baseline: the sibling creates a
+ * domain and triggers a denial that IS logged.
+ *
+ * Phase 2 (after TSYNC) verifies suppression: the sibling stacks another domain
+ * and triggers a denial that is NOT logged.
+ */
+TEST_F(audit, log_subdomains_off_tsync)
+{
+ const struct landlock_ruleset_attr ruleset_attr = {
+ .scoped = LANDLOCK_SCOPE_SIGNAL,
+ };
+ struct audit_records records;
+ struct thread_data child_data = {};
+ int pipe_child[2], pipe_parent[2];
+ char buffer;
+ pthread_t thread;
+ void *thread_ret;
+
+ child_data.parent_pid = getppid();
+ ASSERT_EQ(0, pipe2(pipe_child, O_CLOEXEC));
+ child_data.pipe_child = pipe_child[1];
+ ASSERT_EQ(0, pipe2(pipe_parent, O_CLOEXEC));
+ child_data.pipe_parent = pipe_parent[0];
+ child_data.ruleset_fd =
+ landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0);
+ ASSERT_LE(0, child_data.ruleset_fd);
+
+ ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0));
+
+ /* Creates the sibling thread; mute_subdomains stays false here. */
+ ASSERT_EQ(0, pthread_create(&thread, NULL, thread_sandbox_deny_twice,
+ &child_data));
+
+ /*
+ * Phase 1: the sibling creates a domain and triggers a denial before
+ * any log muting. This proves the audit path works.
+ */
+ ASSERT_EQ(1, write(pipe_parent[1], ".", 1));
+ ASSERT_EQ(1, read(pipe_child[0], &buffer, 1));
+
+ /* The denial must be logged. */
+ EXPECT_EQ(0, matches_log_signal(_metadata, self->audit_fd,
+ child_data.parent_pid, NULL));
+
+ /* Drains any remaining records (e.g. domain allocation). */
+ EXPECT_EQ(0, audit_count_records(self->audit_fd, &records));
+
+ /*
+ * Mutes subdomain logs and propagates to the sibling thread via TSYNC,
+ * without creating a domain.
+ */
+ ASSERT_EQ(0, landlock_restrict_self(
+ -1, LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF |
+ LANDLOCK_RESTRICT_SELF_TSYNC));
+
+ /*
+ * Phase 2: the sibling stacks another domain and triggers a denial.
+ * Because log_subdomains_off was propagated via TSYNC, the new domain
+ * has log_status=LANDLOCK_LOG_DISABLED.
+ */
+ ASSERT_EQ(1, write(pipe_parent[1], ".", 1));
+ ASSERT_EQ(1, read(pipe_child[0], &buffer, 1));
+
+ /* No denial record should appear. */
+ EXPECT_EQ(-EAGAIN, matches_log_signal(_metadata, self->audit_fd,
+ child_data.parent_pid, NULL));
+
+ EXPECT_EQ(0, audit_count_records(self->audit_fd, &records));
+ EXPECT_EQ(0, records.access);
+
+ EXPECT_EQ(0, close(pipe_child[0]));
+ EXPECT_EQ(0, close(pipe_parent[1]));
+ ASSERT_EQ(0, pthread_join(thread, &thread_ret));
+ EXPECT_EQ(NULL, thread_ret);
+}
+
+/*
+ * Verifies that LANDLOCK_RESTRICT_SELF_TSYNC without
+ * LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF overrides a sibling thread's
+ * log_subdomains_off, re-enabling audit logging on domains the sibling
+ * subsequently creates.
+ *
+ * Phase 1: the sibling sets log_subdomains_off, creates a muted domain, and
+ * triggers a denial that is NOT logged.
+ *
+ * Phase 2 (after TSYNC without LOG_SUBDOMAINS_OFF): the sibling stacks another
+ * domain and triggers a denial that IS logged, proving the muting was
+ * overridden.
+ */
+TEST_F(audit, tsync_override_log_subdomains_off)
+{
+ const struct landlock_ruleset_attr ruleset_attr = {
+ .scoped = LANDLOCK_SCOPE_SIGNAL,
+ };
+ struct audit_records records;
+ struct thread_data child_data;
+ int pipe_child[2], pipe_parent[2];
+ char buffer;
+ pthread_t thread;
+ void *thread_ret;
+
+ child_data.parent_pid = getppid();
+ ASSERT_EQ(0, pipe2(pipe_child, O_CLOEXEC));
+ child_data.pipe_child = pipe_child[1];
+ ASSERT_EQ(0, pipe2(pipe_parent, O_CLOEXEC));
+ child_data.pipe_parent = pipe_parent[0];
+ child_data.ruleset_fd =
+ landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0);
+ ASSERT_LE(0, child_data.ruleset_fd);
+
+ ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0));
+
+ child_data.mute_subdomains = true;
+
+ /* Creates the sibling thread. */
+ ASSERT_EQ(0, pthread_create(&thread, NULL, thread_sandbox_deny_twice,
+ &child_data));
+
+ /*
+ * Phase 1: the sibling mutes subdomain logs, creates a domain, and
+ * triggers a denial. The denial must not be logged.
+ */
+ ASSERT_EQ(1, write(pipe_parent[1], ".", 1));
+ ASSERT_EQ(1, read(pipe_child[0], &buffer, 1));
+
+ EXPECT_EQ(-EAGAIN, matches_log_signal(_metadata, self->audit_fd,
+ child_data.parent_pid, NULL));
+
+ /* Drains any remaining records. */
+ EXPECT_EQ(0, audit_count_records(self->audit_fd, &records));
+ EXPECT_EQ(0, records.access);
+
+ /*
+ * Overrides the sibling's log_subdomains_off by calling TSYNC without
+ * LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF.
+ */
+ ASSERT_EQ(0, landlock_restrict_self(child_data.ruleset_fd,
+ LANDLOCK_RESTRICT_SELF_TSYNC));
+
+ /*
+ * Phase 2: the sibling stacks another domain and triggers a denial.
+ * Because TSYNC replaced its log_subdomains_off with 0, the new domain
+ * has log_status=LANDLOCK_LOG_PENDING.
+ */
+ ASSERT_EQ(1, write(pipe_parent[1], ".", 1));
+ ASSERT_EQ(1, read(pipe_child[0], &buffer, 1));
+
+ /* The denial must be logged. */
+ EXPECT_EQ(0, matches_log_signal(_metadata, self->audit_fd,
+ child_data.parent_pid, NULL));
+
+ EXPECT_EQ(0, close(pipe_child[0]));
+ EXPECT_EQ(0, close(pipe_parent[1]));
+ ASSERT_EQ(0, pthread_join(thread, &thread_ret));
+ EXPECT_EQ(NULL, thread_ret);
+}
+
FIXTURE(audit_flags)
{
struct audit_filter audit_filter;
diff --git a/tools/testing/selftests/landlock/tsync_test.c b/tools/testing/selftests/landlock/tsync_test.c
index 2b9ad4f154f4..abc290271a1a 100644
--- a/tools/testing/selftests/landlock/tsync_test.c
+++ b/tools/testing/selftests/landlock/tsync_test.c
@@ -247,4 +247,78 @@ TEST(tsync_interrupt)
EXPECT_EQ(0, close(ruleset_fd));
}
+/* clang-format off */
+FIXTURE(tsync_without_ruleset) {};
+/* clang-format on */
+
+FIXTURE_VARIANT(tsync_without_ruleset)
+{
+ const __u32 flags;
+ const int expected_errno;
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(tsync_without_ruleset, tsync_only) {
+ /* clang-format on */
+ .flags = LANDLOCK_RESTRICT_SELF_TSYNC,
+ .expected_errno = EBADF,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(tsync_without_ruleset, subdomains_off_same_exec_off) {
+ /* clang-format on */
+ .flags = LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF |
+ LANDLOCK_RESTRICT_SELF_LOG_SAME_EXEC_OFF |
+ LANDLOCK_RESTRICT_SELF_TSYNC,
+ .expected_errno = EBADF,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(tsync_without_ruleset, subdomains_off_new_exec_on) {
+ /* clang-format on */
+ .flags = LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF |
+ LANDLOCK_RESTRICT_SELF_LOG_NEW_EXEC_ON |
+ LANDLOCK_RESTRICT_SELF_TSYNC,
+ .expected_errno = EBADF,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(tsync_without_ruleset, all_flags) {
+ /* clang-format on */
+ .flags = LANDLOCK_RESTRICT_SELF_LOG_SAME_EXEC_OFF |
+ LANDLOCK_RESTRICT_SELF_LOG_NEW_EXEC_ON |
+ LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF |
+ LANDLOCK_RESTRICT_SELF_TSYNC,
+ .expected_errno = EBADF,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(tsync_without_ruleset, subdomains_off) {
+ /* clang-format on */
+ .flags = LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF |
+ LANDLOCK_RESTRICT_SELF_TSYNC,
+ .expected_errno = 0,
+};
+
+FIXTURE_SETUP(tsync_without_ruleset)
+{
+}
+
+FIXTURE_TEARDOWN(tsync_without_ruleset)
+{
+}
+
+TEST_F(tsync_without_ruleset, check)
+{
+ int ret;
+
+ ret = landlock_restrict_self(-1, variant->flags);
+ if (variant->expected_errno) {
+ EXPECT_EQ(-1, ret);
+ EXPECT_EQ(variant->expected_errno, errno);
+ } else {
+ EXPECT_EQ(0, ret);
+ }
+}
+
TEST_HARNESS_MAIN
--
2.53.0
^ permalink raw reply related
* [PATCH v1 1/2] landlock: Fix log_subdomains_off inheritance across fork()
From: Mickaël Salaün @ 2026-04-04 8:49 UTC (permalink / raw)
To: Günther Noack
Cc: Mickaël Salaün, linux-security-module, stable
hook_cred_transfer() only copies the Landlock security blob when the
source credential has a domain. This is inconsistent with
landlock_restrict_self() which can set log_subdomains_off on a
credential without creating a domain (via the ruleset_fd=-1 path): the
field is committed but not preserved across fork() because the child's
prepare_creds() calls hook_cred_transfer() which skips the copy when
domain is NULL.
This breaks the documented use case where a process mutes subdomain logs
before forking sandboxed children: the children lose the muting and
their domains produce unexpected audit records.
Fix this by unconditionally copying the Landlock credential blob.
landlock_get_ruleset(NULL) is already a safe no-op.
Cc: Günther Noack <gnoack@google.com>
Cc: stable@vger.kernel.org
Fixes: ead9079f7569 ("landlock: Add LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF")
Signed-off-by: Mickaël Salaün <mic@digikod.net>
---
security/landlock/cred.c | 6 +-
tools/testing/selftests/landlock/audit_test.c | 88 +++++++++++++++++++
2 files changed, 90 insertions(+), 4 deletions(-)
diff --git a/security/landlock/cred.c b/security/landlock/cred.c
index 0cb3edde4d18..cc419de75cd6 100644
--- a/security/landlock/cred.c
+++ b/security/landlock/cred.c
@@ -22,10 +22,8 @@ static void hook_cred_transfer(struct cred *const new,
const struct landlock_cred_security *const old_llcred =
landlock_cred(old);
- if (old_llcred->domain) {
- landlock_get_ruleset(old_llcred->domain);
- *landlock_cred(new) = *old_llcred;
- }
+ landlock_get_ruleset(old_llcred->domain);
+ *landlock_cred(new) = *old_llcred;
}
static int hook_cred_prepare(struct cred *const new,
diff --git a/tools/testing/selftests/landlock/audit_test.c b/tools/testing/selftests/landlock/audit_test.c
index 46d02d49835a..20099b8667e7 100644
--- a/tools/testing/selftests/landlock/audit_test.c
+++ b/tools/testing/selftests/landlock/audit_test.c
@@ -279,6 +279,94 @@ TEST_F(audit, thread)
&audit_tv_default, sizeof(audit_tv_default)));
}
+/*
+ * Verifies that log_subdomains_off set via the ruleset_fd=-1 path (without
+ * creating a domain) is inherited by children across fork(). This exercises
+ * the hook_cred_transfer() fix: the Landlock credential blob must be copied
+ * even when the source credential has no domain.
+ *
+ * Phase 1 (baseline): a child without muting creates a domain and triggers a
+ * denial that IS logged.
+ *
+ * Phase 2 (after muting): the parent mutes subdomain logs, forks another child
+ * who creates a domain and triggers a denial that is NOT logged.
+ */
+TEST_F(audit, log_subdomains_off_fork)
+{
+ const struct landlock_ruleset_attr ruleset_attr = {
+ .scoped = LANDLOCK_SCOPE_SIGNAL,
+ };
+ struct audit_records records;
+ int ruleset_fd, status;
+ pid_t child;
+
+ ruleset_fd =
+ landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0);
+ ASSERT_LE(0, ruleset_fd);
+
+ ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0));
+
+ /*
+ * Phase 1: forks a child that creates a domain and triggers a denial
+ * before any muting. This proves the audit path works.
+ */
+ child = fork();
+ ASSERT_LE(0, child);
+ if (child == 0) {
+ ASSERT_EQ(0, landlock_restrict_self(ruleset_fd, 0));
+ ASSERT_EQ(-1, kill(getppid(), 0));
+ ASSERT_EQ(EPERM, errno);
+ _exit(0);
+ return;
+ }
+
+ ASSERT_EQ(child, waitpid(child, &status, 0));
+ ASSERT_EQ(true, WIFEXITED(status));
+ ASSERT_EQ(0, WEXITSTATUS(status));
+
+ /* The denial must be logged (baseline). */
+ EXPECT_EQ(0, matches_log_signal(_metadata, self->audit_fd, getpid(),
+ NULL));
+
+ /* Drains any remaining records (e.g. domain allocation). */
+ EXPECT_EQ(0, audit_count_records(self->audit_fd, &records));
+
+ /*
+ * Mutes subdomain logs without creating a domain. The parent's
+ * credential has domain=NULL and log_subdomains_off=1.
+ */
+ ASSERT_EQ(0, landlock_restrict_self(
+ -1, LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF));
+
+ /*
+ * Phase 2: forks a child that creates a domain and triggers a denial.
+ * Because log_subdomains_off was inherited via fork(), the child's
+ * domain has log_status=LANDLOCK_LOG_DISABLED.
+ */
+ child = fork();
+ ASSERT_LE(0, child);
+ if (child == 0) {
+ ASSERT_EQ(0, landlock_restrict_self(ruleset_fd, 0));
+ ASSERT_EQ(-1, kill(getppid(), 0));
+ ASSERT_EQ(EPERM, errno);
+ _exit(0);
+ return;
+ }
+
+ ASSERT_EQ(child, waitpid(child, &status, 0));
+ ASSERT_EQ(true, WIFEXITED(status));
+ ASSERT_EQ(0, WEXITSTATUS(status));
+
+ /* No denial record should appear. */
+ EXPECT_EQ(-EAGAIN, matches_log_signal(_metadata, self->audit_fd,
+ getpid(), NULL));
+
+ EXPECT_EQ(0, audit_count_records(self->audit_fd, &records));
+ EXPECT_EQ(0, records.access);
+
+ EXPECT_EQ(0, close(ruleset_fd));
+}
+
FIXTURE(audit_flags)
{
struct audit_filter audit_filter;
--
2.53.0
^ permalink raw reply related
* Re: [PATCH v4 2/3] lsm: add backing_file LSM hooks
From: Paul Moore @ 2026-04-03 21:14 UTC (permalink / raw)
To: Amir Goldstein
Cc: linux-security-module, selinux, linux-fsdevel, linux-unionfs,
linux-erofs, Gao Xiang, Christian Brauner
In-Reply-To: <CAOQ4uxgd1wo9U32L_sQLfswY93LRp4yPzkJvKtj=wDKi8h13gg@mail.gmail.com>
On Fri, Apr 3, 2026 at 2:12 AM Amir Goldstein <amir73il@gmail.com> wrote:
> On Fri, Apr 3, 2026 at 5:09 AM Paul Moore <paul@paul-moore.com> wrote:
> >
> > Stacked filesystems such as overlayfs do not currently provide the
> > necessary mechanisms for LSMs to properly enforce access controls on the
> > mmap() and mprotect() operations. In order to resolve this gap, a LSM
> > security blob is being added to the backing_file struct and the following
> > new LSM hooks are being created:
> >
> > security_backing_file_alloc()
> > security_backing_file_free()
> > security_mmap_backing_file()
> >
> > The first two hooks are to manage the lifecycle of the LSM security blob
> > in the backing_file struct, while the third provides a new mmap() access
> > control point for the underlying backing file. It is also expected that
> > LSMs will likely want to update their security_file_mprotect() callback
> > to address issues with their mprotect() controls, but that does not
> > require a change to the security_file_mprotect() LSM hook.
> >
> > There are three other small changes to support these new LSM hooks:
> > * Pass the user file associated with a backing file down to
> > alloc_empty_backing_file() so it can be included in the
> > security_backing_file_alloc() hook.
> > * Add getter and setter functions for the backing_file struct LSM blob
> > as the backing_file struct remains private to fs/file_table.c.
> > * Constify the file struct field in the LSM common_audit_data struct to
> > better support LSMs that need to pass a const file struct pointer into
> > the common LSM audit code.
> >
> > Thanks to Arnd Bergmann for identifying the missing EXPORT_SYMBOL_GPL()
> > and supplying a fixup.
> >
> > Cc: stable@vger.kernel.org
> > Cc: linux-fsdevel@vger.kernel.org
> > Cc: linux-unionfs@vger.kernel.org
> > Cc: linux-erofs@lists.ozlabs.org
> > Signed-off-by: Paul Moore <paul@paul-moore.com>
>
> That looks nicer.
>
> Reviewed-by: Amir Goldstein <amir73il@gmail.com>
>
> Thanks,
> Amir.
Thanks for refreshing your review. Since we are at the end of -rc6,
it probably doesn't make much sense to put this in lsm/stable-7.0; I'm
going to merge this into lsm/dev which should give us at least one
week in linux-next before the v7.1 merge window opens. If others want
to add their ACKs/Reviewed-by during that time, I'll update the
branch.
--
paul-moore.com
^ permalink raw reply
* Re: [PATCH v3 0/5] Fix Landlock audit test flakiness
From: Mickaël Salaün @ 2026-04-03 17:08 UTC (permalink / raw)
To: Günther Noack
Cc: Günther Noack, linux-security-module, Justin Suess,
Tingmao Wang
In-Reply-To: <20260402.eb5c4e85f472@gnoack.org>
On Thu, Apr 02, 2026 at 10:52:46PM +0200, Günther Noack wrote:
> Hello!
>
> On Thu, Apr 02, 2026 at 09:26:01PM +0200, Mickaël Salaün wrote:
> > This series fixes two classes of audit selftest failures plus two minor
> > bugs in the audit test helpers.
> >
> > The main issue is that domain deallocation audit records are emitted
> > asynchronously from kworker threads and can arrive after a previous
> > test's socket has been closed. This causes two distinct failure modes:
> >
> > - audit_match_record() picks up a stale deallocation record from a
> > previous test instead of the expected one, causing a domain ID
> > mismatch. The audit.layers test (which reads 16 deallocation records
> > in sequence) is particularly vulnerable because the large read window
> > allows stale records to interleave. Patch 4 fixes this by filtering
> > deallocation records by domain ID and skipping type-matching records
> > with wrong content patterns.
> >
> > - audit_count_records() counts stale deallocation records from a
> > previous test, incrementing records.domain from the expected 0 to 1.
> > Patch 3 fixes this by draining stale records at audit_init() time and
> > removing records.domain == 0 checks that are not preceded by
> > audit_match_record() calls (which would consume stale records).
> >
> > These races are more likely to manifest when additional instrumentation
> > changes kworker timing in the deallocation path (e.g. with the upcoming
> > Landlock tracepoints work).
> >
> > The two minor fixes (patches 1-2) correct a snprintf truncation check
> > off-by-one and socket file descriptor leaks on error paths in
> > audit_init(), audit_init_with_exe_filter(), and audit_cleanup().
> > Patch 5 fixes a __u64 format warning reported by the kbuild bot on
> > powerpc64.
> >
> > Patch 1 is an exact subset of the v1 combined patch, which is why it
> > carries the Reviewed-by tag. Patches 2 and 3 extend beyond what was in
> > v1, so the Reviewed-by is not carried. Patches 4 and 5 are new.
> >
> > Changes since v2:
> > https://lore.kernel.org/r/20260401161503.1136946-1-mic@digikod.net
> > - Patches 4-5: fix __u64 format warnings on powerpc64 (cast to unsigned
> > long long for %llx). Patch 5 is new.
> >
> > Changes since v1:
> > https://lore.kernel.org/r/20260312100444.2609563-8-mic@digikod.net
> > - Split the combined drain fix into four separate patches.
> > - Patch 2: extend fd leak fix to audit_init_with_exe_filter() and
> > audit_cleanup().
> > - Patch 3: also remove domain checks from audit.trace and
> > scoped_audit.connect_to_child, document constraint, explain why a
> > longer drain timeout was rejected.
> > - Patch 4: new, add domain ID filtering and timeout management to
> > matches_log_domain_deallocated(), skip stale records in
> > audit_match_record().
> >
> > Mickaël Salaün (5):
> > selftests/landlock: Fix snprintf truncation checks in audit helpers
> > selftests/landlock: Fix socket file descriptor leaks in audit helpers
> > selftests/landlock: Drain stale audit records on init
> > selftests/landlock: Skip stale records in audit_match_record()
> > selftests/landlock: Fix format warning for __u64 in net_test
> >
> > tools/testing/selftests/landlock/audit.h | 133 ++++++++++++++----
> > tools/testing/selftests/landlock/audit_test.c | 36 ++---
> > tools/testing/selftests/landlock/net_test.c | 2 +-
> > .../testing/selftests/landlock/ptrace_test.c | 1 -
> > .../landlock/scoped_abstract_unix_test.c | 1 -
> > 5 files changed, 119 insertions(+), 54 deletions(-)
> >
> > --
> > 2.53.0
> >
>
> I am still getting flaky audit tests even with these patches, I am
> afraid. It differs which of these tests is flaking, some of them
> still do, for example:
>
> # RUN audit_layout1.remove_dir ...
> # fs_test.c:7281:remove_dir:Expected 0 (0) == matches_log_fs(_metadata, self->audit_fd, "fs\\.remove_dir", dir_s1d2) (-11)
> # remove_dir: Test failed
> # ❌ FAIL audit_layout1.remove_dir
> not ok 191 audit_layout1.remove_dir
> # RUN audit_layout1.read_dir ...
> # ✅ OK audit_layout1.read_dir
> ok 192 audit_layout1.read_dir
> # RUN audit_layout1.read_file ...
> # ✅ OK audit_layout1.read_file
> ok 193 audit_layout1.read_file
> # RUN audit_layout1.write_file ...
> # fs_test.c:7221:write_file:Expected 0 (0) == matches_log_fs(_metadata, self->audit_fd, "fs\\.write_file", file1_s1d1) (-11)
> # fs_test.c:7224:write_file:Expected 0 (0) == records.access (1)
> # write_file: Test failed
> # ❌ FAIL audit_layout1.write_file
> not ok 194 audit_layout1.write_file
I never hit these issues and I cannot reproduce them. This patch fixes
the async events (i.e. domain drops).
You can try to increase audit_tv_default.
>
> My kernel config is this:
>
> make defconfig
> make kvm_guest.config
> KCONFIG_CONFIG="${KBUILD_OUTPUT}/.config" ./scripts/kconfig/merge_config.sh "${KBUILD_OUTPUT}/.config" tools/testing/selftests/landlock/config
> make debug.config
> echo "CONFIG_RANDOMIZE_BASE=n" >> "${KBUILD_OUTPUT}/.config"
> make olddefconfig
>
> and then I run the selftests in Qemu with these flags:
>
> qemu-system-x86_64 \
> -nographic \
> -m 4G \
> -enable-kvm \
> -append "console=ttyS0 lsm=landlock no_hash_pointers" \
> -kernel "${KBUILD_OUTPUT}/arch/x86/boot/bzImage" \
> -initrd "${INITRAMFS}"
>
> This is using my own selftest runner scripts which builds an initramfs
> with the statically linked selftests.
Can you try with the check-linux.sh build kselftest (which also sets a
lot of debug options)? You can also try with qemu if you set
ARCH=x86_64.
>
> Do you have a hunch what might be missing there? In the test run
> above, I have applied your V4 patch set on top of the current master,
> 5619b098e2fbf3a23bf13d91897056a1fe238c6d ("Merge tag 'for-7.0-rc6-tag'
> of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux").
This is weird because this is related to FS events, and they should be
(almost) synchronous events. Maybe the audit event pipeline is made
very slow because of some audit options but still...
Anyway, this is not what this patch fixes, but we should fix your issues
as well.
^ permalink raw reply
* Re: [PATCH v4 3/3] selinux: fix overlayfs mmap() and mprotect() access checks
From: Amir Goldstein @ 2026-04-03 6:17 UTC (permalink / raw)
To: Paul Moore
Cc: linux-security-module, selinux, linux-fsdevel, linux-unionfs,
linux-erofs, Gao Xiang, Christian Brauner
In-Reply-To: <20260403030848.731867-8-paul@paul-moore.com>
On Fri, Apr 3, 2026 at 5:09 AM Paul Moore <paul@paul-moore.com> wrote:
>
> The existing SELinux security model for overlayfs is to allow access if
> the current task is able to access the top level file (the "user" file)
> and the mounter's credentials are sufficient to access the lower
> level file (the "backing" file). Unfortunately, the current code does
> not properly enforce these access controls for both mmap() and mprotect()
> operations on overlayfs filesystems.
>
> This patch makes use of the newly created security_mmap_backing_file()
> LSM hook to provide the missing backing file enforcement for mmap()
> operations, and leverages the backing file API and new LSM blob to
> provide the necessary information to properly enforce the mprotect()
> access controls.
>
> Cc: stable@vger.kernel.org
> Signed-off-by: Paul Moore <paul@paul-moore.com>
I can't say much about the SELinux implementation, but
for the use of the backing file API and the overall concept of the solution:
Acked-by: Amir Goldstein <amir73il@gmail.com>
Thanks,
Amir.
> ---
> security/selinux/hooks.c | 256 +++++++++++++++++++++---------
> security/selinux/include/objsec.h | 11 ++
> 2 files changed, 196 insertions(+), 71 deletions(-)
>
> diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
> index d8224ea113d1..76e0fb7dcb36 100644
> --- a/security/selinux/hooks.c
> +++ b/security/selinux/hooks.c
> @@ -1745,6 +1745,60 @@ static inline int file_path_has_perm(const struct cred *cred,
> static int bpf_fd_pass(const struct file *file, u32 sid);
> #endif
>
> +static int __file_has_perm(const struct cred *cred, const struct file *file,
> + u32 av, bool bf_user_file)
> +
> +{
> + struct common_audit_data ad;
> + struct inode *inode;
> + u32 ssid = cred_sid(cred);
> + u32 tsid_fd;
> + int rc;
> +
> + if (bf_user_file) {
> + struct backing_file_security_struct *bfsec;
> + const struct path *path;
> +
> + if (WARN_ON(!(file->f_mode & FMODE_BACKING)))
> + return -EIO;
> +
> + bfsec = selinux_backing_file(file);
> + path = backing_file_user_path(file);
> + tsid_fd = bfsec->uf_sid;
> + inode = d_inode(path->dentry);
> +
> + ad.type = LSM_AUDIT_DATA_PATH;
> + ad.u.path = *path;
> + } else {
> + struct file_security_struct *fsec = selinux_file(file);
> +
> + tsid_fd = fsec->sid;
> + inode = file_inode(file);
> +
> + ad.type = LSM_AUDIT_DATA_FILE;
> + ad.u.file = file;
> + }
> +
> + if (ssid != tsid_fd) {
> + rc = avc_has_perm(ssid, tsid_fd, SECCLASS_FD, FD__USE, &ad);
> + if (rc)
> + return rc;
> + }
> +
> +#ifdef CONFIG_BPF_SYSCALL
> + /* regardless of backing vs user file, use the underlying file here */
> + rc = bpf_fd_pass(file, ssid);
> + if (rc)
> + return rc;
> +#endif
> +
> + /* av is zero if only checking access to the descriptor. */
> + if (av)
> + return inode_has_perm(cred, inode, av, &ad);
> +
> + return 0;
> +}
> +
> /* Check whether a task can use an open file descriptor to
> access an inode in a given way. Check access to the
> descriptor itself, and then use dentry_has_perm to
> @@ -1753,41 +1807,10 @@ static int bpf_fd_pass(const struct file *file, u32 sid);
> has the same SID as the process. If av is zero, then
> access to the file is not checked, e.g. for cases
> where only the descriptor is affected like seek. */
> -static int file_has_perm(const struct cred *cred,
> - struct file *file,
> - u32 av)
> +static inline int file_has_perm(const struct cred *cred,
> + const struct file *file, u32 av)
> {
> - struct file_security_struct *fsec = selinux_file(file);
> - struct inode *inode = file_inode(file);
> - struct common_audit_data ad;
> - u32 sid = cred_sid(cred);
> - int rc;
> -
> - ad.type = LSM_AUDIT_DATA_FILE;
> - ad.u.file = file;
> -
> - if (sid != fsec->sid) {
> - rc = avc_has_perm(sid, fsec->sid,
> - SECCLASS_FD,
> - FD__USE,
> - &ad);
> - if (rc)
> - goto out;
> - }
> -
> -#ifdef CONFIG_BPF_SYSCALL
> - rc = bpf_fd_pass(file, cred_sid(cred));
> - if (rc)
> - return rc;
> -#endif
> -
> - /* av is zero if only checking access to the descriptor. */
> - rc = 0;
> - if (av)
> - rc = inode_has_perm(cred, inode, av, &ad);
> -
> -out:
> - return rc;
> + return __file_has_perm(cred, file, av, false);
> }
>
> /*
> @@ -3825,6 +3848,17 @@ static int selinux_file_alloc_security(struct file *file)
> return 0;
> }
>
> +static int selinux_backing_file_alloc(struct file *backing_file,
> + const struct file *user_file)
> +{
> + struct backing_file_security_struct *bfsec;
> +
> + bfsec = selinux_backing_file(backing_file);
> + bfsec->uf_sid = selinux_file(user_file)->sid;
> +
> + return 0;
> +}
> +
> /*
> * Check whether a task has the ioctl permission and cmd
> * operation to an inode.
> @@ -3942,42 +3976,55 @@ static int selinux_file_ioctl_compat(struct file *file, unsigned int cmd,
>
> static int default_noexec __ro_after_init;
>
> -static int file_map_prot_check(struct file *file, unsigned long prot, int shared)
> +static int __file_map_prot_check(const struct cred *cred,
> + const struct file *file, unsigned long prot,
> + bool shared, bool bf_user_file)
> {
> - const struct cred *cred = current_cred();
> - u32 sid = cred_sid(cred);
> - int rc = 0;
> + struct inode *inode = NULL;
> + bool prot_exec = prot & PROT_EXEC;
> + bool prot_write = prot & PROT_WRITE;
> +
> + if (file) {
> + if (bf_user_file)
> + inode = d_inode(backing_file_user_path(file)->dentry);
> + else
> + inode = file_inode(file);
> + }
> +
> + if (default_noexec && prot_exec &&
> + (!file || IS_PRIVATE(inode) || (!shared && prot_write))) {
> + int rc;
> + u32 sid = cred_sid(cred);
>
> - if (default_noexec &&
> - (prot & PROT_EXEC) && (!file || IS_PRIVATE(file_inode(file)) ||
> - (!shared && (prot & PROT_WRITE)))) {
> /*
> - * We are making executable an anonymous mapping or a
> - * private file mapping that will also be writable.
> - * This has an additional check.
> + * We are making executable an anonymous mapping or a private
> + * file mapping that will also be writable.
> */
> - rc = avc_has_perm(sid, sid, SECCLASS_PROCESS,
> - PROCESS__EXECMEM, NULL);
> + rc = avc_has_perm(sid, sid, SECCLASS_PROCESS, PROCESS__EXECMEM,
> + NULL);
> if (rc)
> - goto error;
> + return rc;
> }
>
> if (file) {
> - /* read access is always possible with a mapping */
> + /* "read" always possible, "write" only if shared */
> u32 av = FILE__READ;
> -
> - /* write access only matters if the mapping is shared */
> - if (shared && (prot & PROT_WRITE))
> + if (shared && prot_write)
> av |= FILE__WRITE;
> -
> - if (prot & PROT_EXEC)
> + if (prot_exec)
> av |= FILE__EXECUTE;
>
> - return file_has_perm(cred, file, av);
> + return __file_has_perm(cred, file, av, bf_user_file);
> }
>
> -error:
> - return rc;
> + return 0;
> +}
> +
> +static inline int file_map_prot_check(const struct cred *cred,
> + const struct file *file,
> + unsigned long prot, bool shared)
> +{
> + return __file_map_prot_check(cred, file, prot, shared, false);
> }
>
> static int selinux_mmap_addr(unsigned long addr)
> @@ -3993,36 +4040,80 @@ static int selinux_mmap_addr(unsigned long addr)
> return rc;
> }
>
> -static int selinux_mmap_file(struct file *file,
> - unsigned long reqprot __always_unused,
> - unsigned long prot, unsigned long flags)
> +static int selinux_mmap_file_common(const struct cred *cred, struct file *file,
> + unsigned long prot, bool shared)
> {
> - struct common_audit_data ad;
> - int rc;
> -
> if (file) {
> + int rc;
> + struct common_audit_data ad;
> +
> ad.type = LSM_AUDIT_DATA_FILE;
> ad.u.file = file;
> - rc = inode_has_perm(current_cred(), file_inode(file),
> - FILE__MAP, &ad);
> + rc = inode_has_perm(cred, file_inode(file), FILE__MAP, &ad);
> if (rc)
> return rc;
> }
>
> - return file_map_prot_check(file, prot,
> - (flags & MAP_TYPE) == MAP_SHARED);
> + return file_map_prot_check(cred, file, prot, shared);
> +}
> +
> +static int selinux_mmap_file(struct file *file,
> + unsigned long reqprot __always_unused,
> + unsigned long prot, unsigned long flags)
> +{
> + return selinux_mmap_file_common(current_cred(), file, prot,
> + (flags & MAP_TYPE) == MAP_SHARED);
> +}
> +
> +/**
> + * selinux_mmap_backing_file - Check mmap permissions on a backing file
> + * @vma: memory region
> + * @backing_file: stacked filesystem backing file
> + * @user_file: user visible file
> + *
> + * This is called after selinux_mmap_file() on stacked filesystems, and it
> + * is this function's responsibility to verify access to @backing_file and
> + * setup the SELinux state for possible later use in the mprotect() code path.
> + *
> + * By the time this function is called, mmap() access to @user_file has already
> + * been authorized and @vma->vm_file has been set to point to @backing_file.
> + *
> + * Return zero on success, negative values otherwise.
> + */
> +static int selinux_mmap_backing_file(struct vm_area_struct *vma,
> + struct file *backing_file,
> + struct file *user_file __always_unused)
> +{
> + unsigned long prot = 0;
> +
> + /* translate vma->vm_flags perms into PROT perms */
> + if (vma->vm_flags & VM_READ)
> + prot |= PROT_READ;
> + if (vma->vm_flags & VM_WRITE)
> + prot |= PROT_WRITE;
> + if (vma->vm_flags & VM_EXEC)
> + prot |= PROT_EXEC;
> +
> + return selinux_mmap_file_common(backing_file->f_cred, backing_file,
> + prot, vma->vm_flags & VM_SHARED);
> }
>
> static int selinux_file_mprotect(struct vm_area_struct *vma,
> unsigned long reqprot __always_unused,
> unsigned long prot)
> {
> + int rc;
> const struct cred *cred = current_cred();
> u32 sid = cred_sid(cred);
> + const struct file *file = vma->vm_file;
> + bool backing_file;
> + bool shared = vma->vm_flags & VM_SHARED;
> +
> + /* check if we need to trigger the "backing files are awful" mode */
> + backing_file = file && (file->f_mode & FMODE_BACKING);
>
> if (default_noexec &&
> (prot & PROT_EXEC) && !(vma->vm_flags & VM_EXEC)) {
> - int rc = 0;
> /*
> * We don't use the vma_is_initial_heap() helper as it has
> * a history of problems and is currently broken on systems
> @@ -4036,11 +4127,15 @@ static int selinux_file_mprotect(struct vm_area_struct *vma,
> vma->vm_end <= vma->vm_mm->brk) {
> rc = avc_has_perm(sid, sid, SECCLASS_PROCESS,
> PROCESS__EXECHEAP, NULL);
> - } else if (!vma->vm_file && (vma_is_initial_stack(vma) ||
> + if (rc)
> + return rc;
> + } else if (!file && (vma_is_initial_stack(vma) ||
> vma_is_stack_for_current(vma))) {
> rc = avc_has_perm(sid, sid, SECCLASS_PROCESS,
> PROCESS__EXECSTACK, NULL);
> - } else if (vma->vm_file && vma->anon_vma) {
> + if (rc)
> + return rc;
> + } else if (file && vma->anon_vma) {
> /*
> * We are making executable a file mapping that has
> * had some COW done. Since pages might have been
> @@ -4048,13 +4143,29 @@ static int selinux_file_mprotect(struct vm_area_struct *vma,
> * modified content. This typically should only
> * occur for text relocations.
> */
> - rc = file_has_perm(cred, vma->vm_file, FILE__EXECMOD);
> + rc = __file_has_perm(cred, file, FILE__EXECMOD,
> + backing_file);
> + if (rc)
> + return rc;
> + if (backing_file) {
> + rc = file_has_perm(file->f_cred, file,
> + FILE__EXECMOD);
> + if (rc)
> + return rc;
> + }
> }
> + }
> +
> + rc = __file_map_prot_check(cred, file, prot, shared, backing_file);
> + if (rc)
> + return rc;
> + if (backing_file) {
> + rc = file_map_prot_check(file->f_cred, file, prot, shared);
> if (rc)
> return rc;
> }
>
> - return file_map_prot_check(vma->vm_file, prot, vma->vm_flags&VM_SHARED);
> + return 0;
> }
>
> static int selinux_file_lock(struct file *file, unsigned int cmd)
> @@ -7393,6 +7504,7 @@ struct lsm_blob_sizes selinux_blob_sizes __ro_after_init = {
> .lbs_cred = sizeof(struct cred_security_struct),
> .lbs_task = sizeof(struct task_security_struct),
> .lbs_file = sizeof(struct file_security_struct),
> + .lbs_backing_file = sizeof(struct backing_file_security_struct),
> .lbs_inode = sizeof(struct inode_security_struct),
> .lbs_ipc = sizeof(struct ipc_security_struct),
> .lbs_key = sizeof(struct key_security_struct),
> @@ -7498,9 +7610,11 @@ static struct security_hook_list selinux_hooks[] __ro_after_init = {
>
> LSM_HOOK_INIT(file_permission, selinux_file_permission),
> LSM_HOOK_INIT(file_alloc_security, selinux_file_alloc_security),
> + LSM_HOOK_INIT(backing_file_alloc, selinux_backing_file_alloc),
> LSM_HOOK_INIT(file_ioctl, selinux_file_ioctl),
> LSM_HOOK_INIT(file_ioctl_compat, selinux_file_ioctl_compat),
> LSM_HOOK_INIT(mmap_file, selinux_mmap_file),
> + LSM_HOOK_INIT(mmap_backing_file, selinux_mmap_backing_file),
> LSM_HOOK_INIT(mmap_addr, selinux_mmap_addr),
> LSM_HOOK_INIT(file_mprotect, selinux_file_mprotect),
> LSM_HOOK_INIT(file_lock, selinux_file_lock),
> diff --git a/security/selinux/include/objsec.h b/security/selinux/include/objsec.h
> index 5bddd28ea5cb..b19e5d978e82 100644
> --- a/security/selinux/include/objsec.h
> +++ b/security/selinux/include/objsec.h
> @@ -88,6 +88,10 @@ struct file_security_struct {
> u32 pseqno; /* Policy seqno at the time of file open */
> };
>
> +struct backing_file_security_struct {
> + u32 uf_sid; /* associated user file fsec->sid */
> +};
> +
> struct superblock_security_struct {
> u32 sid; /* SID of file system superblock */
> u32 def_sid; /* default SID for labeling */
> @@ -195,6 +199,13 @@ static inline struct file_security_struct *selinux_file(const struct file *file)
> return file->f_security + selinux_blob_sizes.lbs_file;
> }
>
> +static inline struct backing_file_security_struct *
> +selinux_backing_file(const struct file *backing_file)
> +{
> + void *blob = backing_file_security(backing_file);
> + return blob + selinux_blob_sizes.lbs_backing_file;
> +}
> +
> static inline struct inode_security_struct *
> selinux_inode(const struct inode *inode)
> {
> --
> 2.53.0
>
^ permalink raw reply
* Re: [PATCH v4 2/3] lsm: add backing_file LSM hooks
From: Amir Goldstein @ 2026-04-03 6:12 UTC (permalink / raw)
To: Paul Moore
Cc: linux-security-module, selinux, linux-fsdevel, linux-unionfs,
linux-erofs, Gao Xiang, Christian Brauner
In-Reply-To: <20260403030848.731867-7-paul@paul-moore.com>
On Fri, Apr 3, 2026 at 5:09 AM Paul Moore <paul@paul-moore.com> wrote:
>
> Stacked filesystems such as overlayfs do not currently provide the
> necessary mechanisms for LSMs to properly enforce access controls on the
> mmap() and mprotect() operations. In order to resolve this gap, a LSM
> security blob is being added to the backing_file struct and the following
> new LSM hooks are being created:
>
> security_backing_file_alloc()
> security_backing_file_free()
> security_mmap_backing_file()
>
> The first two hooks are to manage the lifecycle of the LSM security blob
> in the backing_file struct, while the third provides a new mmap() access
> control point for the underlying backing file. It is also expected that
> LSMs will likely want to update their security_file_mprotect() callback
> to address issues with their mprotect() controls, but that does not
> require a change to the security_file_mprotect() LSM hook.
>
> There are three other small changes to support these new LSM hooks:
> * Pass the user file associated with a backing file down to
> alloc_empty_backing_file() so it can be included in the
> security_backing_file_alloc() hook.
> * Add getter and setter functions for the backing_file struct LSM blob
> as the backing_file struct remains private to fs/file_table.c.
> * Constify the file struct field in the LSM common_audit_data struct to
> better support LSMs that need to pass a const file struct pointer into
> the common LSM audit code.
>
> Thanks to Arnd Bergmann for identifying the missing EXPORT_SYMBOL_GPL()
> and supplying a fixup.
>
> Cc: stable@vger.kernel.org
> Cc: linux-fsdevel@vger.kernel.org
> Cc: linux-unionfs@vger.kernel.org
> Cc: linux-erofs@lists.ozlabs.org
> Signed-off-by: Paul Moore <paul@paul-moore.com>
That looks nicer.
Reviewed-by: Amir Goldstein <amir73il@gmail.com>
Thanks,
Amir.
> ---
> fs/backing-file.c | 18 ++++--
> fs/erofs/ishare.c | 10 +++-
> fs/file_table.c | 27 +++++++--
> fs/fuse/passthrough.c | 2 +-
> fs/internal.h | 3 +-
> fs/overlayfs/dir.c | 2 +-
> fs/overlayfs/file.c | 2 +-
> include/linux/backing-file.h | 4 +-
> include/linux/fs.h | 13 +++++
> include/linux/lsm_audit.h | 2 +-
> include/linux/lsm_hook_defs.h | 5 ++
> include/linux/lsm_hooks.h | 1 +
> include/linux/security.h | 22 ++++++++
> security/lsm.h | 1 +
> security/lsm_init.c | 9 +++
> security/security.c | 102 ++++++++++++++++++++++++++++++++++
> 16 files changed, 206 insertions(+), 17 deletions(-)
>
> diff --git a/fs/backing-file.c b/fs/backing-file.c
> index 45da8600d564..1f3bbfc75882 100644
> --- a/fs/backing-file.c
> +++ b/fs/backing-file.c
> @@ -12,6 +12,7 @@
> #include <linux/backing-file.h>
> #include <linux/splice.h>
> #include <linux/mm.h>
> +#include <linux/security.h>
>
> #include "internal.h"
>
> @@ -29,14 +30,15 @@
> * returned file into a container structure that also stores the stacked
> * file's path, which can be retrieved using backing_file_user_path().
> */
> -struct file *backing_file_open(const struct path *user_path, int flags,
> +struct file *backing_file_open(const struct file *user_file, int flags,
> const struct path *real_path,
> const struct cred *cred)
> {
> + const struct path *user_path = &user_file->f_path;
> struct file *f;
> int error;
>
> - f = alloc_empty_backing_file(flags, cred);
> + f = alloc_empty_backing_file(flags, cred, user_file);
> if (IS_ERR(f))
> return f;
>
> @@ -52,15 +54,16 @@ struct file *backing_file_open(const struct path *user_path, int flags,
> }
> EXPORT_SYMBOL_GPL(backing_file_open);
>
> -struct file *backing_tmpfile_open(const struct path *user_path, int flags,
> +struct file *backing_tmpfile_open(const struct file *user_file, int flags,
> const struct path *real_parentpath,
> umode_t mode, const struct cred *cred)
> {
> struct mnt_idmap *real_idmap = mnt_idmap(real_parentpath->mnt);
> + const struct path *user_path = &user_file->f_path;
> struct file *f;
> int error;
>
> - f = alloc_empty_backing_file(flags, cred);
> + f = alloc_empty_backing_file(flags, cred, user_file);
> if (IS_ERR(f))
> return f;
>
> @@ -336,8 +339,13 @@ int backing_file_mmap(struct file *file, struct vm_area_struct *vma,
>
> vma_set_file(vma, file);
>
> - scoped_with_creds(ctx->cred)
> + scoped_with_creds(ctx->cred) {
> + ret = security_mmap_backing_file(vma, file, user_file);
> + if (ret)
> + return ret;
> +
> ret = vfs_mmap(vma->vm_file, vma);
> + }
>
> if (ctx->accessed)
> ctx->accessed(user_file);
> diff --git a/fs/erofs/ishare.c b/fs/erofs/ishare.c
> index ec433bacc592..6ed66b17359b 100644
> --- a/fs/erofs/ishare.c
> +++ b/fs/erofs/ishare.c
> @@ -4,6 +4,7 @@
> */
> #include <linux/xxhash.h>
> #include <linux/mount.h>
> +#include <linux/security.h>
> #include "internal.h"
> #include "xattr.h"
>
> @@ -106,7 +107,8 @@ static int erofs_ishare_file_open(struct inode *inode, struct file *file)
>
> if (file->f_flags & O_DIRECT)
> return -EINVAL;
> - realfile = alloc_empty_backing_file(O_RDONLY|O_NOATIME, current_cred());
> + realfile = alloc_empty_backing_file(O_RDONLY|O_NOATIME, current_cred(),
> + file);
> if (IS_ERR(realfile))
> return PTR_ERR(realfile);
> ihold(sharedinode);
> @@ -150,8 +152,14 @@ static ssize_t erofs_ishare_file_read_iter(struct kiocb *iocb,
> static int erofs_ishare_mmap(struct file *file, struct vm_area_struct *vma)
> {
> struct file *realfile = file->private_data;
> + int err;
>
> vma_set_file(vma, realfile);
> +
> + err = security_mmap_backing_file(vma, realfile, file);
> + if (err)
> + return err;
> +
> return generic_file_readonly_mmap(file, vma);
> }
>
> diff --git a/fs/file_table.c b/fs/file_table.c
> index 3b3792903185..d19d879b6efc 100644
> --- a/fs/file_table.c
> +++ b/fs/file_table.c
> @@ -50,6 +50,9 @@ struct backing_file {
> struct path user_path;
> freeptr_t bf_freeptr;
> };
> +#ifdef CONFIG_SECURITY
> + void *security;
> +#endif
> };
>
> #define backing_file(f) container_of(f, struct backing_file, file)
> @@ -66,8 +69,21 @@ void backing_file_set_user_path(struct file *f, const struct path *path)
> }
> EXPORT_SYMBOL_GPL(backing_file_set_user_path);
>
> +#ifdef CONFIG_SECURITY
> +void *backing_file_security(const struct file *f)
> +{
> + return backing_file(f)->security;
> +}
> +
> +void backing_file_set_security(struct file *f, void *security)
> +{
> + backing_file(f)->security = security;
> +}
> +#endif /* CONFIG_SECURITY */
> +
> static inline void backing_file_free(struct backing_file *ff)
> {
> + security_backing_file_free(&ff->file);
> path_put(&ff->user_path);
> kmem_cache_free(bfilp_cachep, ff);
> }
> @@ -288,10 +304,12 @@ struct file *alloc_empty_file_noaccount(int flags, const struct cred *cred)
> return f;
> }
>
> -static int init_backing_file(struct backing_file *ff)
> +static int init_backing_file(struct backing_file *ff,
> + const struct file *user_file)
> {
> memset(&ff->user_path, 0, sizeof(ff->user_path));
> - return 0;
> + backing_file_set_security(&ff->file, NULL);
> + return security_backing_file_alloc(&ff->file, user_file);
> }
>
> /*
> @@ -301,7 +319,8 @@ static int init_backing_file(struct backing_file *ff)
> * This is only for kernel internal use, and the allocate file must not be
> * installed into file tables or such.
> */
> -struct file *alloc_empty_backing_file(int flags, const struct cred *cred)
> +struct file *alloc_empty_backing_file(int flags, const struct cred *cred,
> + const struct file *user_file)
> {
> struct backing_file *ff;
> int error;
> @@ -318,7 +337,7 @@ struct file *alloc_empty_backing_file(int flags, const struct cred *cred)
>
> /* The f_mode flags must be set before fput(). */
> ff->file.f_mode |= FMODE_BACKING | FMODE_NOACCOUNT;
> - error = init_backing_file(ff);
> + error = init_backing_file(ff, user_file);
> if (unlikely(error)) {
> fput(&ff->file);
> return ERR_PTR(error);
> diff --git a/fs/fuse/passthrough.c b/fs/fuse/passthrough.c
> index 72de97c03d0e..f2d08ac2459b 100644
> --- a/fs/fuse/passthrough.c
> +++ b/fs/fuse/passthrough.c
> @@ -167,7 +167,7 @@ struct fuse_backing *fuse_passthrough_open(struct file *file, int backing_id)
> goto out;
>
> /* Allocate backing file per fuse file to store fuse path */
> - backing_file = backing_file_open(&file->f_path, file->f_flags,
> + backing_file = backing_file_open(file, file->f_flags,
> &fb->file->f_path, fb->cred);
> err = PTR_ERR(backing_file);
> if (IS_ERR(backing_file)) {
> diff --git a/fs/internal.h b/fs/internal.h
> index cbc384a1aa09..77e90e4124e0 100644
> --- a/fs/internal.h
> +++ b/fs/internal.h
> @@ -106,7 +106,8 @@ extern void chroot_fs_refs(const struct path *, const struct path *);
> */
> struct file *alloc_empty_file(int flags, const struct cred *cred);
> struct file *alloc_empty_file_noaccount(int flags, const struct cred *cred);
> -struct file *alloc_empty_backing_file(int flags, const struct cred *cred);
> +struct file *alloc_empty_backing_file(int flags, const struct cred *cred,
> + const struct file *user_file);
> void backing_file_set_user_path(struct file *f, const struct path *path);
>
> static inline void file_put_write_access(struct file *file)
> diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c
> index ff3dbd1ca61f..f2f20a611af3 100644
> --- a/fs/overlayfs/dir.c
> +++ b/fs/overlayfs/dir.c
> @@ -1374,7 +1374,7 @@ static int ovl_create_tmpfile(struct file *file, struct dentry *dentry,
> return PTR_ERR(cred);
>
> ovl_path_upper(dentry->d_parent, &realparentpath);
> - realfile = backing_tmpfile_open(&file->f_path, flags, &realparentpath,
> + realfile = backing_tmpfile_open(file, flags, &realparentpath,
> mode, current_cred());
> err = PTR_ERR_OR_ZERO(realfile);
> pr_debug("tmpfile/open(%pd2, 0%o) = %i\n", realparentpath.dentry, mode, err);
> diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
> index 97bed2286030..27cc07738f33 100644
> --- a/fs/overlayfs/file.c
> +++ b/fs/overlayfs/file.c
> @@ -48,7 +48,7 @@ static struct file *ovl_open_realfile(const struct file *file,
> if (!inode_owner_or_capable(real_idmap, realinode))
> flags &= ~O_NOATIME;
>
> - realfile = backing_file_open(file_user_path(file),
> + realfile = backing_file_open(file,
> flags, realpath, current_cred());
> }
> }
> diff --git a/include/linux/backing-file.h b/include/linux/backing-file.h
> index 1476a6ed1bfd..c939cd222730 100644
> --- a/include/linux/backing-file.h
> +++ b/include/linux/backing-file.h
> @@ -18,10 +18,10 @@ struct backing_file_ctx {
> void (*end_write)(struct kiocb *iocb, ssize_t);
> };
>
> -struct file *backing_file_open(const struct path *user_path, int flags,
> +struct file *backing_file_open(const struct file *user_file, int flags,
> const struct path *real_path,
> const struct cred *cred);
> -struct file *backing_tmpfile_open(const struct path *user_path, int flags,
> +struct file *backing_tmpfile_open(const struct file *user_file, int flags,
> const struct path *real_parentpath,
> umode_t mode, const struct cred *cred);
> ssize_t backing_file_read_iter(struct file *file, struct iov_iter *iter,
> diff --git a/include/linux/fs.h b/include/linux/fs.h
> index 8b3dd145b25e..d0d0e8f55589 100644
> --- a/include/linux/fs.h
> +++ b/include/linux/fs.h
> @@ -2475,6 +2475,19 @@ struct file *dentry_create(struct path *path, int flags, umode_t mode,
> const struct cred *cred);
> const struct path *backing_file_user_path(const struct file *f);
>
> +#ifdef CONFIG_SECURITY
> +void *backing_file_security(const struct file *f);
> +void backing_file_set_security(struct file *f, void *security);
> +#else
> +static inline void *backing_file_security(const struct file *f)
> +{
> + return NULL;
> +}
> +static inline void backing_file_set_security(struct file *f, void *security)
> +{
> +}
> +#endif /* CONFIG_SECURITY */
> +
> /*
> * When mmapping a file on a stackable filesystem (e.g., overlayfs), the file
> * stored in ->vm_file is a backing file whose f_inode is on the underlying
> diff --git a/include/linux/lsm_audit.h b/include/linux/lsm_audit.h
> index 382c56a97bba..584db296e43b 100644
> --- a/include/linux/lsm_audit.h
> +++ b/include/linux/lsm_audit.h
> @@ -94,7 +94,7 @@ struct common_audit_data {
> #endif
> char *kmod_name;
> struct lsm_ioctlop_audit *op;
> - struct file *file;
> + const struct file *file;
> struct lsm_ibpkey_audit *ibpkey;
> struct lsm_ibendport_audit *ibendport;
> int reason;
> diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h
> index 8c42b4bde09c..b4958167e381 100644
> --- a/include/linux/lsm_hook_defs.h
> +++ b/include/linux/lsm_hook_defs.h
> @@ -191,6 +191,9 @@ LSM_HOOK(int, 0, file_permission, struct file *file, int mask)
> LSM_HOOK(int, 0, file_alloc_security, struct file *file)
> LSM_HOOK(void, LSM_RET_VOID, file_release, struct file *file)
> LSM_HOOK(void, LSM_RET_VOID, file_free_security, struct file *file)
> +LSM_HOOK(int, 0, backing_file_alloc, struct file *backing_file,
> + const struct file *user_file)
> +LSM_HOOK(void, LSM_RET_VOID, backing_file_free, struct file *backing_file)
> LSM_HOOK(int, 0, file_ioctl, struct file *file, unsigned int cmd,
> unsigned long arg)
> LSM_HOOK(int, 0, file_ioctl_compat, struct file *file, unsigned int cmd,
> @@ -198,6 +201,8 @@ LSM_HOOK(int, 0, file_ioctl_compat, struct file *file, unsigned int cmd,
> LSM_HOOK(int, 0, mmap_addr, unsigned long addr)
> LSM_HOOK(int, 0, mmap_file, struct file *file, unsigned long reqprot,
> unsigned long prot, unsigned long flags)
> +LSM_HOOK(int, 0, mmap_backing_file, struct vm_area_struct *vma,
> + struct file *backing_file, struct file *user_file)
> LSM_HOOK(int, 0, file_mprotect, struct vm_area_struct *vma,
> unsigned long reqprot, unsigned long prot)
> LSM_HOOK(int, 0, file_lock, struct file *file, unsigned int cmd)
> diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h
> index d48bf0ad26f4..b4f8cad53ddb 100644
> --- a/include/linux/lsm_hooks.h
> +++ b/include/linux/lsm_hooks.h
> @@ -104,6 +104,7 @@ struct security_hook_list {
> struct lsm_blob_sizes {
> unsigned int lbs_cred;
> unsigned int lbs_file;
> + unsigned int lbs_backing_file;
> unsigned int lbs_ib;
> unsigned int lbs_inode;
> unsigned int lbs_sock;
> diff --git a/include/linux/security.h b/include/linux/security.h
> index ee88dd2d2d1f..8d2d4856934e 100644
> --- a/include/linux/security.h
> +++ b/include/linux/security.h
> @@ -472,11 +472,17 @@ int security_file_permission(struct file *file, int mask);
> int security_file_alloc(struct file *file);
> void security_file_release(struct file *file);
> void security_file_free(struct file *file);
> +int security_backing_file_alloc(struct file *backing_file,
> + const struct file *user_file);
> +void security_backing_file_free(struct file *backing_file);
> int security_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
> int security_file_ioctl_compat(struct file *file, unsigned int cmd,
> unsigned long arg);
> int security_mmap_file(struct file *file, unsigned long prot,
> unsigned long flags);
> +int security_mmap_backing_file(struct vm_area_struct *vma,
> + struct file *backing_file,
> + struct file *user_file);
> int security_mmap_addr(unsigned long addr);
> int security_file_mprotect(struct vm_area_struct *vma, unsigned long reqprot,
> unsigned long prot);
> @@ -1141,6 +1147,15 @@ static inline void security_file_release(struct file *file)
> static inline void security_file_free(struct file *file)
> { }
>
> +static inline int security_backing_file_alloc(struct file *backing_file,
> + const struct file *user_file)
> +{
> + return 0;
> +}
> +
> +static inline void security_backing_file_free(struct file *backing_file)
> +{ }
> +
> static inline int security_file_ioctl(struct file *file, unsigned int cmd,
> unsigned long arg)
> {
> @@ -1160,6 +1175,13 @@ static inline int security_mmap_file(struct file *file, unsigned long prot,
> return 0;
> }
>
> +static inline int security_mmap_backing_file(struct vm_area_struct *vma,
> + struct file *backing_file,
> + struct file *user_file)
> +{
> + return 0;
> +}
> +
> static inline int security_mmap_addr(unsigned long addr)
> {
> return cap_mmap_addr(addr);
> diff --git a/security/lsm.h b/security/lsm.h
> index db77cc83e158..32f808ad4335 100644
> --- a/security/lsm.h
> +++ b/security/lsm.h
> @@ -29,6 +29,7 @@ extern struct lsm_blob_sizes blob_sizes;
>
> /* LSM blob caches */
> extern struct kmem_cache *lsm_file_cache;
> +extern struct kmem_cache *lsm_backing_file_cache;
> extern struct kmem_cache *lsm_inode_cache;
>
> /* LSM blob allocators */
> diff --git a/security/lsm_init.c b/security/lsm_init.c
> index 573e2a7250c4..7c0fd17f1601 100644
> --- a/security/lsm_init.c
> +++ b/security/lsm_init.c
> @@ -293,6 +293,8 @@ static void __init lsm_prepare(struct lsm_info *lsm)
> blobs = lsm->blobs;
> lsm_blob_size_update(&blobs->lbs_cred, &blob_sizes.lbs_cred);
> lsm_blob_size_update(&blobs->lbs_file, &blob_sizes.lbs_file);
> + lsm_blob_size_update(&blobs->lbs_backing_file,
> + &blob_sizes.lbs_backing_file);
> lsm_blob_size_update(&blobs->lbs_ib, &blob_sizes.lbs_ib);
> /* inode blob gets an rcu_head in addition to LSM blobs. */
> if (blobs->lbs_inode && blob_sizes.lbs_inode == 0)
> @@ -441,6 +443,8 @@ int __init security_init(void)
> if (lsm_debug) {
> lsm_pr("blob(cred) size %d\n", blob_sizes.lbs_cred);
> lsm_pr("blob(file) size %d\n", blob_sizes.lbs_file);
> + lsm_pr("blob(backing_file) size %d\n",
> + blob_sizes.lbs_backing_file);
> lsm_pr("blob(ib) size %d\n", blob_sizes.lbs_ib);
> lsm_pr("blob(inode) size %d\n", blob_sizes.lbs_inode);
> lsm_pr("blob(ipc) size %d\n", blob_sizes.lbs_ipc);
> @@ -462,6 +466,11 @@ int __init security_init(void)
> lsm_file_cache = kmem_cache_create("lsm_file_cache",
> blob_sizes.lbs_file, 0,
> SLAB_PANIC, NULL);
> + if (blob_sizes.lbs_backing_file)
> + lsm_backing_file_cache = kmem_cache_create(
> + "lsm_backing_file_cache",
> + blob_sizes.lbs_backing_file,
> + 0, SLAB_PANIC, NULL);
> if (blob_sizes.lbs_inode)
> lsm_inode_cache = kmem_cache_create("lsm_inode_cache",
> blob_sizes.lbs_inode, 0,
> diff --git a/security/security.c b/security/security.c
> index a26c1474e2e4..048560ef6a1a 100644
> --- a/security/security.c
> +++ b/security/security.c
> @@ -82,6 +82,7 @@ const struct lsm_id *lsm_idlist[MAX_LSM_COUNT];
> struct lsm_blob_sizes blob_sizes;
>
> struct kmem_cache *lsm_file_cache;
> +struct kmem_cache *lsm_backing_file_cache;
> struct kmem_cache *lsm_inode_cache;
>
> #define SECURITY_HOOK_ACTIVE_KEY(HOOK, IDX) security_hook_active_##HOOK##_##IDX
> @@ -173,6 +174,30 @@ static int lsm_file_alloc(struct file *file)
> return 0;
> }
>
> +/**
> + * lsm_backing_file_alloc - allocate a composite backing file blob
> + * @backing_file: the backing file
> + *
> + * Allocate the backing file blob for all the modules.
> + *
> + * Returns 0, or -ENOMEM if memory can't be allocated.
> + */
> +static int lsm_backing_file_alloc(struct file *backing_file)
> +{
> + void *blob;
> +
> + if (!lsm_backing_file_cache) {
> + backing_file_set_security(backing_file, NULL);
> + return 0;
> + }
> +
> + blob = kmem_cache_zalloc(lsm_backing_file_cache, GFP_KERNEL);
> + backing_file_set_security(backing_file, blob);
> + if (!blob)
> + return -ENOMEM;
> + return 0;
> +}
> +
> /**
> * lsm_blob_alloc - allocate a composite blob
> * @dest: the destination for the blob
> @@ -2418,6 +2443,57 @@ void security_file_free(struct file *file)
> }
> }
>
> +/**
> + * security_backing_file_alloc() - Allocate and setup a backing file blob
> + * @backing_file: the backing file
> + * @user_file: the associated user visible file
> + *
> + * Allocate a backing file LSM blob and perform any necessary initialization of
> + * the LSM blob. There will be some operations where the LSM will not have
> + * access to @user_file after this point, so any state associated
> + * with @user_file that is important to the LSM should be captured in the
> + * backing file's LSM blob.
> + *
> + * LSMs should avoid taking a reference to @user_file in this hook as it will
> + * result in problems later when the system attempts to drop/put the file
> + * references due to a circular dependency.
> + *
> + * Return: Return 0 if the hook is successful, negative values otherwise.
> + */
> +int security_backing_file_alloc(struct file *backing_file,
> + const struct file *user_file)
> +{
> + int rc;
> +
> + rc = lsm_backing_file_alloc(backing_file);
> + if (rc)
> + return rc;
> + rc = call_int_hook(backing_file_alloc, backing_file, user_file);
> + if (unlikely(rc))
> + security_backing_file_free(backing_file);
> +
> + return rc;
> +}
> +
> +/**
> + * security_backing_file_free() - Free a backing file blob
> + * @backing_file: the backing file
> + *
> + * Free any LSM state associated with a backing file's LSM blob, including the
> + * blob itself.
> + */
> +void security_backing_file_free(struct file *backing_file)
> +{
> + void *blob = backing_file_security(backing_file);
> +
> + call_void_hook(backing_file_free, backing_file);
> +
> + if (blob) {
> + backing_file_set_security(backing_file, NULL);
> + kmem_cache_free(lsm_backing_file_cache, blob);
> + }
> +}
> +
> /**
> * security_file_ioctl() - Check if an ioctl is allowed
> * @file: associated file
> @@ -2506,6 +2582,32 @@ int security_mmap_file(struct file *file, unsigned long prot,
> flags);
> }
>
> +/**
> + * security_mmap_backing_file - Check if mmap'ing a backing file is allowed
> + * @vma: the vm_area_struct for the mmap'd region
> + * @backing_file: the backing file being mmap'd
> + * @user_file: the user file being mmap'd
> + *
> + * Check permissions for a mmap operation on a stacked filesystem. This hook
> + * is called after the security_mmap_file() and is responsible for authorizing
> + * the mmap on @backing_file. It is important to note that the mmap operation
> + * on @user_file has already been authorized and the @vma->vm_file has been
> + * set to @backing_file.
> + *
> + * Return: Returns 0 if permission is granted.
> + */
> +int security_mmap_backing_file(struct vm_area_struct *vma,
> + struct file *backing_file,
> + struct file *user_file)
> +{
> + /* recommended by the stackable filesystem devs */
> + if (WARN_ON_ONCE(!(backing_file->f_mode & FMODE_BACKING)))
> + return -EIO;
> +
> + return call_int_hook(mmap_backing_file, vma, backing_file, user_file);
> +}
> +EXPORT_SYMBOL_GPL(security_mmap_backing_file);
> +
> /**
> * security_mmap_addr() - Check if mmap'ing an address is allowed
> * @addr: address
> --
> 2.53.0
>
^ permalink raw reply
* [PATCH] apparmor: Fix two bugs of aa_setup_dfa_engine's fail handling
From: GONG Ruiqi @ 2026-04-03 3:51 UTC (permalink / raw)
To: John Johansen, Paul Moore, James Morris, Serge E . Hallyn
Cc: apparmor, linux-security-module, linux-kernel, lujialin4,
gongruiqi1
First, aa_dfa_unpack returns an ERR_PTR, not NULL, when it fails, but
aa_put_dfa only checks its input against NULL, which would cause an invalid
memory access in aa_put_dfa. Set nulldfa to NULL explicitly to fix that.
Second, aa_put_pdb calls aa_pdb_free_kref -> aa_free_pdb -> aa_put_dfa,
i.e. it will free nullpdb->dfa. But there's another aa_put_dfa(nulldfa)
after aa_put_pdb(nullpdb), which would cause double free. Remove that
redundant aa_put_dfa to fix that.
Fixes: 98b824ff8984 ("apparmor: refcount the pdb")
Signed-off-by: GONG Ruiqi <gongruiqi1@huawei.com>
---
security/apparmor/lsm.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c
index c1d42fc72fdb..be82ec1b9fd9 100644
--- a/security/apparmor/lsm.c
+++ b/security/apparmor/lsm.c
@@ -2465,6 +2465,7 @@ static int __init aa_setup_dfa_engine(void)
TO_ACCEPT2_FLAG(YYTD_DATA32));
if (IS_ERR(nulldfa)) {
error = PTR_ERR(nulldfa);
+ nulldfa = NULL;
goto fail;
}
nullpdb->dfa = aa_get_dfa(nulldfa);
@@ -2486,7 +2487,6 @@ static int __init aa_setup_dfa_engine(void)
fail:
aa_put_pdb(nullpdb);
- aa_put_dfa(nulldfa);
nullpdb = NULL;
nulldfa = NULL;
stacksplitdfa = NULL;
--
2.43.0
^ permalink raw reply related
* [PATCH v4 3/3] selinux: fix overlayfs mmap() and mprotect() access checks
From: Paul Moore @ 2026-04-03 3:08 UTC (permalink / raw)
To: linux-security-module, selinux, linux-fsdevel, linux-unionfs,
linux-erofs
Cc: Amir Goldstein, Gao Xiang, Christian Brauner
In-Reply-To: <20260403030848.731867-5-paul@paul-moore.com>
The existing SELinux security model for overlayfs is to allow access if
the current task is able to access the top level file (the "user" file)
and the mounter's credentials are sufficient to access the lower
level file (the "backing" file). Unfortunately, the current code does
not properly enforce these access controls for both mmap() and mprotect()
operations on overlayfs filesystems.
This patch makes use of the newly created security_mmap_backing_file()
LSM hook to provide the missing backing file enforcement for mmap()
operations, and leverages the backing file API and new LSM blob to
provide the necessary information to properly enforce the mprotect()
access controls.
Cc: stable@vger.kernel.org
Signed-off-by: Paul Moore <paul@paul-moore.com>
---
security/selinux/hooks.c | 256 +++++++++++++++++++++---------
security/selinux/include/objsec.h | 11 ++
2 files changed, 196 insertions(+), 71 deletions(-)
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index d8224ea113d1..76e0fb7dcb36 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -1745,6 +1745,60 @@ static inline int file_path_has_perm(const struct cred *cred,
static int bpf_fd_pass(const struct file *file, u32 sid);
#endif
+static int __file_has_perm(const struct cred *cred, const struct file *file,
+ u32 av, bool bf_user_file)
+
+{
+ struct common_audit_data ad;
+ struct inode *inode;
+ u32 ssid = cred_sid(cred);
+ u32 tsid_fd;
+ int rc;
+
+ if (bf_user_file) {
+ struct backing_file_security_struct *bfsec;
+ const struct path *path;
+
+ if (WARN_ON(!(file->f_mode & FMODE_BACKING)))
+ return -EIO;
+
+ bfsec = selinux_backing_file(file);
+ path = backing_file_user_path(file);
+ tsid_fd = bfsec->uf_sid;
+ inode = d_inode(path->dentry);
+
+ ad.type = LSM_AUDIT_DATA_PATH;
+ ad.u.path = *path;
+ } else {
+ struct file_security_struct *fsec = selinux_file(file);
+
+ tsid_fd = fsec->sid;
+ inode = file_inode(file);
+
+ ad.type = LSM_AUDIT_DATA_FILE;
+ ad.u.file = file;
+ }
+
+ if (ssid != tsid_fd) {
+ rc = avc_has_perm(ssid, tsid_fd, SECCLASS_FD, FD__USE, &ad);
+ if (rc)
+ return rc;
+ }
+
+#ifdef CONFIG_BPF_SYSCALL
+ /* regardless of backing vs user file, use the underlying file here */
+ rc = bpf_fd_pass(file, ssid);
+ if (rc)
+ return rc;
+#endif
+
+ /* av is zero if only checking access to the descriptor. */
+ if (av)
+ return inode_has_perm(cred, inode, av, &ad);
+
+ return 0;
+}
+
/* Check whether a task can use an open file descriptor to
access an inode in a given way. Check access to the
descriptor itself, and then use dentry_has_perm to
@@ -1753,41 +1807,10 @@ static int bpf_fd_pass(const struct file *file, u32 sid);
has the same SID as the process. If av is zero, then
access to the file is not checked, e.g. for cases
where only the descriptor is affected like seek. */
-static int file_has_perm(const struct cred *cred,
- struct file *file,
- u32 av)
+static inline int file_has_perm(const struct cred *cred,
+ const struct file *file, u32 av)
{
- struct file_security_struct *fsec = selinux_file(file);
- struct inode *inode = file_inode(file);
- struct common_audit_data ad;
- u32 sid = cred_sid(cred);
- int rc;
-
- ad.type = LSM_AUDIT_DATA_FILE;
- ad.u.file = file;
-
- if (sid != fsec->sid) {
- rc = avc_has_perm(sid, fsec->sid,
- SECCLASS_FD,
- FD__USE,
- &ad);
- if (rc)
- goto out;
- }
-
-#ifdef CONFIG_BPF_SYSCALL
- rc = bpf_fd_pass(file, cred_sid(cred));
- if (rc)
- return rc;
-#endif
-
- /* av is zero if only checking access to the descriptor. */
- rc = 0;
- if (av)
- rc = inode_has_perm(cred, inode, av, &ad);
-
-out:
- return rc;
+ return __file_has_perm(cred, file, av, false);
}
/*
@@ -3825,6 +3848,17 @@ static int selinux_file_alloc_security(struct file *file)
return 0;
}
+static int selinux_backing_file_alloc(struct file *backing_file,
+ const struct file *user_file)
+{
+ struct backing_file_security_struct *bfsec;
+
+ bfsec = selinux_backing_file(backing_file);
+ bfsec->uf_sid = selinux_file(user_file)->sid;
+
+ return 0;
+}
+
/*
* Check whether a task has the ioctl permission and cmd
* operation to an inode.
@@ -3942,42 +3976,55 @@ static int selinux_file_ioctl_compat(struct file *file, unsigned int cmd,
static int default_noexec __ro_after_init;
-static int file_map_prot_check(struct file *file, unsigned long prot, int shared)
+static int __file_map_prot_check(const struct cred *cred,
+ const struct file *file, unsigned long prot,
+ bool shared, bool bf_user_file)
{
- const struct cred *cred = current_cred();
- u32 sid = cred_sid(cred);
- int rc = 0;
+ struct inode *inode = NULL;
+ bool prot_exec = prot & PROT_EXEC;
+ bool prot_write = prot & PROT_WRITE;
+
+ if (file) {
+ if (bf_user_file)
+ inode = d_inode(backing_file_user_path(file)->dentry);
+ else
+ inode = file_inode(file);
+ }
+
+ if (default_noexec && prot_exec &&
+ (!file || IS_PRIVATE(inode) || (!shared && prot_write))) {
+ int rc;
+ u32 sid = cred_sid(cred);
- if (default_noexec &&
- (prot & PROT_EXEC) && (!file || IS_PRIVATE(file_inode(file)) ||
- (!shared && (prot & PROT_WRITE)))) {
/*
- * We are making executable an anonymous mapping or a
- * private file mapping that will also be writable.
- * This has an additional check.
+ * We are making executable an anonymous mapping or a private
+ * file mapping that will also be writable.
*/
- rc = avc_has_perm(sid, sid, SECCLASS_PROCESS,
- PROCESS__EXECMEM, NULL);
+ rc = avc_has_perm(sid, sid, SECCLASS_PROCESS, PROCESS__EXECMEM,
+ NULL);
if (rc)
- goto error;
+ return rc;
}
if (file) {
- /* read access is always possible with a mapping */
+ /* "read" always possible, "write" only if shared */
u32 av = FILE__READ;
-
- /* write access only matters if the mapping is shared */
- if (shared && (prot & PROT_WRITE))
+ if (shared && prot_write)
av |= FILE__WRITE;
-
- if (prot & PROT_EXEC)
+ if (prot_exec)
av |= FILE__EXECUTE;
- return file_has_perm(cred, file, av);
+ return __file_has_perm(cred, file, av, bf_user_file);
}
-error:
- return rc;
+ return 0;
+}
+
+static inline int file_map_prot_check(const struct cred *cred,
+ const struct file *file,
+ unsigned long prot, bool shared)
+{
+ return __file_map_prot_check(cred, file, prot, shared, false);
}
static int selinux_mmap_addr(unsigned long addr)
@@ -3993,36 +4040,80 @@ static int selinux_mmap_addr(unsigned long addr)
return rc;
}
-static int selinux_mmap_file(struct file *file,
- unsigned long reqprot __always_unused,
- unsigned long prot, unsigned long flags)
+static int selinux_mmap_file_common(const struct cred *cred, struct file *file,
+ unsigned long prot, bool shared)
{
- struct common_audit_data ad;
- int rc;
-
if (file) {
+ int rc;
+ struct common_audit_data ad;
+
ad.type = LSM_AUDIT_DATA_FILE;
ad.u.file = file;
- rc = inode_has_perm(current_cred(), file_inode(file),
- FILE__MAP, &ad);
+ rc = inode_has_perm(cred, file_inode(file), FILE__MAP, &ad);
if (rc)
return rc;
}
- return file_map_prot_check(file, prot,
- (flags & MAP_TYPE) == MAP_SHARED);
+ return file_map_prot_check(cred, file, prot, shared);
+}
+
+static int selinux_mmap_file(struct file *file,
+ unsigned long reqprot __always_unused,
+ unsigned long prot, unsigned long flags)
+{
+ return selinux_mmap_file_common(current_cred(), file, prot,
+ (flags & MAP_TYPE) == MAP_SHARED);
+}
+
+/**
+ * selinux_mmap_backing_file - Check mmap permissions on a backing file
+ * @vma: memory region
+ * @backing_file: stacked filesystem backing file
+ * @user_file: user visible file
+ *
+ * This is called after selinux_mmap_file() on stacked filesystems, and it
+ * is this function's responsibility to verify access to @backing_file and
+ * setup the SELinux state for possible later use in the mprotect() code path.
+ *
+ * By the time this function is called, mmap() access to @user_file has already
+ * been authorized and @vma->vm_file has been set to point to @backing_file.
+ *
+ * Return zero on success, negative values otherwise.
+ */
+static int selinux_mmap_backing_file(struct vm_area_struct *vma,
+ struct file *backing_file,
+ struct file *user_file __always_unused)
+{
+ unsigned long prot = 0;
+
+ /* translate vma->vm_flags perms into PROT perms */
+ if (vma->vm_flags & VM_READ)
+ prot |= PROT_READ;
+ if (vma->vm_flags & VM_WRITE)
+ prot |= PROT_WRITE;
+ if (vma->vm_flags & VM_EXEC)
+ prot |= PROT_EXEC;
+
+ return selinux_mmap_file_common(backing_file->f_cred, backing_file,
+ prot, vma->vm_flags & VM_SHARED);
}
static int selinux_file_mprotect(struct vm_area_struct *vma,
unsigned long reqprot __always_unused,
unsigned long prot)
{
+ int rc;
const struct cred *cred = current_cred();
u32 sid = cred_sid(cred);
+ const struct file *file = vma->vm_file;
+ bool backing_file;
+ bool shared = vma->vm_flags & VM_SHARED;
+
+ /* check if we need to trigger the "backing files are awful" mode */
+ backing_file = file && (file->f_mode & FMODE_BACKING);
if (default_noexec &&
(prot & PROT_EXEC) && !(vma->vm_flags & VM_EXEC)) {
- int rc = 0;
/*
* We don't use the vma_is_initial_heap() helper as it has
* a history of problems and is currently broken on systems
@@ -4036,11 +4127,15 @@ static int selinux_file_mprotect(struct vm_area_struct *vma,
vma->vm_end <= vma->vm_mm->brk) {
rc = avc_has_perm(sid, sid, SECCLASS_PROCESS,
PROCESS__EXECHEAP, NULL);
- } else if (!vma->vm_file && (vma_is_initial_stack(vma) ||
+ if (rc)
+ return rc;
+ } else if (!file && (vma_is_initial_stack(vma) ||
vma_is_stack_for_current(vma))) {
rc = avc_has_perm(sid, sid, SECCLASS_PROCESS,
PROCESS__EXECSTACK, NULL);
- } else if (vma->vm_file && vma->anon_vma) {
+ if (rc)
+ return rc;
+ } else if (file && vma->anon_vma) {
/*
* We are making executable a file mapping that has
* had some COW done. Since pages might have been
@@ -4048,13 +4143,29 @@ static int selinux_file_mprotect(struct vm_area_struct *vma,
* modified content. This typically should only
* occur for text relocations.
*/
- rc = file_has_perm(cred, vma->vm_file, FILE__EXECMOD);
+ rc = __file_has_perm(cred, file, FILE__EXECMOD,
+ backing_file);
+ if (rc)
+ return rc;
+ if (backing_file) {
+ rc = file_has_perm(file->f_cred, file,
+ FILE__EXECMOD);
+ if (rc)
+ return rc;
+ }
}
+ }
+
+ rc = __file_map_prot_check(cred, file, prot, shared, backing_file);
+ if (rc)
+ return rc;
+ if (backing_file) {
+ rc = file_map_prot_check(file->f_cred, file, prot, shared);
if (rc)
return rc;
}
- return file_map_prot_check(vma->vm_file, prot, vma->vm_flags&VM_SHARED);
+ return 0;
}
static int selinux_file_lock(struct file *file, unsigned int cmd)
@@ -7393,6 +7504,7 @@ struct lsm_blob_sizes selinux_blob_sizes __ro_after_init = {
.lbs_cred = sizeof(struct cred_security_struct),
.lbs_task = sizeof(struct task_security_struct),
.lbs_file = sizeof(struct file_security_struct),
+ .lbs_backing_file = sizeof(struct backing_file_security_struct),
.lbs_inode = sizeof(struct inode_security_struct),
.lbs_ipc = sizeof(struct ipc_security_struct),
.lbs_key = sizeof(struct key_security_struct),
@@ -7498,9 +7610,11 @@ static struct security_hook_list selinux_hooks[] __ro_after_init = {
LSM_HOOK_INIT(file_permission, selinux_file_permission),
LSM_HOOK_INIT(file_alloc_security, selinux_file_alloc_security),
+ LSM_HOOK_INIT(backing_file_alloc, selinux_backing_file_alloc),
LSM_HOOK_INIT(file_ioctl, selinux_file_ioctl),
LSM_HOOK_INIT(file_ioctl_compat, selinux_file_ioctl_compat),
LSM_HOOK_INIT(mmap_file, selinux_mmap_file),
+ LSM_HOOK_INIT(mmap_backing_file, selinux_mmap_backing_file),
LSM_HOOK_INIT(mmap_addr, selinux_mmap_addr),
LSM_HOOK_INIT(file_mprotect, selinux_file_mprotect),
LSM_HOOK_INIT(file_lock, selinux_file_lock),
diff --git a/security/selinux/include/objsec.h b/security/selinux/include/objsec.h
index 5bddd28ea5cb..b19e5d978e82 100644
--- a/security/selinux/include/objsec.h
+++ b/security/selinux/include/objsec.h
@@ -88,6 +88,10 @@ struct file_security_struct {
u32 pseqno; /* Policy seqno at the time of file open */
};
+struct backing_file_security_struct {
+ u32 uf_sid; /* associated user file fsec->sid */
+};
+
struct superblock_security_struct {
u32 sid; /* SID of file system superblock */
u32 def_sid; /* default SID for labeling */
@@ -195,6 +199,13 @@ static inline struct file_security_struct *selinux_file(const struct file *file)
return file->f_security + selinux_blob_sizes.lbs_file;
}
+static inline struct backing_file_security_struct *
+selinux_backing_file(const struct file *backing_file)
+{
+ void *blob = backing_file_security(backing_file);
+ return blob + selinux_blob_sizes.lbs_backing_file;
+}
+
static inline struct inode_security_struct *
selinux_inode(const struct inode *inode)
{
--
2.53.0
^ permalink raw reply related
* [PATCH v4 2/3] lsm: add backing_file LSM hooks
From: Paul Moore @ 2026-04-03 3:08 UTC (permalink / raw)
To: linux-security-module, selinux, linux-fsdevel, linux-unionfs,
linux-erofs
Cc: Amir Goldstein, Gao Xiang, Christian Brauner
In-Reply-To: <20260403030848.731867-5-paul@paul-moore.com>
Stacked filesystems such as overlayfs do not currently provide the
necessary mechanisms for LSMs to properly enforce access controls on the
mmap() and mprotect() operations. In order to resolve this gap, a LSM
security blob is being added to the backing_file struct and the following
new LSM hooks are being created:
security_backing_file_alloc()
security_backing_file_free()
security_mmap_backing_file()
The first two hooks are to manage the lifecycle of the LSM security blob
in the backing_file struct, while the third provides a new mmap() access
control point for the underlying backing file. It is also expected that
LSMs will likely want to update their security_file_mprotect() callback
to address issues with their mprotect() controls, but that does not
require a change to the security_file_mprotect() LSM hook.
There are three other small changes to support these new LSM hooks:
* Pass the user file associated with a backing file down to
alloc_empty_backing_file() so it can be included in the
security_backing_file_alloc() hook.
* Add getter and setter functions for the backing_file struct LSM blob
as the backing_file struct remains private to fs/file_table.c.
* Constify the file struct field in the LSM common_audit_data struct to
better support LSMs that need to pass a const file struct pointer into
the common LSM audit code.
Thanks to Arnd Bergmann for identifying the missing EXPORT_SYMBOL_GPL()
and supplying a fixup.
Cc: stable@vger.kernel.org
Cc: linux-fsdevel@vger.kernel.org
Cc: linux-unionfs@vger.kernel.org
Cc: linux-erofs@lists.ozlabs.org
Signed-off-by: Paul Moore <paul@paul-moore.com>
---
fs/backing-file.c | 18 ++++--
fs/erofs/ishare.c | 10 +++-
fs/file_table.c | 27 +++++++--
fs/fuse/passthrough.c | 2 +-
fs/internal.h | 3 +-
fs/overlayfs/dir.c | 2 +-
fs/overlayfs/file.c | 2 +-
include/linux/backing-file.h | 4 +-
include/linux/fs.h | 13 +++++
include/linux/lsm_audit.h | 2 +-
include/linux/lsm_hook_defs.h | 5 ++
include/linux/lsm_hooks.h | 1 +
include/linux/security.h | 22 ++++++++
security/lsm.h | 1 +
security/lsm_init.c | 9 +++
security/security.c | 102 ++++++++++++++++++++++++++++++++++
16 files changed, 206 insertions(+), 17 deletions(-)
diff --git a/fs/backing-file.c b/fs/backing-file.c
index 45da8600d564..1f3bbfc75882 100644
--- a/fs/backing-file.c
+++ b/fs/backing-file.c
@@ -12,6 +12,7 @@
#include <linux/backing-file.h>
#include <linux/splice.h>
#include <linux/mm.h>
+#include <linux/security.h>
#include "internal.h"
@@ -29,14 +30,15 @@
* returned file into a container structure that also stores the stacked
* file's path, which can be retrieved using backing_file_user_path().
*/
-struct file *backing_file_open(const struct path *user_path, int flags,
+struct file *backing_file_open(const struct file *user_file, int flags,
const struct path *real_path,
const struct cred *cred)
{
+ const struct path *user_path = &user_file->f_path;
struct file *f;
int error;
- f = alloc_empty_backing_file(flags, cred);
+ f = alloc_empty_backing_file(flags, cred, user_file);
if (IS_ERR(f))
return f;
@@ -52,15 +54,16 @@ struct file *backing_file_open(const struct path *user_path, int flags,
}
EXPORT_SYMBOL_GPL(backing_file_open);
-struct file *backing_tmpfile_open(const struct path *user_path, int flags,
+struct file *backing_tmpfile_open(const struct file *user_file, int flags,
const struct path *real_parentpath,
umode_t mode, const struct cred *cred)
{
struct mnt_idmap *real_idmap = mnt_idmap(real_parentpath->mnt);
+ const struct path *user_path = &user_file->f_path;
struct file *f;
int error;
- f = alloc_empty_backing_file(flags, cred);
+ f = alloc_empty_backing_file(flags, cred, user_file);
if (IS_ERR(f))
return f;
@@ -336,8 +339,13 @@ int backing_file_mmap(struct file *file, struct vm_area_struct *vma,
vma_set_file(vma, file);
- scoped_with_creds(ctx->cred)
+ scoped_with_creds(ctx->cred) {
+ ret = security_mmap_backing_file(vma, file, user_file);
+ if (ret)
+ return ret;
+
ret = vfs_mmap(vma->vm_file, vma);
+ }
if (ctx->accessed)
ctx->accessed(user_file);
diff --git a/fs/erofs/ishare.c b/fs/erofs/ishare.c
index ec433bacc592..6ed66b17359b 100644
--- a/fs/erofs/ishare.c
+++ b/fs/erofs/ishare.c
@@ -4,6 +4,7 @@
*/
#include <linux/xxhash.h>
#include <linux/mount.h>
+#include <linux/security.h>
#include "internal.h"
#include "xattr.h"
@@ -106,7 +107,8 @@ static int erofs_ishare_file_open(struct inode *inode, struct file *file)
if (file->f_flags & O_DIRECT)
return -EINVAL;
- realfile = alloc_empty_backing_file(O_RDONLY|O_NOATIME, current_cred());
+ realfile = alloc_empty_backing_file(O_RDONLY|O_NOATIME, current_cred(),
+ file);
if (IS_ERR(realfile))
return PTR_ERR(realfile);
ihold(sharedinode);
@@ -150,8 +152,14 @@ static ssize_t erofs_ishare_file_read_iter(struct kiocb *iocb,
static int erofs_ishare_mmap(struct file *file, struct vm_area_struct *vma)
{
struct file *realfile = file->private_data;
+ int err;
vma_set_file(vma, realfile);
+
+ err = security_mmap_backing_file(vma, realfile, file);
+ if (err)
+ return err;
+
return generic_file_readonly_mmap(file, vma);
}
diff --git a/fs/file_table.c b/fs/file_table.c
index 3b3792903185..d19d879b6efc 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -50,6 +50,9 @@ struct backing_file {
struct path user_path;
freeptr_t bf_freeptr;
};
+#ifdef CONFIG_SECURITY
+ void *security;
+#endif
};
#define backing_file(f) container_of(f, struct backing_file, file)
@@ -66,8 +69,21 @@ void backing_file_set_user_path(struct file *f, const struct path *path)
}
EXPORT_SYMBOL_GPL(backing_file_set_user_path);
+#ifdef CONFIG_SECURITY
+void *backing_file_security(const struct file *f)
+{
+ return backing_file(f)->security;
+}
+
+void backing_file_set_security(struct file *f, void *security)
+{
+ backing_file(f)->security = security;
+}
+#endif /* CONFIG_SECURITY */
+
static inline void backing_file_free(struct backing_file *ff)
{
+ security_backing_file_free(&ff->file);
path_put(&ff->user_path);
kmem_cache_free(bfilp_cachep, ff);
}
@@ -288,10 +304,12 @@ struct file *alloc_empty_file_noaccount(int flags, const struct cred *cred)
return f;
}
-static int init_backing_file(struct backing_file *ff)
+static int init_backing_file(struct backing_file *ff,
+ const struct file *user_file)
{
memset(&ff->user_path, 0, sizeof(ff->user_path));
- return 0;
+ backing_file_set_security(&ff->file, NULL);
+ return security_backing_file_alloc(&ff->file, user_file);
}
/*
@@ -301,7 +319,8 @@ static int init_backing_file(struct backing_file *ff)
* This is only for kernel internal use, and the allocate file must not be
* installed into file tables or such.
*/
-struct file *alloc_empty_backing_file(int flags, const struct cred *cred)
+struct file *alloc_empty_backing_file(int flags, const struct cred *cred,
+ const struct file *user_file)
{
struct backing_file *ff;
int error;
@@ -318,7 +337,7 @@ struct file *alloc_empty_backing_file(int flags, const struct cred *cred)
/* The f_mode flags must be set before fput(). */
ff->file.f_mode |= FMODE_BACKING | FMODE_NOACCOUNT;
- error = init_backing_file(ff);
+ error = init_backing_file(ff, user_file);
if (unlikely(error)) {
fput(&ff->file);
return ERR_PTR(error);
diff --git a/fs/fuse/passthrough.c b/fs/fuse/passthrough.c
index 72de97c03d0e..f2d08ac2459b 100644
--- a/fs/fuse/passthrough.c
+++ b/fs/fuse/passthrough.c
@@ -167,7 +167,7 @@ struct fuse_backing *fuse_passthrough_open(struct file *file, int backing_id)
goto out;
/* Allocate backing file per fuse file to store fuse path */
- backing_file = backing_file_open(&file->f_path, file->f_flags,
+ backing_file = backing_file_open(file, file->f_flags,
&fb->file->f_path, fb->cred);
err = PTR_ERR(backing_file);
if (IS_ERR(backing_file)) {
diff --git a/fs/internal.h b/fs/internal.h
index cbc384a1aa09..77e90e4124e0 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -106,7 +106,8 @@ extern void chroot_fs_refs(const struct path *, const struct path *);
*/
struct file *alloc_empty_file(int flags, const struct cred *cred);
struct file *alloc_empty_file_noaccount(int flags, const struct cred *cred);
-struct file *alloc_empty_backing_file(int flags, const struct cred *cred);
+struct file *alloc_empty_backing_file(int flags, const struct cred *cred,
+ const struct file *user_file);
void backing_file_set_user_path(struct file *f, const struct path *path);
static inline void file_put_write_access(struct file *file)
diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c
index ff3dbd1ca61f..f2f20a611af3 100644
--- a/fs/overlayfs/dir.c
+++ b/fs/overlayfs/dir.c
@@ -1374,7 +1374,7 @@ static int ovl_create_tmpfile(struct file *file, struct dentry *dentry,
return PTR_ERR(cred);
ovl_path_upper(dentry->d_parent, &realparentpath);
- realfile = backing_tmpfile_open(&file->f_path, flags, &realparentpath,
+ realfile = backing_tmpfile_open(file, flags, &realparentpath,
mode, current_cred());
err = PTR_ERR_OR_ZERO(realfile);
pr_debug("tmpfile/open(%pd2, 0%o) = %i\n", realparentpath.dentry, mode, err);
diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
index 97bed2286030..27cc07738f33 100644
--- a/fs/overlayfs/file.c
+++ b/fs/overlayfs/file.c
@@ -48,7 +48,7 @@ static struct file *ovl_open_realfile(const struct file *file,
if (!inode_owner_or_capable(real_idmap, realinode))
flags &= ~O_NOATIME;
- realfile = backing_file_open(file_user_path(file),
+ realfile = backing_file_open(file,
flags, realpath, current_cred());
}
}
diff --git a/include/linux/backing-file.h b/include/linux/backing-file.h
index 1476a6ed1bfd..c939cd222730 100644
--- a/include/linux/backing-file.h
+++ b/include/linux/backing-file.h
@@ -18,10 +18,10 @@ struct backing_file_ctx {
void (*end_write)(struct kiocb *iocb, ssize_t);
};
-struct file *backing_file_open(const struct path *user_path, int flags,
+struct file *backing_file_open(const struct file *user_file, int flags,
const struct path *real_path,
const struct cred *cred);
-struct file *backing_tmpfile_open(const struct path *user_path, int flags,
+struct file *backing_tmpfile_open(const struct file *user_file, int flags,
const struct path *real_parentpath,
umode_t mode, const struct cred *cred);
ssize_t backing_file_read_iter(struct file *file, struct iov_iter *iter,
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 8b3dd145b25e..d0d0e8f55589 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2475,6 +2475,19 @@ struct file *dentry_create(struct path *path, int flags, umode_t mode,
const struct cred *cred);
const struct path *backing_file_user_path(const struct file *f);
+#ifdef CONFIG_SECURITY
+void *backing_file_security(const struct file *f);
+void backing_file_set_security(struct file *f, void *security);
+#else
+static inline void *backing_file_security(const struct file *f)
+{
+ return NULL;
+}
+static inline void backing_file_set_security(struct file *f, void *security)
+{
+}
+#endif /* CONFIG_SECURITY */
+
/*
* When mmapping a file on a stackable filesystem (e.g., overlayfs), the file
* stored in ->vm_file is a backing file whose f_inode is on the underlying
diff --git a/include/linux/lsm_audit.h b/include/linux/lsm_audit.h
index 382c56a97bba..584db296e43b 100644
--- a/include/linux/lsm_audit.h
+++ b/include/linux/lsm_audit.h
@@ -94,7 +94,7 @@ struct common_audit_data {
#endif
char *kmod_name;
struct lsm_ioctlop_audit *op;
- struct file *file;
+ const struct file *file;
struct lsm_ibpkey_audit *ibpkey;
struct lsm_ibendport_audit *ibendport;
int reason;
diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h
index 8c42b4bde09c..b4958167e381 100644
--- a/include/linux/lsm_hook_defs.h
+++ b/include/linux/lsm_hook_defs.h
@@ -191,6 +191,9 @@ LSM_HOOK(int, 0, file_permission, struct file *file, int mask)
LSM_HOOK(int, 0, file_alloc_security, struct file *file)
LSM_HOOK(void, LSM_RET_VOID, file_release, struct file *file)
LSM_HOOK(void, LSM_RET_VOID, file_free_security, struct file *file)
+LSM_HOOK(int, 0, backing_file_alloc, struct file *backing_file,
+ const struct file *user_file)
+LSM_HOOK(void, LSM_RET_VOID, backing_file_free, struct file *backing_file)
LSM_HOOK(int, 0, file_ioctl, struct file *file, unsigned int cmd,
unsigned long arg)
LSM_HOOK(int, 0, file_ioctl_compat, struct file *file, unsigned int cmd,
@@ -198,6 +201,8 @@ LSM_HOOK(int, 0, file_ioctl_compat, struct file *file, unsigned int cmd,
LSM_HOOK(int, 0, mmap_addr, unsigned long addr)
LSM_HOOK(int, 0, mmap_file, struct file *file, unsigned long reqprot,
unsigned long prot, unsigned long flags)
+LSM_HOOK(int, 0, mmap_backing_file, struct vm_area_struct *vma,
+ struct file *backing_file, struct file *user_file)
LSM_HOOK(int, 0, file_mprotect, struct vm_area_struct *vma,
unsigned long reqprot, unsigned long prot)
LSM_HOOK(int, 0, file_lock, struct file *file, unsigned int cmd)
diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h
index d48bf0ad26f4..b4f8cad53ddb 100644
--- a/include/linux/lsm_hooks.h
+++ b/include/linux/lsm_hooks.h
@@ -104,6 +104,7 @@ struct security_hook_list {
struct lsm_blob_sizes {
unsigned int lbs_cred;
unsigned int lbs_file;
+ unsigned int lbs_backing_file;
unsigned int lbs_ib;
unsigned int lbs_inode;
unsigned int lbs_sock;
diff --git a/include/linux/security.h b/include/linux/security.h
index ee88dd2d2d1f..8d2d4856934e 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -472,11 +472,17 @@ int security_file_permission(struct file *file, int mask);
int security_file_alloc(struct file *file);
void security_file_release(struct file *file);
void security_file_free(struct file *file);
+int security_backing_file_alloc(struct file *backing_file,
+ const struct file *user_file);
+void security_backing_file_free(struct file *backing_file);
int security_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
int security_file_ioctl_compat(struct file *file, unsigned int cmd,
unsigned long arg);
int security_mmap_file(struct file *file, unsigned long prot,
unsigned long flags);
+int security_mmap_backing_file(struct vm_area_struct *vma,
+ struct file *backing_file,
+ struct file *user_file);
int security_mmap_addr(unsigned long addr);
int security_file_mprotect(struct vm_area_struct *vma, unsigned long reqprot,
unsigned long prot);
@@ -1141,6 +1147,15 @@ static inline void security_file_release(struct file *file)
static inline void security_file_free(struct file *file)
{ }
+static inline int security_backing_file_alloc(struct file *backing_file,
+ const struct file *user_file)
+{
+ return 0;
+}
+
+static inline void security_backing_file_free(struct file *backing_file)
+{ }
+
static inline int security_file_ioctl(struct file *file, unsigned int cmd,
unsigned long arg)
{
@@ -1160,6 +1175,13 @@ static inline int security_mmap_file(struct file *file, unsigned long prot,
return 0;
}
+static inline int security_mmap_backing_file(struct vm_area_struct *vma,
+ struct file *backing_file,
+ struct file *user_file)
+{
+ return 0;
+}
+
static inline int security_mmap_addr(unsigned long addr)
{
return cap_mmap_addr(addr);
diff --git a/security/lsm.h b/security/lsm.h
index db77cc83e158..32f808ad4335 100644
--- a/security/lsm.h
+++ b/security/lsm.h
@@ -29,6 +29,7 @@ extern struct lsm_blob_sizes blob_sizes;
/* LSM blob caches */
extern struct kmem_cache *lsm_file_cache;
+extern struct kmem_cache *lsm_backing_file_cache;
extern struct kmem_cache *lsm_inode_cache;
/* LSM blob allocators */
diff --git a/security/lsm_init.c b/security/lsm_init.c
index 573e2a7250c4..7c0fd17f1601 100644
--- a/security/lsm_init.c
+++ b/security/lsm_init.c
@@ -293,6 +293,8 @@ static void __init lsm_prepare(struct lsm_info *lsm)
blobs = lsm->blobs;
lsm_blob_size_update(&blobs->lbs_cred, &blob_sizes.lbs_cred);
lsm_blob_size_update(&blobs->lbs_file, &blob_sizes.lbs_file);
+ lsm_blob_size_update(&blobs->lbs_backing_file,
+ &blob_sizes.lbs_backing_file);
lsm_blob_size_update(&blobs->lbs_ib, &blob_sizes.lbs_ib);
/* inode blob gets an rcu_head in addition to LSM blobs. */
if (blobs->lbs_inode && blob_sizes.lbs_inode == 0)
@@ -441,6 +443,8 @@ int __init security_init(void)
if (lsm_debug) {
lsm_pr("blob(cred) size %d\n", blob_sizes.lbs_cred);
lsm_pr("blob(file) size %d\n", blob_sizes.lbs_file);
+ lsm_pr("blob(backing_file) size %d\n",
+ blob_sizes.lbs_backing_file);
lsm_pr("blob(ib) size %d\n", blob_sizes.lbs_ib);
lsm_pr("blob(inode) size %d\n", blob_sizes.lbs_inode);
lsm_pr("blob(ipc) size %d\n", blob_sizes.lbs_ipc);
@@ -462,6 +466,11 @@ int __init security_init(void)
lsm_file_cache = kmem_cache_create("lsm_file_cache",
blob_sizes.lbs_file, 0,
SLAB_PANIC, NULL);
+ if (blob_sizes.lbs_backing_file)
+ lsm_backing_file_cache = kmem_cache_create(
+ "lsm_backing_file_cache",
+ blob_sizes.lbs_backing_file,
+ 0, SLAB_PANIC, NULL);
if (blob_sizes.lbs_inode)
lsm_inode_cache = kmem_cache_create("lsm_inode_cache",
blob_sizes.lbs_inode, 0,
diff --git a/security/security.c b/security/security.c
index a26c1474e2e4..048560ef6a1a 100644
--- a/security/security.c
+++ b/security/security.c
@@ -82,6 +82,7 @@ const struct lsm_id *lsm_idlist[MAX_LSM_COUNT];
struct lsm_blob_sizes blob_sizes;
struct kmem_cache *lsm_file_cache;
+struct kmem_cache *lsm_backing_file_cache;
struct kmem_cache *lsm_inode_cache;
#define SECURITY_HOOK_ACTIVE_KEY(HOOK, IDX) security_hook_active_##HOOK##_##IDX
@@ -173,6 +174,30 @@ static int lsm_file_alloc(struct file *file)
return 0;
}
+/**
+ * lsm_backing_file_alloc - allocate a composite backing file blob
+ * @backing_file: the backing file
+ *
+ * Allocate the backing file blob for all the modules.
+ *
+ * Returns 0, or -ENOMEM if memory can't be allocated.
+ */
+static int lsm_backing_file_alloc(struct file *backing_file)
+{
+ void *blob;
+
+ if (!lsm_backing_file_cache) {
+ backing_file_set_security(backing_file, NULL);
+ return 0;
+ }
+
+ blob = kmem_cache_zalloc(lsm_backing_file_cache, GFP_KERNEL);
+ backing_file_set_security(backing_file, blob);
+ if (!blob)
+ return -ENOMEM;
+ return 0;
+}
+
/**
* lsm_blob_alloc - allocate a composite blob
* @dest: the destination for the blob
@@ -2418,6 +2443,57 @@ void security_file_free(struct file *file)
}
}
+/**
+ * security_backing_file_alloc() - Allocate and setup a backing file blob
+ * @backing_file: the backing file
+ * @user_file: the associated user visible file
+ *
+ * Allocate a backing file LSM blob and perform any necessary initialization of
+ * the LSM blob. There will be some operations where the LSM will not have
+ * access to @user_file after this point, so any important state associated
+ * with @user_file that is important to the LSM should be captured in the
+ * backing file's LSM blob.
+ *
+ * LSMs should avoid taking a reference to @user_file in this hook as it will
+ * result in problems later when the system attempts to drop/put the file
+ * references due to a circular dependency.
+ *
+ * Return: Return 0 if the hook is successful, negative values otherwise.
+ */
+int security_backing_file_alloc(struct file *backing_file,
+ const struct file *user_file)
+{
+ int rc;
+
+ rc = lsm_backing_file_alloc(backing_file);
+ if (rc)
+ return rc;
+ rc = call_int_hook(backing_file_alloc, backing_file, user_file);
+ if (unlikely(rc))
+ security_backing_file_free(backing_file);
+
+ return rc;
+}
+
+/**
+ * security_backing_file_free() - Free a backing file blob
+ * @backing_file: the backing file
+ *
+ * Free any LSM state associated with a backing file's LSM blob, including the
+ * blob itself.
+ */
+void security_backing_file_free(struct file *backing_file)
+{
+ void *blob = backing_file_security(backing_file);
+
+ call_void_hook(backing_file_free, backing_file);
+
+ if (blob) {
+ backing_file_set_security(backing_file, NULL);
+ kmem_cache_free(lsm_backing_file_cache, blob);
+ }
+}
+
/**
* security_file_ioctl() - Check if an ioctl is allowed
* @file: associated file
@@ -2506,6 +2582,32 @@ int security_mmap_file(struct file *file, unsigned long prot,
flags);
}
+/**
+ * security_mmap_backing_file - Check if mmap'ing a backing file is allowed
+ * @vma: the vm_area_struct for the mmap'd region
+ * @backing_file: the backing file being mmap'd
+ * @user_file: the user file being mmap'd
+ *
+ * Check permissions for a mmap operation on a stacked filesystem. This hook
+ * is called after the security_mmap_file() and is responsible for authorizing
+ * the mmap on @backing_file. It is important to note that the mmap operation
+ * on @user_file has already been authorized and the @vma->vm_file has been
+ * set to @backing_file.
+ *
+ * Return: Returns 0 if permission is granted.
+ */
+int security_mmap_backing_file(struct vm_area_struct *vma,
+ struct file *backing_file,
+ struct file *user_file)
+{
+ /* recommended by the stackable filesystem devs */
+ if (WARN_ON_ONCE(!(backing_file->f_mode & FMODE_BACKING)))
+ return -EIO;
+
+ return call_int_hook(mmap_backing_file, vma, backing_file, user_file);
+}
+EXPORT_SYMBOL_GPL(security_mmap_backing_file);
+
/**
* security_mmap_addr() - Check if mmap'ing an address is allowed
* @addr: address
--
2.53.0
^ permalink raw reply related
* [PATCH v4 1/3] fs: prepare for adding LSM blob to backing_file
From: Paul Moore @ 2026-04-03 3:08 UTC (permalink / raw)
To: linux-security-module, selinux, linux-fsdevel, linux-unionfs,
linux-erofs
Cc: Amir Goldstein, Gao Xiang, Christian Brauner
In-Reply-To: <20260403030848.731867-5-paul@paul-moore.com>
From: Amir Goldstein <amir73il@gmail.com>
In preparation for adding an LSM blob to the backing_file struct, factor out
helpers init_backing_file() and backing_file_free().
Cc: stable@vger.kernel.org
Cc: linux-fsdevel@vger.kernel.org
Cc: linux-unionfs@vger.kernel.org
Cc: linux-erofs@lists.ozlabs.org
Signed-off-by: Amir Goldstein <amir73il@gmail.com>
[PM: use the term "LSM blob", fix comment style to match file]
Signed-off-by: Paul Moore <paul@paul-moore.com>
---
fs/file_table.c | 22 ++++++++++++++++++++--
1 file changed, 20 insertions(+), 2 deletions(-)
diff --git a/fs/file_table.c b/fs/file_table.c
index aaa5faaace1e..3b3792903185 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -66,6 +66,12 @@ void backing_file_set_user_path(struct file *f, const struct path *path)
}
EXPORT_SYMBOL_GPL(backing_file_set_user_path);
+static inline void backing_file_free(struct backing_file *ff)
+{
+ path_put(&ff->user_path);
+ kmem_cache_free(bfilp_cachep, ff);
+}
+
static inline void file_free(struct file *f)
{
security_file_free(f);
@@ -73,8 +79,7 @@ static inline void file_free(struct file *f)
percpu_counter_dec(&nr_files);
put_cred(f->f_cred);
if (unlikely(f->f_mode & FMODE_BACKING)) {
- path_put(backing_file_user_path(f));
- kmem_cache_free(bfilp_cachep, backing_file(f));
+ backing_file_free(backing_file(f));
} else {
kmem_cache_free(filp_cachep, f);
}
@@ -283,6 +288,12 @@ struct file *alloc_empty_file_noaccount(int flags, const struct cred *cred)
return f;
}
+static int init_backing_file(struct backing_file *ff)
+{
+ memset(&ff->user_path, 0, sizeof(ff->user_path));
+ return 0;
+}
+
/*
* Variant of alloc_empty_file() that allocates a backing_file container
* and doesn't check and modify nr_files.
@@ -305,7 +316,14 @@ struct file *alloc_empty_backing_file(int flags, const struct cred *cred)
return ERR_PTR(error);
}
+ /* The f_mode flags must be set before fput(). */
ff->file.f_mode |= FMODE_BACKING | FMODE_NOACCOUNT;
+ error = init_backing_file(ff);
+ if (unlikely(error)) {
+ fput(&ff->file);
+ return ERR_PTR(error);
+ }
+
return &ff->file;
}
EXPORT_SYMBOL_GPL(alloc_empty_backing_file);
--
2.53.0
^ permalink raw reply related
* [PATCH v4 0/3] Fix incorrect overlayfs mmap() and mprotect() LSM access controls
From: Paul Moore @ 2026-04-03 3:08 UTC (permalink / raw)
To: linux-security-module, selinux, linux-fsdevel, linux-unionfs,
linux-erofs
Cc: Amir Goldstein, Gao Xiang, Christian Brauner
Another week, another revision to this patchset. The v3 revision can be
found at the lore[1] link below.
The revision still takes the same basic approach introduced in v2, with
the most significant change in v4 being the change to make the backing
file LSM blob conditional on CONFIG_SECURITY. This requires a number of
other changes to ensure that all accesses of the LSM blob go through a
set of accessor functions which can be converted into dummy functions
when !CONFIG_SECURITY.
While the changes between v3 and v4 were fairly straightforward, there
were enough of them that it felt wrong to preserve the ACKs from previous
revisions. It would be appreciated if those of you who had previously
ACK'd a patch could take a second look and renew your ACK (or comment on
the problem preventing you from ACK'ing).
Thanks all.
[1] https://lore.kernel.org/linux-security-module/20260327220446.353103-4-paul@paul-moore.com/
--
CHANGELOG:
v4:
- added fs prep patch (Amir)
- added CONFIG_SECURITY conditional code (Amir)
v3:
- fix the LSM hook stubs (kernel robot, Ryan Lee)
- fix the lsm_backing_file_cache allocation size (Ryan Lee)
- minor style, simplicity tweaks to the SELinux patch
v2:
- remove the user O_PATH file patch from Amir
- add the backing_file LSM blob and lifecycle hooks
- update the SELinux code to reflect the other changes
v1:
- initial version
--
Amir Goldstein (1):
fs: prepare for adding LSM blob to backing_file
Paul Moore (2):
lsm: add backing_file LSM hooks
selinux: fix overlayfs mmap() and mprotect() access checks
fs/backing-file.c | 18 +-
fs/erofs/ishare.c | 10 +
fs/file_table.c | 43 ++++-
fs/fuse/passthrough.c | 2
fs/internal.h | 3
fs/overlayfs/dir.c | 2
fs/overlayfs/file.c | 2
include/linux/backing-file.h | 4
include/linux/fs.h | 13 +
include/linux/lsm_audit.h | 2
include/linux/lsm_hook_defs.h | 5
include/linux/lsm_hooks.h | 1
include/linux/security.h | 22 ++
security/lsm.h | 1
security/lsm_init.c | 9 +
security/security.c | 102 +++++++++++
security/selinux/hooks.c | 256 +++++++++++++++++++++---------
security/selinux/include/objsec.h | 11 +
18 files changed, 419 insertions(+), 87 deletions(-)
^ permalink raw reply
* Re: LSM namespacing API
From: Paul Moore @ 2026-04-02 21:04 UTC (permalink / raw)
To: Dr. Greg
Cc: Stephen Smalley, Ondrej Mosnacek, linux-security-module, selinux,
John Johansen
In-Reply-To: <ac5MKr4lFQhc44i6@wind.enjellic.com>
On Thu, Apr 2, 2026 at 7:00 AM Dr. Greg <greg@enjellic.com> wrote:
> On Sun, Mar 29, 2026 at 08:56:37PM -0400, Paul Moore wrote:
> > On Sun, Mar 29, 2026 at 12:09 PM Dr. Greg <greg@enjellic.com> wrote:
> > > On Tue, Mar 24, 2026 at 05:31:09PM -0400, Paul Moore wrote:
> > > > On Tue, Mar 3, 2026 at 11:46 AM Paul Moore <paul@paul-moore.com> wrote:
...
> Christian had proposed patches for a generic mechanism to create
> LSM security namespace blobs, is implementation of that in scope for
> this effort?
That isn't what Christian proposed, although I can understand how a
quick glance at the patchset would lead you to believe that (I had the
same misunderstanding while skimming my inbox on my phone while
traveling). I suggest reviewing Christian's post again as well as the
related Landlock patchset which is the first to use the hooks
Christian proposed.
> > > It would seem that the flags variable might be a good option to use to
> > > handle this 2-stage transition, for example LSM_NS_INIT and
> > > LSM_NS_CHANGE, respectively, to specify the initialization and
> > > execution phases of the transition.
>
> > No. The lsm_unshare() syscall is intended to mimic the existing
> > unshare() syscall as a single step process from a user's
> > perspective. If it returns successfully the caller will be in a new
> > LSM namespace as defined by the individual LSM specified in the
> > syscall.
>
> OK, we can reason forward with that paradigm.
>
> An orchestrator issues the unshare call for an LSM namespace and upon
> return from the system call the calling task is in a new namespace for
> that particular LSM ...
Yes.
> ... the goal of which is presumably to implement a
> security policy/model different than what had been in force
> previously.
Maybe. That is dependent on the individual LSM, I don't want to
encode any assumptions on this at the LSM framework layer.
> So the process is in a new LSM specific namespace, but still
> implementing the policy from the previous namespace, until the
> orchestrator can load the new policy and then trigger the LSM to
> change from its previous policy to the newly loaded policy.
>
> Is this consistent with your vision as to how all of this will work?
No. What an individual LSM does upon creation of a new namespace via
lsm_unshare() is entirely up to that LSM. The LSM may choose to bound
the new namespace by the parent's policy, or it may choose a
non-hierarchical relationship where the new namespace remains entirely
separate from the parent. The LSM may start the new namespace in an
uninitialized state (similar to early boot), initialized with a
default policy, initialized with the parent's policy, or something
else.
> > > The other unanswered issue, or perhaps we missed it, are the security
> > > controls that should be associated with the unshare call.
>
> > Each LSM is free to implement whatever access controls it deems
> > necessary in its lsm_unshare() callback.
>
> Just to be clear.
>
> When you refer to 'lsm_unshare() callback' are you referring to a new
> LSM security hook to be implemented that will allow all of the
> active LSM's to pass judgement on whether or not the unshare should be
> allowed to complete successfully?
No. The lsm_unshare() callback is the individual LSM provided
function that the LSM framework calls when the lsm_unshare() syscall
is invoked. Put another way, the lsm_unshare() callback is the
function specified by a LSM, using the LSM_HOOK_INIT() macro, that is
called by the lsm_unshare() syscall.
> > > Will there be a new LSM hook that allows other LSM's to veto the
> > > creation of a namespace either for itself or for another LSM?
> >
> > I would expect the lsm_unshare() syscall to operate similarly to the
> > lsm_set_self_attr() syscall in this regard.
>
> The reference to handling this like lsm_set_self_attr() is unclear.
>
> With lsm_set_self_attr() there is no reason for another LSM to deny
> setting what is an LSM specific attribute, as you note above, each LSM
> gets to decide if the request to set an attribute for the LSM should
> be accepted or denied.
No. LSM "A" gets to decide if LSM "A" can create a new namespace
using the lsm_unshare() syscall, LSM "B" does not get to enforce any
policy on LSM "A"'s decision.
> Since lsm_unshare() is changing the overall platform security state,
> it seems consistent with the design of the LSM for other LSM's to be
> able to veto this action.
No. This is not consistent with either the design or general
conventions associated with LSM development.
> Once again, this seems like an action that would be consistent with
> the notion of the lockdown LSM,
No.
> > > Should there be an option to completely compile LSM namespaces out of
> > > the kernel?
>
> > That doesn't belong in the LSM framework layer, that is up to the
> > individual LSMs.
>
> You noted above the desire for lsm_unshare to be consistent with other
> namespaces.
>
> The current kernel paradigm is to allow classes of namespace
> resources, ie. CONFIG_UTS_NS, CONFIG_TIME_NS et.al., to be compiled in
> or out of the kernel.
>
> It seems that CONFIG_LSM_NS would be consistent with that model.
CONFIG_UTS_NS does not have multiple radically different
implementations underneath it. Comparing any of the existing Kconfig
namespace knobs to what we are attempting to do with the LSM framework
is going to be difficult due to some inherent differences between the
two things.
The lsm_unshare() syscall is simply an API abstraction intended to
make it easier for userspace to interact with the individual LSMs;
instead of dealing with multiple different namespacing APIs, one for
each LSM, lsm_unshare() provides a single interface to make app devs'
lives easier.
If an individual LSM wants to provide a Kconfig knob to toggle their
namespace support they are welcome to do so, lsm_unshare() should
exist regardless and return an error code if the desired LSM does not
implement namespace support in the particular kernel build.
> > > > * Implement /proc/pid/ns/lsm and setns(CLONE_NEWLSM)
> > > >
> > > > As discussed previously, this allows us to move a process into an
> > > > existing, established LSM namespace set. The caller cannot
> > > > selectively choose which individual LSM namespaces they join from the
> > > > given LSM namespace set, they receive the same LSM namespace
> > > > configuration as the target process.
> > >
> > > As an initial aside. It would be assumed that a positive result of a
> > > setns call would be to cause the calling process to atomically change
> > > its security namespace set. This would further suggest the need to
> > > have the security namespace creation process also execute atomically
> > > in a multi-LSM namespace change environment.
>
> > In the setns case no new LSM namespaces should be created, the process
> > simply joins an existing set of LSM namespaces.
>
> The issue isn't about new namespaces being created, the issue is
> atomicity of a change to a new set of security policies.
>
> With setns an atomic transition is implemented.
>
> The proposed lsm_unshare() behavior results in a period of time when
> multiple and varying security policies are active, depending on
> various race issues in the orchestrator implementation.
>
> This opens the door to a raft of potential security issues that we can
> have a new acronym for, Time Of Implementation Time Of Use (TOITOU).
I would expect that any LSM implementing namespaces would have
sufficient protections/locking in place to ensure that processes and
namespaces remain in a consistent state outside of the
protected/locked regions. It is reasonable for one process to attempt
the creation of a new namespace while another attempts to join the
namespace of the process creating the new namespace. This is not
really a new problem in systems programming, and is one reason why
synchronization mechanisms exist. Once again, we do not want to force
any particular solution at the LSM framework layer as the
synchronization mechanisms will likely be very LSM dependent.
> > > ... That is the concept of whether or not a setns
> > > call, for any resource namespace, should also force a security
> > > namespace change if the security namespace of the calling process
> > > differs from that of the target process.
>
> > That decision is left to the individual LSMs.
>
> That is reasonable.
>
> In order to support that model, there would seem to be a need to have
> a new LSM call in the setns code that allows LSM's to determine
> whether or not a change in the active security namespace set should be
> forced, correct?
Possibly. I think we need to see some RFC code to see how this would
look, but I think the LSM implementation inside the setns() syscall
would need to be done in two stages: the first to "prepare" the join
operation where permissions checks are performed (if desired by the
individual LSM) and any operations that could fail are done; the
second stage would be very basic and simply finish the join operation
without any risk of failure. An individual LSM could fail the join
operation for a variety of reasons in stage 1, causing the entire
setns() operation to fail, but once we progress to stage 2 the
operation should succeed.
At this point I'm not too bothered by how we do this as it is an
implementation detail buried within the setns() implementation and not
really an API issue. We could create a single LSM hook that is called
within sys_setns(), or we could leverage the existing two-stage
process within sys_setns() and implement the two LSM stages as two LSM
hooks. The first option would be more complicated from a LSM
perspective, but cleaner from a nsproxy.c perspective (that alone
could make it the more preferable option). The latter option would
result in cleaner, thinner LSM hooks, but it would likely add
complexity to ns_common and/or nsset. As I said earlier, this is a
decision that will likely be decided by how the code ends up looking.
> If so, is implementation of this in scope for the lsm_unshare()
> infrastructure?
No. The lsm_unshare() syscall would only operate on one LSM at a time
so a two stage process isn't needed at the LSM framework layer. It is
possible that an individual LSM may want to implement a two-stage
transaction in their lsm_unshare() callback, but that is their
decision.
> To close, at the risk of being the devil's advocate.
>
> Given that the sentiment is to force almost all of these
> issues/decisions into the individual LSM's, what is the advantage of
> having a common lsm_unshare() system call?
A single uniform API for userspace applications that wish to make use
of LSM namespaces. Ideally we want to leverage the existing kernel
APIs, e.g. procfs and setns(), but others, e.g. clone(), remain
impractical due to a combination of technical and political reasons
(we've already discussed some of the former, the latter is a rathole
discussion I'm not going to engage in at the moment).
> In the proposed model, a resource orchestrator is going to need to
> have extensive knowledge over the mechanics of all the LSM's that
> implement namespace functionality.
Maybe. I don't think orchestrators will need to have "extensive"
knowledge of the individual LSMs, although this largely depends on
what you define as "extensive".
I also want to get ahead of this and say that I have absolutely zero
desire to debate this point with you at the moment. It's an argument
without end and the discussion is unlikely to yield anything specific
enough to be helpful.
> At a very minimum, intrinsic to
> the concept of security namespaces, there will be a need to load a new
> policy or model into the namespace, an action that will be deeply LSM
> specific.
Possibly, as this is once again very LSM dependent. Some LSMs may not
need a new policy loaded when they create a new namespace.
I will also, once again, point you at the LSM policy loading syscall
ideas. While on hold, we've already discussed that they should be
namespace aware and potentially have the ability to trigger new LSM
namespace creation.
> At this point, the only common functionality may be the allocation of
> a new LSM namespace 'blob'.
Now you are starting to get it. The LSM framework exists primarily as
a multiplexing layer hidden beneath an API. Originally the API was
only for internal kernel users, but recently we started providing a
userspace syscall API.
--
paul-moore.com
^ permalink raw reply
* Re: [PATCH v3 0/5] Fix Landlock audit test flakiness
From: Günther Noack @ 2026-04-02 20:57 UTC (permalink / raw)
To: Mickaël Salaün
Cc: Günther Noack, linux-security-module, Justin Suess,
Tingmao Wang
In-Reply-To: <20260402.eb5c4e85f472@gnoack.org>
On Thu, Apr 02, 2026 at 10:52:46PM +0200, Günther Noack wrote:
> My kernel config is this:
>
> make defconfig
> make kvm_guest.config
> KCONFIG_CONFIG="${KBUILD_OUTPUT}/.config" ./scripts/kconfig/merge_config.sh "${KBUILD_OUTPUT}/.config" tools/testing/selftests/landlock/config
> make debug.config
> echo "CONFIG_RANDOMIZE_BASE=n" >> "${KBUILD_OUTPUT}/.config"
> make olddefconfig
P.S.: I should point out, every time that I have observed these
flakiness problems with the audit tests, it was in this debug config.
I suspect that it adds delays in a way that makes it more likely.
–Günther
^ permalink raw reply
* Re: [PATCH v3 0/5] Fix Landlock audit test flakiness
From: Günther Noack @ 2026-04-02 20:52 UTC (permalink / raw)
To: Mickaël Salaün
Cc: Günther Noack, linux-security-module, Justin Suess,
Tingmao Wang
In-Reply-To: <20260402192608.1458252-1-mic@digikod.net>
Hello!
On Thu, Apr 02, 2026 at 09:26:01PM +0200, Mickaël Salaün wrote:
> This series fixes two classes of audit selftest failures plus two minor
> bugs in the audit test helpers.
>
> The main issue is that domain deallocation audit records are emitted
> asynchronously from kworker threads and can arrive after a previous
> test's socket has been closed. This causes two distinct failure modes:
>
> - audit_match_record() picks up a stale deallocation record from a
> previous test instead of the expected one, causing a domain ID
> mismatch. The audit.layers test (which reads 16 deallocation records
> in sequence) is particularly vulnerable because the large read window
> allows stale records to interleave. Patch 4 fixes this by filtering
> deallocation records by domain ID and skipping type-matching records
> with wrong content patterns.
>
> - audit_count_records() counts stale deallocation records from a
> previous test, incrementing records.domain from the expected 0 to 1.
> Patch 3 fixes this by draining stale records at audit_init() time and
> removing records.domain == 0 checks that are not preceded by
> audit_match_record() calls (which would consume stale records).
>
> These races are more likely to manifest when additional instrumentation
> changes kworker timing in the deallocation path (e.g. with the upcoming
> Landlock tracepoints work).
>
> The two minor fixes (patches 1-2) correct a snprintf truncation check
> off-by-one and socket file descriptor leaks on error paths in
> audit_init(), audit_init_with_exe_filter(), and audit_cleanup().
> Patch 5 fixes a __u64 format warning reported by the kbuild bot on
> powerpc64.
>
> Patch 1 is an exact subset of the v1 combined patch, which is why it
> carries the Reviewed-by tag. Patches 2 and 3 extend beyond what was in
> v1, so the Reviewed-by is not carried. Patches 4 and 5 are new.
>
> Changes since v2:
> https://lore.kernel.org/r/20260401161503.1136946-1-mic@digikod.net
> - Patches 4-5: fix __u64 format warnings on powerpc64 (cast to unsigned
> long long for %llx). Patch 5 is new.
>
> Changes since v1:
> https://lore.kernel.org/r/20260312100444.2609563-8-mic@digikod.net
> - Split the combined drain fix into four separate patches.
> - Patch 2: extend fd leak fix to audit_init_with_exe_filter() and
> audit_cleanup().
> - Patch 3: also remove domain checks from audit.trace and
> scoped_audit.connect_to_child, document constraint, explain why a
> longer drain timeout was rejected.
> - Patch 4: new, add domain ID filtering and timeout management to
> matches_log_domain_deallocated(), skip stale records in
> audit_match_record().
>
> Mickaël Salaün (5):
> selftests/landlock: Fix snprintf truncation checks in audit helpers
> selftests/landlock: Fix socket file descriptor leaks in audit helpers
> selftests/landlock: Drain stale audit records on init
> selftests/landlock: Skip stale records in audit_match_record()
> selftests/landlock: Fix format warning for __u64 in net_test
>
> tools/testing/selftests/landlock/audit.h | 133 ++++++++++++++----
> tools/testing/selftests/landlock/audit_test.c | 36 ++---
> tools/testing/selftests/landlock/net_test.c | 2 +-
> .../testing/selftests/landlock/ptrace_test.c | 1 -
> .../landlock/scoped_abstract_unix_test.c | 1 -
> 5 files changed, 119 insertions(+), 54 deletions(-)
>
> --
> 2.53.0
>
I am still getting flaky audit tests even with these patches, I am
afraid. It differs which of these tests is flaking, some of them
still do, for example:
# RUN audit_layout1.remove_dir ...
# fs_test.c:7281:remove_dir:Expected 0 (0) == matches_log_fs(_metadata, self->audit_fd, "fs\\.remove_dir", dir_s1d2) (-11)
# remove_dir: Test failed
# ❌ FAIL audit_layout1.remove_dir
not ok 191 audit_layout1.remove_dir
# RUN audit_layout1.read_dir ...
# ✅ OK audit_layout1.read_dir
ok 192 audit_layout1.read_dir
# RUN audit_layout1.read_file ...
# ✅ OK audit_layout1.read_file
ok 193 audit_layout1.read_file
# RUN audit_layout1.write_file ...
# fs_test.c:7221:write_file:Expected 0 (0) == matches_log_fs(_metadata, self->audit_fd, "fs\\.write_file", file1_s1d1) (-11)
# fs_test.c:7224:write_file:Expected 0 (0) == records.access (1)
# write_file: Test failed
# ❌ FAIL audit_layout1.write_file
not ok 194 audit_layout1.write_file
My kernel config is this:
make defconfig
make kvm_guest.config
KCONFIG_CONFIG="${KBUILD_OUTPUT}/.config" ./scripts/kconfig/merge_config.sh "${KBUILD_OUTPUT}/.config" tools/testing/selftests/landlock/config
make debug.config
echo "CONFIG_RANDOMIZE_BASE=n" >> "${KBUILD_OUTPUT}/.config"
make olddefconfig
and then I run the selftests in Qemu with these flags:
qemu-system-x86_64 \
-nographic \
-m 4G \
-enable-kvm \
-append "console=ttyS0 lsm=landlock no_hash_pointers" \
-kernel "${KBUILD_OUTPUT}/arch/x86/boot/bzImage" \
-initrd "${INITRAMFS}"
This is using my own selftest runner scripts which builds an initramfs
with the statically linked selftests.
Do you have a hunch what might be missing there? In the test run
above, I have applied your V4 patch set on top of the current master,
5619b098e2fbf3a23bf13d91897056a1fe238c6d ("Merge tag 'for-7.0-rc6-tag'
of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux").
–Günther
^ permalink raw reply
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox