Linux Security Modules development

Linux Security Modules development
 help / color / mirror / Atom feed

* [PATCH v4 05/13] ima: Introduce _ima_measurements_start() and _ima_measurements_next()
From: Roberto Sassu @ 2026-03-26 17:30 UTC (permalink / raw)
  To: corbet, skhan, zohar, dmitry.kasatkin, eric.snowberg, paul,
	jmorris, serge
  Cc: linux-doc, linux-kernel, linux-integrity, linux-security-module,
	gregorylumen, chenste, nramas, Roberto Sassu
In-Reply-To: <20260326173011.1191815-1-roberto.sassu@huaweicloud.com>

From: Roberto Sassu <roberto.sassu@huawei.com>

Introduce _ima_measurements_start() and _ima_measurements_next(), renamed
from ima_measurements_start() and ima_measurements_next(), to include the
list head as an additional parameter, so that iteration on different lists
can be implemented by calling those functions.

No functional change: ima_measurements_start() and ima_measurements_next()
pass the ima_measurements list head, used before.

Link: https://github.com/linux-integrity/linux/issues/1
Signed-off-by: Roberto Sassu <roberto.sassu@huawei.com>
---
 security/integrity/ima/ima_fs.c | 20 ++++++++++++++++----
 1 file changed, 16 insertions(+), 4 deletions(-)

diff --git a/security/integrity/ima/ima_fs.c b/security/integrity/ima/ima_fs.c
index 79b0f287c668..9a8dba14d82a 100644
--- a/security/integrity/ima/ima_fs.c
+++ b/security/integrity/ima/ima_fs.c
@@ -72,14 +72,15 @@ static const struct file_operations ima_measurements_count_ops = {
 };
 
 /* returns pointer to hlist_node */
-static void *ima_measurements_start(struct seq_file *m, loff_t *pos)
+static void *_ima_measurements_start(struct seq_file *m, loff_t *pos,
+				     struct list_head *head)
 {
 	loff_t l = *pos;
 	struct ima_queue_entry *qe;
 
 	/* we need a lock since pos could point beyond last element */
 	rcu_read_lock();
-	list_for_each_entry_rcu(qe, &ima_measurements, later) {
+	list_for_each_entry_rcu(qe, head, later) {
 		if (!l--) {
 			rcu_read_unlock();
 			return qe;
@@ -89,7 +90,13 @@ static void *ima_measurements_start(struct seq_file *m, loff_t *pos)
 	return NULL;
 }
 
-static void *ima_measurements_next(struct seq_file *m, void *v, loff_t *pos)
+static void *ima_measurements_start(struct seq_file *m, loff_t *pos)
+{
+	return _ima_measurements_start(m, pos, &ima_measurements);
+}
+
+static void *_ima_measurements_next(struct seq_file *m, void *v, loff_t *pos,
+				    struct list_head *head)
 {
 	struct ima_queue_entry *qe = v;
 
@@ -101,7 +108,12 @@ static void *ima_measurements_next(struct seq_file *m, void *v, loff_t *pos)
 	rcu_read_unlock();
 	(*pos)++;
 
-	return (&qe->later == &ima_measurements) ? NULL : qe;
+	return (&qe->later == head) ? NULL : qe;
+}
+
+static void *ima_measurements_next(struct seq_file *m, void *v, loff_t *pos)
+{
+	return _ima_measurements_next(m, v, pos, &ima_measurements);
 }
 
 static void ima_measurements_stop(struct seq_file *m, void *v)
-- 
2.43.0


^ permalink raw reply related

* [PATCH v4 06/13] ima: Mediate open/release method of the measurements list
From: Roberto Sassu @ 2026-03-26 17:30 UTC (permalink / raw)
  To: corbet, skhan, zohar, dmitry.kasatkin, eric.snowberg, paul,
	jmorris, serge
  Cc: linux-doc, linux-kernel, linux-integrity, linux-security-module,
	gregorylumen, chenste, nramas, Roberto Sassu
In-Reply-To: <20260326173011.1191815-1-roberto.sassu@huaweicloud.com>

From: Roberto Sassu <roberto.sassu@huawei.com>

Introduce the ima_measure_users counter, to implement a semaphore-like
locking scheme where the binary and ASCII measurements list interfaces can
be concurrently open by multiple readers, or alternatively by a single
writer.

A semaphore cannot be used because the kernel cannot return to user space
with a lock held.

Introduce the ima_measure_lock() and ima_measure_unlock() primitives, to
respectively lock/unlock the interfaces. Both update the ima_measure_users
counter under ima_measure_mutex, which is released before they return, so
that no lock is held when returning to user space.

Finally, introduce _ima_measurements_open() to lock the interface before
seq_open(), and call it from ima_measurements_open() and
ima_ascii_measurements_open(). And, introduce ima_measurements_release(),
to unlock the interface.

Require CAP_SYS_ADMIN if the interface is opened for write (not possible
for the current measurements interfaces, since they only have read
permission).

No functional changes: multiple readers are allowed as before.

Link: https://github.com/linux-integrity/linux/issues/1
Signed-off-by: Roberto Sassu <roberto.sassu@huawei.com>
---
 security/integrity/ima/ima_fs.c | 71 +++++++++++++++++++++++++++++++--
 1 file changed, 67 insertions(+), 4 deletions(-)

diff --git a/security/integrity/ima/ima_fs.c b/security/integrity/ima/ima_fs.c
index 9a8dba14d82a..68edea7139d5 100644
--- a/security/integrity/ima/ima_fs.c
+++ b/security/integrity/ima/ima_fs.c
@@ -25,6 +25,8 @@
 #include "ima.h"
 
 static DEFINE_MUTEX(ima_write_mutex);
+static DEFINE_MUTEX(ima_measure_mutex);
+static long ima_measure_users;
 
 bool ima_canonical_fmt;
 static int __init default_canonical_fmt_setup(char *str)
@@ -209,16 +211,76 @@ static const struct seq_operations ima_measurments_seqops = {
 	.show = ima_measurements_show
 };
 
+static int ima_measure_lock(bool write)
+{
+	mutex_lock(&ima_measure_mutex);
+	if ((write && ima_measure_users != 0) ||
+	    (!write && ima_measure_users < 0)) {
+		mutex_unlock(&ima_measure_mutex);
+		return -EBUSY;
+	}
+
+	if (write)
+		ima_measure_users--;
+	else
+		ima_measure_users++;
+	mutex_unlock(&ima_measure_mutex);
+	return 0;
+}
+
+static void ima_measure_unlock(bool write)
+{
+	mutex_lock(&ima_measure_mutex);
+	if (write)
+		ima_measure_users++;
+	else
+		ima_measure_users--;
+	mutex_unlock(&ima_measure_mutex);
+}
+
+static int _ima_measurements_open(struct inode *inode, struct file *file,
+				  const struct seq_operations *seq_ops)
+{
+	bool write = (file->f_mode & FMODE_WRITE);
+	int ret;
+
+	if (write && !capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	ret = ima_measure_lock(write);
+	if (ret < 0)
+		return ret;
+
+	ret = seq_open(file, seq_ops);
+	if (ret < 0)
+		ima_measure_unlock(write);
+
+	return ret;
+}
+
 static int ima_measurements_open(struct inode *inode, struct file *file)
 {
-	return seq_open(file, &ima_measurments_seqops);
+	return _ima_measurements_open(inode, file, &ima_measurments_seqops);
+}
+
+static int ima_measurements_release(struct inode *inode, struct file *file)
+{
+	bool write = (file->f_mode & FMODE_WRITE);
+	int ret;
+
+	/* seq_release() always returns zero. */
+	ret = seq_release(inode, file);
+
+	ima_measure_unlock(write);
+
+	return ret;
 }
 
 static const struct file_operations ima_measurements_ops = {
 	.open = ima_measurements_open,
 	.read = seq_read,
 	.llseek = seq_lseek,
-	.release = seq_release,
+	.release = ima_measurements_release,
 };
 
 void ima_print_digest(struct seq_file *m, u8 *digest, u32 size)
@@ -283,14 +345,15 @@ static const struct seq_operations ima_ascii_measurements_seqops = {
 
 static int ima_ascii_measurements_open(struct inode *inode, struct file *file)
 {
-	return seq_open(file, &ima_ascii_measurements_seqops);
+	return _ima_measurements_open(inode, file,
+				      &ima_ascii_measurements_seqops);
 }
 
 static const struct file_operations ima_ascii_measurements_ops = {
 	.open = ima_ascii_measurements_open,
 	.read = seq_read,
 	.llseek = seq_lseek,
-	.release = seq_release,
+	.release = ima_measurements_release,
 };
 
 static ssize_t ima_read_policy(char *path)
-- 
2.43.0


^ permalink raw reply related

* [PATCH v4 07/13] ima: Use snprintf() in create_securityfs_measurement_lists
From: Roberto Sassu @ 2026-03-26 17:30 UTC (permalink / raw)
  To: corbet, skhan, zohar, dmitry.kasatkin, eric.snowberg, paul,
	jmorris, serge
  Cc: linux-doc, linux-kernel, linux-integrity, linux-security-module,
	gregorylumen, chenste, nramas, Roberto Sassu
In-Reply-To: <20260326173011.1191815-1-roberto.sassu@huaweicloud.com>

From: Roberto Sassu <roberto.sassu@huawei.com>

Use the more secure snprintf() function (accepting the buffer size) in
create_securityfs_measurement_lists().

No functional change intended: sprintf() and snprintf() produce the same
output as long as the formatted name fits in the file_name buffer.

Link: https://github.com/linux-integrity/linux/issues/1
Signed-off-by: Roberto Sassu <roberto.sassu@huawei.com>
---
 security/integrity/ima/ima_fs.c | 20 ++++++++++++--------
 1 file changed, 12 insertions(+), 8 deletions(-)

diff --git a/security/integrity/ima/ima_fs.c b/security/integrity/ima/ima_fs.c
index 68edea7139d5..7709a4576322 100644
--- a/security/integrity/ima/ima_fs.c
+++ b/security/integrity/ima/ima_fs.c
@@ -472,11 +472,13 @@ static int __init create_securityfs_measurement_lists(void)
 		struct dentry *dentry;
 
 		if (algo == HASH_ALGO__LAST)
-			sprintf(file_name, "ascii_runtime_measurements_tpm_alg_%x",
-				ima_tpm_chip->allocated_banks[i].alg_id);
+			snprintf(file_name, sizeof(file_name),
+				 "ascii_runtime_measurements_tpm_alg_%x",
+				 ima_tpm_chip->allocated_banks[i].alg_id);
 		else
-			sprintf(file_name, "ascii_runtime_measurements_%s",
-				hash_algo_name[algo]);
+			snprintf(file_name, sizeof(file_name),
+				 "ascii_runtime_measurements_%s",
+				 hash_algo_name[algo]);
 		dentry = securityfs_create_file(file_name, S_IRUSR | S_IRGRP,
 						ima_dir, (void *)(uintptr_t)i,
 						&ima_ascii_measurements_ops);
@@ -484,11 +486,13 @@ static int __init create_securityfs_measurement_lists(void)
 			return PTR_ERR(dentry);
 
 		if (algo == HASH_ALGO__LAST)
-			sprintf(file_name, "binary_runtime_measurements_tpm_alg_%x",
-				ima_tpm_chip->allocated_banks[i].alg_id);
+			snprintf(file_name, sizeof(file_name),
+				 "binary_runtime_measurements_tpm_alg_%x",
+				 ima_tpm_chip->allocated_banks[i].alg_id);
 		else
-			sprintf(file_name, "binary_runtime_measurements_%s",
-				hash_algo_name[algo]);
+			snprintf(file_name, sizeof(file_name),
+				 "binary_runtime_measurements_%s",
+				 hash_algo_name[algo]);
 		dentry = securityfs_create_file(file_name, S_IRUSR | S_IRGRP,
 						ima_dir, (void *)(uintptr_t)i,
 						&ima_measurements_ops);
-- 
2.43.0


^ permalink raw reply related

* [PATCH v4 08/13] ima: Introduce ima_dump_measurement()
From: Roberto Sassu @ 2026-03-26 17:30 UTC (permalink / raw)
  To: corbet, skhan, zohar, dmitry.kasatkin, eric.snowberg, paul,
	jmorris, serge
  Cc: linux-doc, linux-kernel, linux-integrity, linux-security-module,
	gregorylumen, chenste, nramas, Roberto Sassu
In-Reply-To: <20260326173011.1191815-1-roberto.sassu@huaweicloud.com>

From: Roberto Sassu <roberto.sassu@huawei.com>

Introduce ima_dump_measurement() to simplify the code of
ima_dump_measurement_list() and to avoid repeating the same code block
when iterating over multiple lists.

No functional change: only code moved to a separate function.

Link: https://github.com/linux-integrity/linux/issues/1
Signed-off-by: Roberto Sassu <roberto.sassu@huawei.com>
---
 security/integrity/ima/ima_kexec.c | 19 +++++++++++++------
 1 file changed, 13 insertions(+), 6 deletions(-)

diff --git a/security/integrity/ima/ima_kexec.c b/security/integrity/ima/ima_kexec.c
index 44ebefbdcab0..d7d0fb639d99 100644
--- a/security/integrity/ima/ima_kexec.c
+++ b/security/integrity/ima/ima_kexec.c
@@ -80,6 +80,17 @@ static int ima_alloc_kexec_file_buf(size_t segment_size)
 	return 0;
 }
 
+static int ima_dump_measurement(struct ima_kexec_hdr *khdr,
+				struct ima_queue_entry *qe)
+{
+	if (ima_kexec_file.count >= ima_kexec_file.size)
+		return -EINVAL;
+
+	khdr->count++;
+	ima_measurements_show(&ima_kexec_file, qe);
+	return 0;
+}
+
 static int ima_dump_measurement_list(unsigned long *buffer_size, void **buffer,
 				     unsigned long segment_size)
 {
@@ -97,13 +108,9 @@ static int ima_dump_measurement_list(unsigned long *buffer_size, void **buffer,
 	khdr.version = 1;
 	/* This is an append-only list, no need to hold the RCU read lock */
 	list_for_each_entry_rcu(qe, &ima_measurements, later, true) {
-		if (ima_kexec_file.count < ima_kexec_file.size) {
-			khdr.count++;
-			ima_measurements_show(&ima_kexec_file, qe);
-		} else {
-			ret = -EINVAL;
+		ret = ima_dump_measurement(&khdr, qe);
+		if (ret < 0)
 			break;
-		}
 	}
 
 	/*
-- 
2.43.0


^ permalink raw reply related

* [PATCH v4 09/13] ima: Add support for staging measurements with prompt
From: Roberto Sassu @ 2026-03-26 17:30 UTC (permalink / raw)
  To: corbet, skhan, zohar, dmitry.kasatkin, eric.snowberg, paul,
	jmorris, serge
  Cc: linux-doc, linux-kernel, linux-integrity, linux-security-module,
	gregorylumen, chenste, nramas, Roberto Sassu
In-Reply-To: <20260326173011.1191815-1-roberto.sassu@huaweicloud.com>

From: Roberto Sassu <roberto.sassu@huawei.com>

Introduce the ability to stage the IMA measurements list and to delete the
staged measurements with a prompt.

Staging means moving the current content of the measurement list to a
separate location, and allowing users to read and delete it. This causes
the measurement list to be atomically truncated before new measurements can
be added. Only one staged list can exist at a time: staging again fails
until the staged measurements are deleted. In the event of kexec(),
staging is reverted and staged entries will be carried over to the new
kernel.

Introduce ascii_runtime_measurements_<algo>_staged and
binary_runtime_measurements_<algo>_staged interfaces to stage and delete
the measurements. Use 'echo A > <IMA interface>' and
'echo D > <IMA interface>' to respectively stage and delete the entire
measurements list. Locking of these interfaces is also mediated with a call
to _ima_measurements_open() and with ima_measurements_release().

Implement the staging functionality by introducing the new global
measurements list ima_measurements_staged, and ima_queue_stage() and
ima_queue_staged_delete_all() to respectively move measurements from the
current measurements list to the staged one, and to move staged
measurements to the ima_measurements_trim list for deletion. Introduce
ima_queue_delete() to delete the measurements.

Finally, introduce the BINARY_STAGED and BINARY_FULL binary measurements
list types, to maintain the counters and the binary size of staged
measurements and the full measurements list (including entries that were
staged). BINARY still represents the current binary measurements list.

Use the binary size for the BINARY + BINARY_STAGED types in
ima_add_kexec_buffer(), since both measurements list types are copied to
the secondary kernel during kexec. Use BINARY_FULL in
ima_measure_kexec_event(), to generate a critical data record.

It should be noted that the BINARY_FULL counter is not passed through
kexec. Thus, the number of entries included in the kexec critical data
records refers to the entries since the previous kexec records.

Note: This code derives from the Alt-IMA Huawei project, whose license is
      GPL-2.0 OR MIT.

Link: https://github.com/linux-integrity/linux/issues/1
Suggested-by: Gregory Lumen <gregorylumen@linux.microsoft.com> (staging revert)
Signed-off-by: Roberto Sassu <roberto.sassu@huawei.com>
---
 security/integrity/ima/Kconfig     |  13 +++
 security/integrity/ima/ima.h       |   8 +-
 security/integrity/ima/ima_fs.c    | 167 ++++++++++++++++++++++++++---
 security/integrity/ima/ima_kexec.c |  22 +++-
 security/integrity/ima/ima_queue.c |  97 ++++++++++++++++-
 5 files changed, 286 insertions(+), 21 deletions(-)

diff --git a/security/integrity/ima/Kconfig b/security/integrity/ima/Kconfig
index 976e75f9b9ba..e714726f3384 100644
--- a/security/integrity/ima/Kconfig
+++ b/security/integrity/ima/Kconfig
@@ -332,4 +332,17 @@ config IMA_KEXEC_EXTRA_MEMORY_KB
 	  If set to the default value of 0, an extra half page of memory for those
 	  additional measurements will be allocated.
 
+config IMA_STAGING
+	bool "Support for staging the measurements list"
+	default y
+	help
+	  Add support for staging the measurements list.
+
+	  It allows user space to stage the measurements list for deletion and
+	  to delete the staged measurements after confirmation.
+
+	  On kexec, staging is reverted and staged measurements are prepended
+	  to the current measurements list when measurements are copied to the
+	  secondary kernel.
+
 endif
diff --git a/security/integrity/ima/ima.h b/security/integrity/ima/ima.h
index 97b7d6024b5d..65db152a0a24 100644
--- a/security/integrity/ima/ima.h
+++ b/security/integrity/ima/ima.h
@@ -30,9 +30,11 @@ enum tpm_pcrs { TPM_PCR0 = 0, TPM_PCR8 = 8, TPM_PCR10 = 10 };
 
 /*
  * BINARY: current binary measurements list
+ * BINARY_STAGED: staged binary measurements list
+ * BINARY_FULL: binary measurements list since IMA init (lost after kexec)
  */
 enum binary_lists {
-	BINARY, BINARY__LAST
+	BINARY, BINARY_STAGED, BINARY_FULL, BINARY__LAST
 };
 
 /* digest size for IMA, fits SHA1 or MD5 */
@@ -125,6 +127,7 @@ struct ima_queue_entry {
 	struct ima_template_entry *entry;
 };
 extern struct list_head ima_measurements;	/* list of all measurements */
+extern struct list_head ima_measurements_staged; /* list of staged meas. */
 
 /* Some details preceding the binary serialized measurement list */
 struct ima_kexec_hdr {
@@ -314,6 +317,8 @@ struct ima_template_desc *ima_template_desc_current(void);
 struct ima_template_desc *ima_template_desc_buf(void);
 struct ima_template_desc *lookup_template_desc(const char *name);
 bool ima_template_has_modsig(const struct ima_template_desc *ima_template);
+int ima_queue_stage(void);
+int ima_queue_staged_delete_all(void);
 int ima_restore_measurement_entry(struct ima_template_entry *entry);
 int ima_restore_measurement_list(loff_t bufsize, void *buf);
 int ima_measurements_show(struct seq_file *m, void *v);
@@ -334,6 +339,7 @@ extern spinlock_t ima_queue_lock;
 extern atomic_long_t ima_num_entries[BINARY__LAST];
 extern atomic_long_t ima_num_violations;
 extern struct hlist_head __rcu *ima_htable;
+extern struct mutex ima_extend_list_mutex;
 
 static inline unsigned int ima_hash_key(u8 *digest)
 {
diff --git a/security/integrity/ima/ima_fs.c b/security/integrity/ima/ima_fs.c
index 7709a4576322..39d9128e9f22 100644
--- a/security/integrity/ima/ima_fs.c
+++ b/security/integrity/ima/ima_fs.c
@@ -24,6 +24,13 @@
 
 #include "ima.h"
 
+/*
+ * Requests:
+ * 'A\n': stage the entire measurements list
+ * 'D\n': delete all staged measurements
+ */
+#define STAGED_REQ_LENGTH 21
+
 static DEFINE_MUTEX(ima_write_mutex);
 static DEFINE_MUTEX(ima_measure_mutex);
 static long ima_measure_users;
@@ -97,6 +104,11 @@ static void *ima_measurements_start(struct seq_file *m, loff_t *pos)
 	return _ima_measurements_start(m, pos, &ima_measurements);
 }
 
+static void *ima_measurements_staged_start(struct seq_file *m, loff_t *pos)
+{
+	return _ima_measurements_start(m, pos, &ima_measurements_staged);
+}
+
 static void *_ima_measurements_next(struct seq_file *m, void *v, loff_t *pos,
 				    struct list_head *head)
 {
@@ -118,6 +130,12 @@ static void *ima_measurements_next(struct seq_file *m, void *v, loff_t *pos)
 	return _ima_measurements_next(m, v, pos, &ima_measurements);
 }
 
+static void *ima_measurements_staged_next(struct seq_file *m, void *v,
+					  loff_t *pos)
+{
+	return _ima_measurements_next(m, v, pos, &ima_measurements_staged);
+}
+
 static void ima_measurements_stop(struct seq_file *m, void *v)
 {
 }
@@ -283,6 +301,68 @@ static const struct file_operations ima_measurements_ops = {
 	.release = ima_measurements_release,
 };
 
+static const struct seq_operations ima_measurments_staged_seqops = {
+	.start = ima_measurements_staged_start,
+	.next = ima_measurements_staged_next,
+	.stop = ima_measurements_stop,
+	.show = ima_measurements_show
+};
+
+static int ima_measurements_staged_open(struct inode *inode, struct file *file)
+{
+	return _ima_measurements_open(inode, file,
+				      &ima_measurments_staged_seqops);
+}
+
+static ssize_t ima_measurements_staged_write(struct file *file,
+					     const char __user *buf,
+					     size_t datalen, loff_t *ppos)
+{
+	char req[STAGED_REQ_LENGTH];
+	int ret;
+
+	if (*ppos > 0 || datalen < 2 || datalen > STAGED_REQ_LENGTH)
+		return -EINVAL;
+
+	if (copy_from_user(req, buf, datalen) != 0)
+		return -EFAULT;
+
+	if (req[datalen - 1] != '\n')
+		return -EINVAL;
+
+	req[datalen - 1] = '\0';
+
+	switch (req[0]) {
+	case 'A':
+		if (datalen != 2)
+			return -EINVAL;
+
+		ret = ima_queue_stage();
+		break;
+	case 'D':
+		if (datalen != 2)
+			return -EINVAL;
+
+		ret = ima_queue_staged_delete_all();
+		break;
+	default:
+		ret = -EINVAL;
+	}
+
+	if (ret < 0)
+		return ret;
+
+	return datalen;
+}
+
+static const struct file_operations ima_measurements_staged_ops = {
+	.open = ima_measurements_staged_open,
+	.read = seq_read,
+	.write = ima_measurements_staged_write,
+	.llseek = seq_lseek,
+	.release = ima_measurements_release,
+};
+
 void ima_print_digest(struct seq_file *m, u8 *digest, u32 size)
 {
 	u32 i;
@@ -356,6 +436,28 @@ static const struct file_operations ima_ascii_measurements_ops = {
 	.release = ima_measurements_release,
 };
 
+static const struct seq_operations ima_ascii_measurements_staged_seqops = {
+	.start = ima_measurements_staged_start,
+	.next = ima_measurements_staged_next,
+	.stop = ima_measurements_stop,
+	.show = ima_ascii_measurements_show
+};
+
+static int ima_ascii_measurements_staged_open(struct inode *inode,
+					      struct file *file)
+{
+	return _ima_measurements_open(inode, file,
+				      &ima_ascii_measurements_staged_seqops);
+}
+
+static const struct file_operations ima_ascii_measurements_staged_ops = {
+	.open = ima_ascii_measurements_staged_open,
+	.read = seq_read,
+	.write = ima_measurements_staged_write,
+	.llseek = seq_lseek,
+	.release = ima_measurements_release,
+};
+
 static ssize_t ima_read_policy(char *path)
 {
 	void *data = NULL;
@@ -459,10 +561,21 @@ static const struct seq_operations ima_policy_seqops = {
 };
 #endif
 
-static int __init create_securityfs_measurement_lists(void)
+static int __init create_securityfs_measurement_lists(bool staging)
 {
+	const struct file_operations *ascii_ops = &ima_ascii_measurements_ops;
+	const struct file_operations *binary_ops = &ima_measurements_ops;
+	mode_t permissions = S_IRUSR | S_IRGRP;
+	const char *file_suffix = "";
 	int count = NR_BANKS(ima_tpm_chip);
 
+	if (staging) {
+		ascii_ops = &ima_ascii_measurements_staged_ops;
+		binary_ops = &ima_measurements_staged_ops;
+		file_suffix = "_staged";
+		permissions |= (S_IWUSR | S_IWGRP);
+	}
+
 	if (ima_sha1_idx >= NR_BANKS(ima_tpm_chip))
 		count++;
 
@@ -473,29 +586,32 @@ static int __init create_securityfs_measurement_lists(void)
 
 		if (algo == HASH_ALGO__LAST)
 			snprintf(file_name, sizeof(file_name),
-				 "ascii_runtime_measurements_tpm_alg_%x",
-				 ima_tpm_chip->allocated_banks[i].alg_id);
+				 "ascii_runtime_measurements_tpm_alg_%x%s",
+				 ima_tpm_chip->allocated_banks[i].alg_id,
+				 file_suffix);
 		else
 			snprintf(file_name, sizeof(file_name),
-				 "ascii_runtime_measurements_%s",
-				 hash_algo_name[algo]);
-		dentry = securityfs_create_file(file_name, S_IRUSR | S_IRGRP,
+				 "ascii_runtime_measurements_%s%s",
+				 hash_algo_name[algo], file_suffix);
+		dentry = securityfs_create_file(file_name, permissions,
 						ima_dir, (void *)(uintptr_t)i,
-						&ima_ascii_measurements_ops);
+						ascii_ops);
 		if (IS_ERR(dentry))
 			return PTR_ERR(dentry);
 
 		if (algo == HASH_ALGO__LAST)
 			snprintf(file_name, sizeof(file_name),
-				 "binary_runtime_measurements_tpm_alg_%x",
-				 ima_tpm_chip->allocated_banks[i].alg_id);
+				 "binary_runtime_measurements_tpm_alg_%x%s",
+				 ima_tpm_chip->allocated_banks[i].alg_id,
+				 file_suffix);
 		else
 			snprintf(file_name, sizeof(file_name),
-				 "binary_runtime_measurements_%s",
-				 hash_algo_name[algo]);
-		dentry = securityfs_create_file(file_name, S_IRUSR | S_IRGRP,
+				 "binary_runtime_measurements_%s%s",
+				 hash_algo_name[algo], file_suffix);
+
+		dentry = securityfs_create_file(file_name, permissions,
 						ima_dir, (void *)(uintptr_t)i,
-						&ima_measurements_ops);
+						binary_ops);
 		if (IS_ERR(dentry))
 			return PTR_ERR(dentry);
 	}
@@ -503,6 +619,23 @@ static int __init create_securityfs_measurement_lists(void)
 	return 0;
 }
 
+static int __init create_securityfs_staging_links(void)
+{
+	struct dentry *dentry;
+
+	dentry = securityfs_create_symlink("binary_runtime_measurements_staged",
+		ima_dir, "binary_runtime_measurements_sha1_staged", NULL);
+	if (IS_ERR(dentry))
+		return PTR_ERR(dentry);
+
+	dentry = securityfs_create_symlink("ascii_runtime_measurements_staged",
+		ima_dir, "ascii_runtime_measurements_sha1_staged", NULL);
+	if (IS_ERR(dentry))
+		return PTR_ERR(dentry);
+
+	return 0;
+}
+
 /*
  * ima_open_policy: sequentialize access to the policy file
  */
@@ -595,7 +728,13 @@ int __init ima_fs_init(void)
 		goto out;
 	}
 
-	ret = create_securityfs_measurement_lists();
+	ret = create_securityfs_measurement_lists(false);
+	if (ret == 0 && IS_ENABLED(CONFIG_IMA_STAGING)) {
+		ret = create_securityfs_measurement_lists(true);
+		if (ret == 0)
+			ret = create_securityfs_staging_links();
+	}
+
 	if (ret != 0)
 		goto out;
 
diff --git a/security/integrity/ima/ima_kexec.c b/security/integrity/ima/ima_kexec.c
index d7d0fb639d99..d5503dd5cc9b 100644
--- a/security/integrity/ima/ima_kexec.c
+++ b/security/integrity/ima/ima_kexec.c
@@ -42,8 +42,8 @@ void ima_measure_kexec_event(const char *event_name)
 	long len;
 	int n;
 
-	buf_size = ima_get_binary_runtime_size(BINARY);
-	len = atomic_long_read(&ima_num_entries[BINARY]);
+	buf_size = ima_get_binary_runtime_size(BINARY_FULL);
+	len = atomic_long_read(&ima_num_entries[BINARY_FULL]);
 
 	n = scnprintf(ima_kexec_event, IMA_KEXEC_EVENT_LEN,
 		      "kexec_segment_size=%lu;ima_binary_runtime_size=%lu;"
@@ -106,13 +106,26 @@ static int ima_dump_measurement_list(unsigned long *buffer_size, void **buffer,
 
 	memset(&khdr, 0, sizeof(khdr));
 	khdr.version = 1;
-	/* This is an append-only list, no need to hold the RCU read lock */
-	list_for_each_entry_rcu(qe, &ima_measurements, later, true) {
+	/* Can race with ima_queue_stage() and ima_queue_staged_delete_all(). */
+	mutex_lock(&ima_extend_list_mutex);
+
+	list_for_each_entry_rcu(qe, &ima_measurements_staged, later,
+				lockdep_is_held(&ima_extend_list_mutex)) {
 		ret = ima_dump_measurement(&khdr, qe);
 		if (ret < 0)
 			break;
 	}
 
+	list_for_each_entry_rcu(qe, &ima_measurements, later,
+				lockdep_is_held(&ima_extend_list_mutex)) {
+		if (!ret)
+			ret = ima_dump_measurement(&khdr, qe);
+		if (ret < 0)
+			break;
+	}
+
+	mutex_unlock(&ima_extend_list_mutex);
+
 	/*
 	 * fill in reserved space with some buffer details
 	 * (eg. version, buffer size, number of measurements)
@@ -167,6 +180,7 @@ void ima_add_kexec_buffer(struct kimage *image)
 		extra_memory = CONFIG_IMA_KEXEC_EXTRA_MEMORY_KB * 1024;
 
 	binary_runtime_size = ima_get_binary_runtime_size(BINARY) +
+			      ima_get_binary_runtime_size(BINARY_STAGED) +
 			      extra_memory;
 
 	if (binary_runtime_size >= ULONG_MAX - PAGE_SIZE)
diff --git a/security/integrity/ima/ima_queue.c b/security/integrity/ima/ima_queue.c
index b6d10dceb669..50519ed837d4 100644
--- a/security/integrity/ima/ima_queue.c
+++ b/security/integrity/ima/ima_queue.c
@@ -26,6 +26,7 @@
 static struct tpm_digest *digests;
 
 LIST_HEAD(ima_measurements);	/* list of all measurements */
+LIST_HEAD(ima_measurements_staged); /* list of staged measurements */
 #ifdef CONFIG_IMA_KEXEC
 static unsigned long binary_runtime_size[BINARY__LAST];
 #else
@@ -45,11 +46,11 @@ atomic_long_t ima_num_violations = ATOMIC_LONG_INIT(0);
 /* key: inode (before secure-hashing a file) */
 struct hlist_head __rcu *ima_htable;
 
-/* mutex protects atomicity of extending measurement list
+/* mutex protects atomicity of extending and staging measurement list
  * and extending the TPM PCR aggregate. Since tpm_extend can take
  * long (and the tpm driver uses a mutex), we can't use the spinlock.
  */
-static DEFINE_MUTEX(ima_extend_list_mutex);
+DEFINE_MUTEX(ima_extend_list_mutex);
 
 /*
  * Used internally by the kernel to suspend measurements.
@@ -174,12 +175,16 @@ static int ima_add_digest_entry(struct ima_template_entry *entry,
 				lockdep_is_held(&ima_extend_list_mutex));
 
 	atomic_long_inc(&ima_num_entries[BINARY]);
+	atomic_long_inc(&ima_num_entries[BINARY_FULL]);
+
 	if (update_htable) {
 		key = ima_hash_key(entry->digests[ima_hash_algo_idx].digest);
 		hlist_add_head_rcu(&qe->hnext, &htable[key]);
 	}
 
 	ima_update_binary_runtime_size(entry, BINARY);
+	ima_update_binary_runtime_size(entry, BINARY_FULL);
+
 	return 0;
 }
 
@@ -280,6 +285,94 @@ int ima_add_template_entry(struct ima_template_entry *entry, int violation,
 	return result;
 }
 
+int ima_queue_stage(void)
+{
+	int ret = 0;
+
+	mutex_lock(&ima_extend_list_mutex);
+	if (!list_empty(&ima_measurements_staged)) {
+		ret = -EEXIST;
+		goto out_unlock;
+	}
+
+	if (list_empty(&ima_measurements)) {
+		ret = -ENOENT;
+		goto out_unlock;
+	}
+
+	list_replace(&ima_measurements, &ima_measurements_staged);
+	INIT_LIST_HEAD(&ima_measurements);
+
+	atomic_long_set(&ima_num_entries[BINARY_STAGED],
+			atomic_long_read(&ima_num_entries[BINARY]));
+	atomic_long_set(&ima_num_entries[BINARY], 0);
+
+	if (IS_ENABLED(CONFIG_IMA_KEXEC)) {
+		binary_runtime_size[BINARY_STAGED] =
+					binary_runtime_size[BINARY];
+		binary_runtime_size[BINARY] = 0;
+	}
+out_unlock:
+	mutex_unlock(&ima_extend_list_mutex);
+	return ret;
+}
+
+static void ima_queue_delete(struct list_head *head);
+
+int ima_queue_staged_delete_all(void)
+{
+	LIST_HEAD(ima_measurements_trim);
+
+	mutex_lock(&ima_extend_list_mutex);
+	if (list_empty(&ima_measurements_staged)) {
+		mutex_unlock(&ima_extend_list_mutex);
+		return -ENOENT;
+	}
+
+	list_replace(&ima_measurements_staged, &ima_measurements_trim);
+	INIT_LIST_HEAD(&ima_measurements_staged);
+
+	atomic_long_set(&ima_num_entries[BINARY_STAGED], 0);
+
+	if (IS_ENABLED(CONFIG_IMA_KEXEC))
+		binary_runtime_size[BINARY_STAGED] = 0;
+
+	mutex_unlock(&ima_extend_list_mutex);
+
+	ima_queue_delete(&ima_measurements_trim);
+	return 0;
+}
+
+static void ima_queue_delete(struct list_head *head)
+{
+	struct ima_queue_entry *qe, *qe_tmp;
+	unsigned int i;
+
+	list_for_each_entry_safe(qe, qe_tmp, head, later) {
+		/*
+		 * Safe to free template_data here without synchronize_rcu()
+		 * because the only htable reader, ima_lookup_digest_entry(),
+		 * accesses only entry->digests, not template_data. If new
+		 * htable readers are added that access template_data, a
+		 * synchronize_rcu() is required here.
+		 */
+		for (i = 0; i < qe->entry->template_desc->num_fields; i++) {
+			kfree(qe->entry->template_data[i].data);
+			qe->entry->template_data[i].data = NULL;
+			qe->entry->template_data[i].len = 0;
+		}
+
+		list_del(&qe->later);
+
+		/* No leak if condition is false, referenced by ima_htable. */
+		if (IS_ENABLED(CONFIG_IMA_DISABLE_HTABLE)) {
+			kfree(qe->entry->digests);
+			kfree(qe->entry);
+			kfree(qe);
+		}
+	}
+}
+
 int ima_restore_measurement_entry(struct ima_template_entry *entry)
 {
 	int result = 0;
-- 
2.43.0


^ permalink raw reply related

* [PATCH v4 10/13] ima: Add support for flushing the hash table when staging measurements
From: Roberto Sassu @ 2026-03-26 17:30 UTC (permalink / raw)
  To: corbet, skhan, zohar, dmitry.kasatkin, eric.snowberg, paul,
	jmorris, serge
  Cc: linux-doc, linux-kernel, linux-integrity, linux-security-module,
	gregorylumen, chenste, nramas, Roberto Sassu
In-Reply-To: <20260326173011.1191815-1-roberto.sassu@huaweicloud.com>

From: Roberto Sassu <roberto.sassu@huawei.com>

Introduce the new kernel option ima_flush_htable to decide whether or not
the digests of staged measurement entries are flushed from the hash table,
when they are deleted.

When the option is enabled, replace the old hash table with a new one,
by calling ima_alloc_replace_htable(), and completely delete the
measurements entries.

Note: This code derives from the Alt-IMA Huawei project, whose license is
      GPL-2.0 OR MIT.

Link: https://github.com/linux-integrity/linux/issues/1
Signed-off-by: Roberto Sassu <roberto.sassu@huawei.com>
---
 .../admin-guide/kernel-parameters.txt         |  4 +++
 security/integrity/ima/ima.h                  |  1 +
 security/integrity/ima/ima_queue.c            | 36 ++++++++++++++++---
 3 files changed, 37 insertions(+), 4 deletions(-)

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 89670c5e7c8e..a651a3864dcf 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -2345,6 +2345,10 @@ Kernel parameters
 			Use the canonical format for the binary runtime
 			measurements, instead of host native format.
 
+	ima_flush_htable  [IMA]
+			Flush the IMA hash table when deleting all the
+			staged measurement entries.
+
 	ima_hash=	[IMA]
 			Format: { md5 | sha1 | rmd160 | sha256 | sha384
 				   | sha512 | ... }
diff --git a/security/integrity/ima/ima.h b/security/integrity/ima/ima.h
index 65db152a0a24..699b735dec7d 100644
--- a/security/integrity/ima/ima.h
+++ b/security/integrity/ima/ima.h
@@ -340,6 +340,7 @@ extern atomic_long_t ima_num_entries[BINARY__LAST];
 extern atomic_long_t ima_num_violations;
 extern struct hlist_head __rcu *ima_htable;
 extern struct mutex ima_extend_list_mutex;
+extern bool ima_flush_htable;
 
 static inline unsigned int ima_hash_key(u8 *digest)
 {
diff --git a/security/integrity/ima/ima_queue.c b/security/integrity/ima/ima_queue.c
index 50519ed837d4..f5c18acfbc43 100644
--- a/security/integrity/ima/ima_queue.c
+++ b/security/integrity/ima/ima_queue.c
@@ -22,6 +22,20 @@
 
 #define AUDIT_CAUSE_LEN_MAX 32
 
+bool ima_flush_htable;
+
+static int __init ima_flush_htable_setup(char *str)
+{
+	if (IS_ENABLED(CONFIG_IMA_DISABLE_HTABLE)) {
+		pr_warn("Hash table not enabled, ignoring request to flush\n");
+		return 1;
+	}
+
+	ima_flush_htable = true;
+	return 1;
+}
+__setup("ima_flush_htable", ima_flush_htable_setup);
+
 /* pre-allocated array of tpm_digest structures to extend a PCR */
 static struct tpm_digest *digests;
 
@@ -317,10 +331,11 @@ int ima_queue_stage(void)
 	return ret;
 }
 
-static void ima_queue_delete(struct list_head *head);
+static void ima_queue_delete(struct list_head *head, bool flush_htable);
 
 int ima_queue_staged_delete_all(void)
 {
+	struct hlist_head *old_queue = NULL;
 	LIST_HEAD(ima_measurements_trim);
 
 	mutex_lock(&ima_extend_list_mutex);
@@ -337,13 +352,26 @@ int ima_queue_staged_delete_all(void)
 	if (IS_ENABLED(CONFIG_IMA_KEXEC))
 		binary_runtime_size[BINARY_STAGED] = 0;
 
+	if (ima_flush_htable) {
+		old_queue = ima_alloc_replace_htable();
+		if (IS_ERR(old_queue)) {
+			mutex_unlock(&ima_extend_list_mutex);
+			return PTR_ERR(old_queue);
+		}
+	}
+
 	mutex_unlock(&ima_extend_list_mutex);
 
-	ima_queue_delete(&ima_measurements_trim);
+	if (ima_flush_htable) {
+		synchronize_rcu();
+		kfree(old_queue);
+	}
+
+	ima_queue_delete(&ima_measurements_trim, ima_flush_htable);
 	return 0;
 }
 
-static void ima_queue_delete(struct list_head *head)
+static void ima_queue_delete(struct list_head *head, bool flush_htable)
 {
 	struct ima_queue_entry *qe, *qe_tmp;
 	unsigned int i;
@@ -365,7 +393,7 @@ static void ima_queue_delete(struct list_head *head)
 		list_del(&qe->later);
 
 		/* No leak if condition is false, referenced by ima_htable. */
-		if (IS_ENABLED(CONFIG_IMA_DISABLE_HTABLE)) {
+		if (IS_ENABLED(CONFIG_IMA_DISABLE_HTABLE) || flush_htable) {
 			kfree(qe->entry->digests);
 			kfree(qe->entry);
 			kfree(qe);
-- 
2.43.0


^ permalink raw reply related

* [PATCH v4 11/13] ima: Support staging and deleting N measurements entries
From: Roberto Sassu @ 2026-03-26 17:30 UTC (permalink / raw)
  To: corbet, skhan, zohar, dmitry.kasatkin, eric.snowberg, paul,
	jmorris, serge
  Cc: linux-doc, linux-kernel, linux-integrity, linux-security-module,
	gregorylumen, chenste, nramas, Roberto Sassu
In-Reply-To: <20260326173011.1191815-1-roberto.sassu@huaweicloud.com>

From: Roberto Sassu <roberto.sassu@huawei.com>

Add support for sending a value N between 1 and ULONG_MAX to the staging
interface. This value represents the number of measurements that should be
deleted from the current measurements list.

This staging method allows the remote attestation agents to easily separate
the measurements that were verified (staged and deleted) from those that
weren't due to the race between taking a TPM quote and reading the
measurements list.

In order to minimize the locking time of ima_extend_list_mutex, deleting
N entries is realized by staging the entire current measurements list
(with the lock), by determining the N-th staged entry (without the lock),
and by splicing the entries in excess back to the current measurements list
(with the lock). Finally, the N entries are deleted (without the lock).

Flushing the hash table is not supported for N entries, since it would
require removing the N entries one by one from the hash table under the
ima_extend_list_mutex lock, which would increase the locking time.

The ima_extend_list_mutex lock is necessary in ima_dump_measurement_list()
because ima_queue_staged_delete_partial() uses __list_cut_position() to
modify ima_measurements_staged, for which no RCU-safe variant exists. For
the staging with prompt flavor alone, list_replace_rcu() could have been
used instead, but since both flavors share the same kexec serialization
path, the mutex is required regardless.

Link: https://github.com/linux-integrity/linux/issues/1
Suggested-by: Steven Chen <chenste@linux.microsoft.com>
Signed-off-by: Roberto Sassu <roberto.sassu@huawei.com>
---
 security/integrity/ima/Kconfig     |  3 ++
 security/integrity/ima/ima.h       |  1 +
 security/integrity/ima/ima_fs.c    | 22 +++++++++-
 security/integrity/ima/ima_queue.c | 70 ++++++++++++++++++++++++++++++
 4 files changed, 95 insertions(+), 1 deletion(-)

diff --git a/security/integrity/ima/Kconfig b/security/integrity/ima/Kconfig
index e714726f3384..6ddb4e77bff5 100644
--- a/security/integrity/ima/Kconfig
+++ b/security/integrity/ima/Kconfig
@@ -341,6 +341,9 @@ config IMA_STAGING
 	  It allows user space to stage the measurements list for deletion and
 	  to delete the staged measurements after confirmation.
 
+	  Or, alternatively, it allows user space to specify N measurements
+	  entries to be deleted.
+
 	  On kexec, staging is reverted and staged measurements are prepended
 	  to the current measurements list when measurements are copied to the
 	  secondary kernel.
diff --git a/security/integrity/ima/ima.h b/security/integrity/ima/ima.h
index 699b735dec7d..de0693fce53c 100644
--- a/security/integrity/ima/ima.h
+++ b/security/integrity/ima/ima.h
@@ -319,6 +319,7 @@ struct ima_template_desc *lookup_template_desc(const char *name);
 bool ima_template_has_modsig(const struct ima_template_desc *ima_template);
 int ima_queue_stage(void);
 int ima_queue_staged_delete_all(void);
+int ima_queue_staged_delete_partial(unsigned long req_value);
 int ima_restore_measurement_entry(struct ima_template_entry *entry);
 int ima_restore_measurement_list(loff_t bufsize, void *buf);
 int ima_measurements_show(struct seq_file *m, void *v);
diff --git a/security/integrity/ima/ima_fs.c b/security/integrity/ima/ima_fs.c
index 39d9128e9f22..eb3f343c1138 100644
--- a/security/integrity/ima/ima_fs.c
+++ b/security/integrity/ima/ima_fs.c
@@ -28,6 +28,7 @@
  * Requests:
  * 'A\n': stage the entire measurements list
  * 'D\n': delete all staged measurements
+ * '[1, ULONG_MAX]\n': delete N measurements entries
  */
 #define STAGED_REQ_LENGTH 21
 
@@ -319,6 +320,7 @@ static ssize_t ima_measurements_staged_write(struct file *file,
 					     size_t datalen, loff_t *ppos)
 {
 	char req[STAGED_REQ_LENGTH];
+	unsigned long req_value;
 	int ret;
 
 	if (*ppos > 0 || datalen < 2 || datalen > STAGED_REQ_LENGTH)
@@ -346,7 +348,25 @@ static ssize_t ima_measurements_staged_write(struct file *file,
 		ret = ima_queue_staged_delete_all();
 		break;
 	default:
-		ret = -EINVAL;
+		if (ima_flush_htable) {
+			pr_debug("Deleting staged N measurements not supported when flushing the hash table is requested\n");
+			return -EINVAL;
+		}
+
+		ret = kstrtoul(req, 10, &req_value);
+		if (ret < 0)
+			return ret;
+
+		if (req_value == 0) {
+			pr_debug("Must delete at least one entry\n");
+			return -EINVAL;
+		}
+
+		ret = ima_queue_stage();
+		if (ret < 0)
+			return ret;
+
+		ret = ima_queue_staged_delete_partial(req_value);
 	}
 
 	if (ret < 0)
diff --git a/security/integrity/ima/ima_queue.c b/security/integrity/ima/ima_queue.c
index f5c18acfbc43..4fb557d61a88 100644
--- a/security/integrity/ima/ima_queue.c
+++ b/security/integrity/ima/ima_queue.c
@@ -371,6 +371,76 @@ int ima_queue_staged_delete_all(void)
 	return 0;
 }
 
+int ima_queue_staged_delete_partial(unsigned long req_value)
+{
+	unsigned long req_value_copy = req_value;
+	unsigned long size_to_remove = 0, num_to_remove = 0;
+	struct list_head *cut_pos = NULL;
+	LIST_HEAD(ima_measurements_trim);
+	struct ima_queue_entry *qe;
+	int ret = 0;
+
+	/*
+	 * Safe walk (no concurrent write), not under ima_extend_list_mutex
+	 * for performance reasons.
+	 */
+	list_for_each_entry(qe, &ima_measurements_staged, later) {
+		size_to_remove += get_binary_runtime_size(qe->entry);
+		num_to_remove++;
+
+		if (--req_value_copy == 0) {
+			/* qe->later always points to a valid list entry. */
+			cut_pos = &qe->later;
+			break;
+		}
+	}
+
+	/* Nothing to remove, undoing staging. */
+	if (req_value_copy > 0) {
+		size_to_remove = 0;
+		num_to_remove = 0;
+		ret = -ENOENT;
+	}
+
+	mutex_lock(&ima_extend_list_mutex);
+	if (list_empty(&ima_measurements_staged)) {
+		mutex_unlock(&ima_extend_list_mutex);
+		return -ENOENT;
+	}
+
+	if (cut_pos != NULL)
+		/*
+		 * ima_dump_measurement_list() does not modify the list,
+		 * cut_pos remains the same even if it was computed before
+		 * the lock.
+		 */
+		__list_cut_position(&ima_measurements_trim,
+				    &ima_measurements_staged, cut_pos);
+
+	atomic_long_sub(num_to_remove, &ima_num_entries[BINARY_STAGED]);
+	atomic_long_add(atomic_long_read(&ima_num_entries[BINARY_STAGED]),
+			&ima_num_entries[BINARY]);
+	atomic_long_set(&ima_num_entries[BINARY_STAGED], 0);
+
+	if (IS_ENABLED(CONFIG_IMA_KEXEC)) {
+		binary_runtime_size[BINARY_STAGED] -= size_to_remove;
+		binary_runtime_size[BINARY] +=
+					binary_runtime_size[BINARY_STAGED];
+		binary_runtime_size[BINARY_STAGED] = 0;
+	}
+
+	/*
+	 * Splice (prepend) any remaining non-deleted staged entries to the
+	 * active list (RCU not needed, there cannot be concurrent readers).
+	 */
+	list_splice(&ima_measurements_staged, &ima_measurements);
+	INIT_LIST_HEAD(&ima_measurements_staged);
+	mutex_unlock(&ima_extend_list_mutex);
+
+	ima_queue_delete(&ima_measurements_trim, false);
+	return ret;
+}
+
 static void ima_queue_delete(struct list_head *head, bool flush_htable)
 {
 	struct ima_queue_entry *qe, *qe_tmp;
-- 
2.43.0


^ permalink raw reply related

* [PATCH v4 12/13] ima: Return error on deleting staged measurements after kexec
From: Roberto Sassu @ 2026-03-26 17:30 UTC (permalink / raw)
  To: corbet, skhan, zohar, dmitry.kasatkin, eric.snowberg, paul,
	jmorris, serge
  Cc: linux-doc, linux-kernel, linux-integrity, linux-security-module,
	gregorylumen, chenste, nramas, Roberto Sassu
In-Reply-To: <20260326173011.1191815-1-roberto.sassu@huaweicloud.com>

From: Roberto Sassu <roberto.sassu@huawei.com>

Refuse to delete staged measurements (both with prompt and without) if
a kexec racing with the deletion already prepended staged measurements in
the kexec buffer. In this way, user space becomes aware that staged
measurements are going to appear in the secondary kernel, and thus they
don't have to be saved twice.

Link: https://github.com/linux-integrity/linux/issues/1
Signed-off-by: Roberto Sassu <roberto.sassu@huawei.com>
---
 security/integrity/ima/ima.h       |  1 +
 security/integrity/ima/ima_kexec.c |  3 +++
 security/integrity/ima/ima_queue.c | 17 +++++++++++++++++
 3 files changed, 21 insertions(+)

diff --git a/security/integrity/ima/ima.h b/security/integrity/ima/ima.h
index de0693fce53c..0816b69f7c39 100644
--- a/security/integrity/ima/ima.h
+++ b/security/integrity/ima/ima.h
@@ -342,6 +342,7 @@ extern atomic_long_t ima_num_violations;
 extern struct hlist_head __rcu *ima_htable;
 extern struct mutex ima_extend_list_mutex;
 extern bool ima_flush_htable;
+extern bool ima_staged_measurements_prepended;
 
 static inline unsigned int ima_hash_key(u8 *digest)
 {
diff --git a/security/integrity/ima/ima_kexec.c b/security/integrity/ima/ima_kexec.c
index d5503dd5cc9b..25c8356be22a 100644
--- a/security/integrity/ima/ima_kexec.c
+++ b/security/integrity/ima/ima_kexec.c
@@ -116,6 +116,9 @@ static int ima_dump_measurement_list(unsigned long *buffer_size, void **buffer,
 			break;
 	}
 
+	if (!list_empty(&ima_measurements_staged))
+		ima_staged_measurements_prepended = true;
+
 	list_for_each_entry_rcu(qe, &ima_measurements, later,
 				lockdep_is_held(&ima_extend_list_mutex)) {
 		if (!ret)
diff --git a/security/integrity/ima/ima_queue.c b/security/integrity/ima/ima_queue.c
index 4fb557d61a88..1918c8ea9abb 100644
--- a/security/integrity/ima/ima_queue.c
+++ b/security/integrity/ima/ima_queue.c
@@ -72,6 +72,13 @@ DEFINE_MUTEX(ima_extend_list_mutex);
  */
 static bool ima_measurements_suspended;
 
+/*
+ * Used to determine if staged measurements were prepended during kexec,
+ * so that an error can be sent to user space during deletion, and they don't
+ * store staged measurements twice.
+ */
+bool ima_staged_measurements_prepended;
+
 /* Callers must call synchronize_rcu() and free the hash table. */
 static struct hlist_head *ima_alloc_replace_htable(void)
 {
@@ -344,6 +351,11 @@ int ima_queue_staged_delete_all(void)
 		return -ENOENT;
 	}
 
+	if (ima_staged_measurements_prepended) {
+		mutex_unlock(&ima_extend_list_mutex);
+		return -ESTALE;
+	}
+
 	list_replace(&ima_measurements_staged, &ima_measurements_trim);
 	INIT_LIST_HEAD(&ima_measurements_staged);
 
@@ -408,6 +420,11 @@ int ima_queue_staged_delete_partial(unsigned long req_value)
 		return -ENOENT;
 	}
 
+	if (ima_staged_measurements_prepended) {
+		mutex_unlock(&ima_extend_list_mutex);
+		return -ESTALE;
+	}
+
 	if (cut_pos != NULL)
 		/*
 		 * ima_dump_measurement_list() does not modify the list,
-- 
2.43.0


^ permalink raw reply related

* [PATCH v4 13/13] doc: security: Add documentation of the IMA staging mechanism
From: Roberto Sassu @ 2026-03-26 17:30 UTC (permalink / raw)
  To: corbet, skhan, zohar, dmitry.kasatkin, eric.snowberg, paul,
	jmorris, serge
  Cc: linux-doc, linux-kernel, linux-integrity, linux-security-module,
	gregorylumen, chenste, nramas, Roberto Sassu
In-Reply-To: <20260326173011.1191815-1-roberto.sassu@huaweicloud.com>

From: Roberto Sassu <roberto.sassu@huawei.com>

Add the documentation of the IMA staging mechanism in
Documentation/security/IMA-staging.rst.

Also add the missing Documentation/security/IMA-templates.rst file in
MAINTAINERS.

Signed-off-by: Roberto Sassu <roberto.sassu@huawei.com>
---
 Documentation/security/IMA-staging.rst | 159 +++++++++++++++++++++++++
 Documentation/security/index.rst       |   1 +
 MAINTAINERS                            |   2 +
 3 files changed, 162 insertions(+)
 create mode 100644 Documentation/security/IMA-staging.rst

diff --git a/Documentation/security/IMA-staging.rst b/Documentation/security/IMA-staging.rst
new file mode 100644
index 000000000000..d0c27c0435c0
--- /dev/null
+++ b/Documentation/security/IMA-staging.rst
@@ -0,0 +1,159 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+==================================
+IMA Measurements Staging Mechanism
+==================================
+
+
+Introduction
+============
+
+The IMA measurements list is currently stored in the kernel memory. Memory
+occupation grows linearly with the number of entries, and can become a
+problem especially in environments with reduced resources.
+
+While there is an advantage in keeping the IMA measurements list in kernel
+memory, so that it is always available for reading from the securityfs
+interfaces, storing it elsewhere would make it possible to free precious
+memory for other kernel components.
+
+Storing the IMA measurements list outside the kernel does not introduce
+security issues, since its integrity is anyway protected by the TPM.
+
+Hence, the new IMA staging mechanism is introduced to allow user space
+to remove the desired portion of the measurements list from the kernel.
+
+
+Usage
+=====
+
+The IMA staging mechanism can be enabled from the kernel configuration with
+the CONFIG_IMA_STAGING option.
+
+If it is enabled, IMA duplicates the current measurements interfaces (both
+binary and ASCII), by adding the ``_staged`` file suffix. Unlike the
+existing counterparts, the ``_staged`` interfaces have write permission for
+the root user and group, and require the process to have CAP_SYS_ADMIN set.
+
+The staging mechanism supports two flavors.
+
+
+Staging with prompt
+~~~~~~~~~~~~~~~~~~~
+
+This staging process is achieved with the following steps.
+
+ 1. ``echo A > <_staged interface>``: the user requests IMA to stage the
+    entire measurements list;
+ 2. ``cat <_staged interface>``: the user reads the staged measurements;
+ 3. ``echo D > <_staged interface>``: the user requests IMA to delete
+    staged measurements.
+
+
+Staging and deleting
+~~~~~~~~~~~~~~~~~~~~
+
+This staging process is achieved with the following steps.
+
+ 1. ``cat <_non_staged interface>``: the user reads the current
+    measurements list and determines what the value N for staging should
+    be;
+ 2. ``echo N > <_staged interface>``: the user requests IMA to delete N
+    measurements from the current measurements list.
+
+
+Interface Access
+================
+
+To prevent the IMA measurements list from being suddenly truncated by the
+staging mechanism during a read, and to prevent concurrent staging requests,
+a semaphore-like locking scheme has been implemented on all the measurements
+list interfaces.
+
+Multiple readers can concurrently access the non-staged interfaces, and
+they are mutually exclusive with the one writer accessing the staged
+interfaces.
+
+If an illegal access occurs, the open to the measurements list interface is
+denied.
+
+
+Management of Staged Measurements
+=================================
+
+Since with the staging mechanism measurements entries are removed from the
+kernel, the user needs to save the staged ones in a storage and concatenate
+them together, so that it can present them to remote attestation agents as
+if staging was never done.
+
+Coordination is necessary in the case where there are multiple actors
+requesting measurements to be staged.
+
+In the staging with prompt case, the staging interface can be accessed only
+by one actor at a time, so the others will get an error until the former
+closes it. Since the actors don't care about N, when they gain access to
+the interface, they will get all the staged measurements at the time of
+their request.
+
+In the case of staging and deleting, coordination is more important, since
+there is the risk that two actors unaware of each other compute the value N
+on the current measurements list and request IMA to stage N twice.
+
+
+Remote Attestation Agent Workflow
+=================================
+
+Users can choose the staging method they find more appropriate for their
+workflow.
+
+If, as an example, a remote attestation agent would like to present to the
+remote attestation server only the measurements that are required to
+verify the TPM quote, its workflow would be the following.
+
+With staging with prompt, the agent stages the current measurements list,
+reads and stores the measurements in a storage and immediately requests
+IMA to delete the staged measurements from kernel memory. Afterwards, it
+calculates N by replaying the PCR extend on the stored measurements until
+the calculated PCRs match the quoted PCRs. It then keeps the measurements
+in excess for the next attestation request.
+
+At the next attestation request, the agent performs the same steps above,
+and concatenates the new measurements to the ones in excess from the
+previous request. Also in this case, the agent replays the PCR extend until
+it matches the currently quoted PCRs, keeps the measurements in excess and
+presents the new N measurements entries to the remote attestation server.
+
+With the staging and deleting method, the agent reads the current
+measurements list, calculates N and requests IMA to delete only those. The
+measurements in excess are kept in the IMA measurements list and can be
+retrieved at the next remote attestation request.
+
+Kexec
+=====
+
+In the event a kexec() system call occurs between staging and deleting, the
+staged measurements entries are prepended to the current measurements list,
+so that they are both available when the secondary kernel starts. In that
+case, IMA returns an error to the user when attempting to delete staged
+measurements to notify about their copy to the kexec buffer, so that the
+user does not store them twice.
+
+
+Hash table
+==========
+
+By default, the template digests of staged measurement entries are kept in
+kernel memory (only template data are freed), to be able to detect
+duplicate entries independently of staging.
+
+The new kernel option ``ima_flush_htable`` has been introduced to
+explicitly request a complete deletion of the staged measurements, for
+maximum kernel memory saving. If the option has been specified, duplicate
+entries are still avoided on entries of the current measurements list,
+but there can be duplicates between different groups of staged
+measurements.
+
+Flushing the hash table is supported only for the staging with prompt
+flavor. For the staging and deleting flavor, it would have been necessary
+to lock the hot path adding new measurements for the time needed to remove
+each staged measurement individually.
diff --git a/Documentation/security/index.rst b/Documentation/security/index.rst
index 3e0a7114a862..cec064dc1c83 100644
--- a/Documentation/security/index.rst
+++ b/Documentation/security/index.rst
@@ -8,6 +8,7 @@ Security Documentation
    credentials
    snp-tdx-threat-model
    IMA-templates
+   IMA-staging
    keys/index
    lsm
    lsm-development
diff --git a/MAINTAINERS b/MAINTAINERS
index 04823afa8b74..e78dfb4f4482 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -12668,6 +12668,8 @@ R:	Eric Snowberg <eric.snowberg@oracle.com>
 L:	linux-integrity@vger.kernel.org
 S:	Supported
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/zohar/linux-integrity.git
+F:	Documentation/security/IMA-staging.rst
+F:	Documentation/security/IMA-templates.rst
 F:	include/linux/secure_boot.h
 F:	security/integrity/
 F:	security/integrity/ima/
-- 
2.43.0


^ permalink raw reply related

* [GIT PULL] Landlock fix for v7.0-rc6
From: Mickaël Salaün @ 2026-03-26 17:27 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: Mickaël Salaün, Dan Cojocaru, Günther Noack,
	Panagiotis Ivory Vasilopoulos, Yihan Ding, linux-kernel,
	linux-security-module

Hi,

This PR mainly fixes Landlock TSYNC issues related to interrupts and
unexpected task exit.  Other fixes touch documentation and sample, and a
new test extends coverage.

Please pull these changes for v7.0-rc6 .  These commits merge cleanly
with your master branch.  Kernel changes have been tested in the latest
linux-next releases for some weeks.

Test coverage for security/landlock is 91.0% of 2105 lines according to
LLVM 21, and it was (up to) 91.4% of 2093 lines before this PR.

syzkaller (also) reported the fixed issues:
- https://syzkaller.appspot.com/bug?extid=7ea2f5e9dfd468201817
- https://syzkaller.appspot.com/bug?extid=741e2278ef71fef03a10

Regards,
 Mickaël

--
The following changes since commit 11439c4635edd669ae435eec308f4ab8a0804808:

  Linux 7.0-rc2 (2026-03-01 15:39:31 -0800)

are available in the Git repository at:

  https://git.kernel.org/pub/scm/linux/kernel/git/mic/linux.git tags/landlock-7.0-rc6

for you to fetch changes up to a23811061a553c70c42de0e811b2ec15b2d54157:

  landlock: Expand restrict flags example for ABI version 8 (2026-03-24 20:55:55 +0100)

----------------------------------------------------------------
Landlock fix for v7.0-rc6

----------------------------------------------------------------
Günther Noack (1):
      samples/landlock: Bump ABI version to 8

Mickaël Salaün (4):
      landlock: Fix formatting
      landlock: Fully release unused TSYNC work entries
      landlock: Improve TSYNC types
      selftests/landlock: Test tsync interruption and cancellation paths

Panagiotis "Ivory" Vasilopoulos (1):
      landlock: Expand restrict flags example for ABI version 8

Yihan Ding (2):
      landlock: Serialize TSYNC thread restriction
      landlock: Clean up interrupted thread logic in TSYNC

 Documentation/userspace-api/landlock.rst      | 23 +++++--
 samples/landlock/sandboxer.c                  |  5 +-
 security/landlock/domain.c                    |  3 +-
 security/landlock/ruleset.c                   |  9 ++-
 security/landlock/tsync.c                     | 92 +++++++++++++++++++++------
 tools/testing/selftests/landlock/tsync_test.c | 91 +++++++++++++++++++++++++-
 6 files changed, 190 insertions(+), 33 deletions(-)

^ permalink raw reply

* [PATCH v4 00/13] ima: Introduce staging mechanism
From: Roberto Sassu @ 2026-03-26 17:29 UTC (permalink / raw)
  To: corbet, skhan, zohar, dmitry.kasatkin, eric.snowberg, paul,
	jmorris, serge
  Cc: linux-doc, linux-kernel, linux-integrity, linux-security-module,
	gregorylumen, chenste, nramas, Roberto Sassu

From: Roberto Sassu <roberto.sassu@huawei.com>

Introduction
============

The IMA measurements list is currently stored in the kernel memory. Memory
occupation grows linearly with the number of entries, and can become a
problem especially in environments with reduced resources.

While there is an advantage in keeping the IMA measurements list in kernel
memory, so that it is always available for reading from the securityfs
interfaces, storing it elsewhere would make it possible to free precious
memory for other kernel components.

Storing the IMA measurements list outside the kernel does not introduce
security issues, since its integrity is anyway protected by the TPM.

Hence, the new IMA staging mechanism is introduced to allow user space
to remove the desired portion of the measurements list from the kernel.

The IMA staging mechanism can be enabled from the kernel configuration with
the CONFIG_IMA_STAGING option.

If it is enabled, IMA duplicates the current measurements interfaces (both
binary and ASCII), by adding the ``_staged`` file suffix. Unlike the
existing counterparts, the ``_staged`` interfaces have write permission for
the root user and group, and require the process to have CAP_SYS_ADMIN set.

The staging mechanism supports two flavors.

Staging with prompt:

 1. ``echo A > <_staged interface>``: the user requests IMA to stage the
    entire measurements list;
 2. ``cat <_staged interface>``: the user reads the staged measurements;
 3. ``echo D > <_staged interface>``: the user requests IMA to delete
    staged measurements.

Staging and deleting:

 1. ``cat <_non_staged interface>``: the user reads the current
    measurements list and determines what the value N for staging should
    be;
 2. ``echo N > <_staged interface>``: the user requests IMA to delete N
    measurements from the current measurements list.

Since with the staging mechanism measurements entries are removed from the
kernel, the user needs to save the staged ones in a storage and concatenate
them together, so that it can present them to remote attestation agents as
if staging was never done.

Patch set content
=================

Patches 1-8 are preparatory patches to quickly replace the hash table,
maintain separate counters for the different measurements list types,
mediate access to the measurements list interface, and simplify the staging
patches.

Patch 9 introduces the staging with prompt flavor. Patch 10 makes it
possible to flush the hash table when deleting all the staged measurements.
Patch 11 introduces the staging and deleting flavor. Patch 12 prevents staged
measurements entries from being stored twice if there is contention between
the measurements interfaces and kexec. Patch 13 adds the documentation of the
staging mechanism.

Changelog
=========

v3:
 - Add Kconfig option to enable the staging mechanism (suggested by Mimi)
 - Change the meaning of BINARY_STAGED to be just the staged measurements
 - Separate the two staging flavors in two different functions:
   ima_queue_staged_delete_all() for staging with prompt,
   ima_queue_staged_delete_partial() for staging and deleting
 - Delete N entries without staging first (suggested by Mimi)
 - Avoid duplicate staged entries if there is contention between the
   measurements list interfaces and kexec

v2:
 - New patch to move measurements and violation counters outside the
   ima_h_table structure
 - New patch to quickly replace the hash table
 - Forbid partial deletion when flushing hash table (suggested by Mimi)
 - Ignore ima_flush_htable if CONFIG_IMA_DISABLE_HTABLE is enabled
 - BINARY_SIZE_* renamed to BINARY_* for better clarity
 - Removed ima_measurements_staged_exist and testing list empty instead
 - ima_queue_stage_trim() and ima_queue_delete_staged_trimmed() renamed to
   ima_queue_stage() and ima_queue_delete_staged()
 - New delete interval [1, ULONG_MAX - 1]
 - Rename ima_measure_lock to ima_measure_mutex
 - Move seq_open() and seq_release() outside the ima_measure_mutex lock
 - Drop ima_measurements_staged_read() and use seq_read() instead
 - Optimize create_securityfs_measurement_lists() changes
 - New file name format with _staged suffix at the end of the file name
 - Use _rcu list variant in ima_dump_measurement_list()
 - Remove support for direct trimming and splice the remaining entries to
   the active list (suggested by Mimi)
 - Hot swap the hash table if flushing is requested

v1:
 - Support for direct trimming without staging
 - Support unstaging on kexec (requested by Gregory Lumen)

Roberto Sassu (13):
  ima: Remove ima_h_table structure
  ima: Replace static htable queue with dynamically allocated array
  ima: Introduce per binary measurements list type ima_num_entries
    counter
  ima: Introduce per binary measurements list type binary_runtime_size
    value
  ima: Introduce _ima_measurements_start() and _ima_measurements_next()
  ima: Mediate open/release method of the measurements list
  ima: Use snprintf() in create_securityfs_measurement_lists
  ima: Introduce ima_dump_measurement()
  ima: Add support for staging measurements with prompt
  ima: Add support for flushing the hash table when staging measurements
  ima: Support staging and deleting N measurements entries
  ima: Return error on deleting staged measurements after kexec
  doc: security: Add documentation of the IMA staging mechanism

 .../admin-guide/kernel-parameters.txt         |   4 +
 Documentation/security/IMA-staging.rst        | 159 +++++++++
 Documentation/security/index.rst              |   1 +
 MAINTAINERS                                   |   2 +
 security/integrity/ima/Kconfig                |  16 +
 security/integrity/ima/ima.h                  |  28 +-
 security/integrity/ima/ima_api.c              |   2 +-
 security/integrity/ima/ima_fs.c               | 302 +++++++++++++++--
 security/integrity/ima/ima_init.c             |   5 +
 security/integrity/ima/ima_kexec.c            |  47 ++-
 security/integrity/ima/ima_queue.c            | 310 ++++++++++++++++--
 11 files changed, 803 insertions(+), 73 deletions(-)
 create mode 100644 Documentation/security/IMA-staging.rst

-- 
2.43.0

^ permalink raw reply

* Re: [GIT PULL] Landlock fix for v7.0-rc6
From: pr-tracker-bot @ 2026-03-26 19:48 UTC (permalink / raw)
  To: Mickaël Salaün
  Cc: Linus Torvalds, Mickaël Salaün, Dan Cojocaru,
	Günther Noack, Panagiotis Ivory Vasilopoulos, Yihan Ding,
	linux-kernel, linux-security-module
In-Reply-To: <20260326172727.119094-1-mic@digikod.net>

The pull request you sent on Thu, 26 Mar 2026 18:27:27 +0100:

> https://git.kernel.org/pub/scm/linux/kernel/git/mic/linux.git tags/landlock-7.0-rc6

has been merged into torvalds/linux.git:
https://git.kernel.org/torvalds/c/25b69ebe28c8e3f883b071e924b87d358db56047

Thank you!

-- 
Deet-doot-dot, I am a bot.
https://korg.docs.kernel.org/prtracker.html

^ permalink raw reply

* Re: [PATCH v3 0/8] module: Move 'struct module_signature' to UAPI
From: Sami Tolvanen @ 2026-03-26 20:06 UTC (permalink / raw)
  To: David Howells, David Woodhouse, Luis Chamberlain, Petr Pavlu,
	Daniel Gomez, Aaron Tomlin, Heiko Carstens, Vasily Gorbik,
	Alexander Gordeev, Christian Borntraeger, Sven Schnelle,
	Mimi Zohar, Roberto Sassu, Dmitry Kasatkin, Eric Snowberg,
	Paul Moore, James Morris, Serge E. Hallyn, Nathan Chancellor,
	Nicolas Schier, Alexei Starovoitov, Daniel Borkmann,
	Andrii Nakryiko, Martin KaFai Lau, Eduard Zingerman, Song Liu,
	Yonghong Song, John Fastabend, KP Singh, Stanislav Fomichev,
	Hao Luo, Jiri Olsa, Shuah Khan, Thomas Weißschuh
  Cc: keyrings, linux-kernel, linux-modules, linux-s390,
	linux-integrity, linux-security-module, linux-kbuild, bpf,
	linux-kselftest
In-Reply-To: <20260305-module-signature-uapi-v3-0-92f45ea6028c@linutronix.de>

On Thu, 05 Mar 2026 10:31:36 +0100, Thomas Weißschuh wrote:
> This structure definition is used outside the kernel proper.
> For example in kmod and the kernel build environment.
> 
> To allow reuse, move it to a new UAPI header.
> 
> While it is not a true UAPI, it is a common practice to have
> non-UAPI interface definitions in the kernel's UAPI headers.
> 
> [...]

Applied to modules-next, thanks!

[1/8] extract-cert: drop unused definition of PKEY_ID_PKCS7
      commit: 137676d4482d8b8d755890b4ed29fe8223661d20
[2/8] module: Drop unused signature types
      commit: 8988913aacee82e5401bf3b96839731982dcbde7
[3/8] module: Give 'enum pkey_id_type' a more specific name
      commit: acd87264af525dba6e9355310e8acdf066a5f6b5
[4/8] module: Give MODULE_SIG_STRING a more descriptive name
      commit: 2ae4ea2d9aaf25cb74fbc23450b1b8f0a5b7aa89
[5/8] module: Move 'struct module_signature' to UAPI
      commit: f9909cf0a2dcc9e99377f3fcc965ccd93e518e34
[6/8] tools uapi headers: add linux/module_signature.h
      commit: d2d7561dc656748f592cc34d34bf5db8d5c67f7b
[7/8] sign-file: use 'struct module_signature' from the UAPI headers
      commit: e340db306c3bb85877490f33a78eb80549ac43a7
[8/8] selftests/bpf: verify_pkcs7_sig: Use 'struct module_signature' from the UAPI headers
      commit: 55722b3f80377103bac6ac748554129108c75651

Best regards,

	Sami



^ permalink raw reply

* Re: [PATCH v4 09/13] ima: Add support for staging measurements with prompt
From: steven chen @ 2026-03-26 22:44 UTC (permalink / raw)
  To: Roberto Sassu, corbet, skhan, zohar, dmitry.kasatkin,
	eric.snowberg, paul, jmorris, serge
  Cc: linux-doc, linux-kernel, linux-integrity, linux-security-module,
	gregorylumen, nramas, Roberto Sassu, steven chen
In-Reply-To: <20260326173011.1191815-10-roberto.sassu@huaweicloud.com>

On 3/26/2026 10:30 AM, Roberto Sassu wrote:
> From: Roberto Sassu <roberto.sassu@huawei.com>
>
> Introduce the ability of staging the IMA measurement list and deleting them
> with a prompt.
>
> Staging means moving the current content of the measurement list to a
> separate location, and allowing users to read and delete it. This causes
> the measurement list to be atomically truncated before new measurements can
> be added. Staging can be done only once at a time. In the event of kexec(),
> staging is reverted and staged entries will be carried over to the new
> kernel.
>
> Introduce ascii_runtime_measurements_<algo>_staged and
> binary_runtime_measurements_<algo>_staged interfaces to stage and delete
> the measurements. Use 'echo A > <IMA interface>' and
> 'echo D > <IMA interface>' to respectively stage and delete the entire
> measurements list. Locking of these interfaces is also mediated with a call
> to _ima_measurements_open() and with ima_measurements_release().
>
> Implement the staging functionality by introducing the new global
> measurements list ima_measurements_staged, and ima_queue_stage() and
> ima_queue_staged_delete_all() to respectively move measurements from the
> current measurements list to the staged one, and to move staged
> measurements to the ima_measurements_trim list for deletion. Introduce
> ima_queue_delete() to delete the measurements.
>
> Finally, introduce the BINARY_STAGED and BINARY_FULL binary measurements
> list types, to maintain the counters and the binary size of staged
> measurements and the full measurements list (including entries that were
> staged). BINARY still represents the current binary measurements list.
>
> Use the binary size for the BINARY + BINARY_STAGED types in
> ima_add_kexec_buffer(), since both measurements list types are copied to
> the secondary kernel during kexec. Use BINARY_FULL in
> ima_measure_kexec_event(), to generate a critical data record.
>
> It should be noted that the BINARY_FULL counter is not passed through
> kexec. Thus, the number of entries included in the kexec critical data
> records refers to the entries since the previous kexec records.
>
> Note: This code derives from the Alt-IMA Huawei project, whose license is
>        GPL-2.0 OR MIT.
>
> Link: https://github.com/linux-integrity/linux/issues/1
> Suggested-by: Gregory Lumen <gregorylumen@linux.microsoft.com> (staging revert)
> Signed-off-by: Roberto Sassu <roberto.sassu@huawei.com>
> ---
>   security/integrity/ima/Kconfig     |  13 +++
>   security/integrity/ima/ima.h       |   8 +-
>   security/integrity/ima/ima_fs.c    | 167 ++++++++++++++++++++++++++---
>   security/integrity/ima/ima_kexec.c |  22 +++-
>   security/integrity/ima/ima_queue.c |  97 ++++++++++++++++-
>   5 files changed, 286 insertions(+), 21 deletions(-)
>
> diff --git a/security/integrity/ima/Kconfig b/security/integrity/ima/Kconfig
> index 976e75f9b9ba..e714726f3384 100644
> --- a/security/integrity/ima/Kconfig
> +++ b/security/integrity/ima/Kconfig
> @@ -332,4 +332,17 @@ config IMA_KEXEC_EXTRA_MEMORY_KB
>   	  If set to the default value of 0, an extra half page of memory for those
>   	  additional measurements will be allocated.
>   
> +config IMA_STAGING
> +	bool "Support for staging the measurements list"
> +	default y
> +	help
> +	  Add support for staging the measurements list.
> +
> +	  It allows user space to stage the measurements list for deletion and
> +	  to delete the staged measurements after confirmation.
> +
> +	  On kexec, staging is reverted and staged measurements are prepended
> +	  to the current measurements list when measurements are copied to the
> +	  secondary kernel.
> +
>   endif
> diff --git a/security/integrity/ima/ima.h b/security/integrity/ima/ima.h
> index 97b7d6024b5d..65db152a0a24 100644
> --- a/security/integrity/ima/ima.h
> +++ b/security/integrity/ima/ima.h
> @@ -30,9 +30,11 @@ enum tpm_pcrs { TPM_PCR0 = 0, TPM_PCR8 = 8, TPM_PCR10 = 10 };
>   
>   /*
>    * BINARY: current binary measurements list
> + * BINARY_STAGED: staged binary measurements list
> + * BINARY_FULL: binary measurements list since IMA init (lost after kexec)
>    */
>   enum binary_lists {
> -	BINARY, BINARY__LAST
> +	BINARY, BINARY_STAGED, BINARY_FULL, BINARY__LAST
>   };
>   
>   /* digest size for IMA, fits SHA1 or MD5 */
> @@ -125,6 +127,7 @@ struct ima_queue_entry {
>   	struct ima_template_entry *entry;
>   };
>   extern struct list_head ima_measurements;	/* list of all measurements */
> +extern struct list_head ima_measurements_staged; /* list of staged meas. */
>   
>   /* Some details preceding the binary serialized measurement list */
>   struct ima_kexec_hdr {
> @@ -314,6 +317,8 @@ struct ima_template_desc *ima_template_desc_current(void);
>   struct ima_template_desc *ima_template_desc_buf(void);
>   struct ima_template_desc *lookup_template_desc(const char *name);
>   bool ima_template_has_modsig(const struct ima_template_desc *ima_template);
> +int ima_queue_stage(void);
> +int ima_queue_staged_delete_all(void);
>   int ima_restore_measurement_entry(struct ima_template_entry *entry);
>   int ima_restore_measurement_list(loff_t bufsize, void *buf);
>   int ima_measurements_show(struct seq_file *m, void *v);
> @@ -334,6 +339,7 @@ extern spinlock_t ima_queue_lock;
>   extern atomic_long_t ima_num_entries[BINARY__LAST];
>   extern atomic_long_t ima_num_violations;
>   extern struct hlist_head __rcu *ima_htable;
> +extern struct mutex ima_extend_list_mutex;
>   
>   static inline unsigned int ima_hash_key(u8 *digest)
>   {
> diff --git a/security/integrity/ima/ima_fs.c b/security/integrity/ima/ima_fs.c
> index 7709a4576322..39d9128e9f22 100644
> --- a/security/integrity/ima/ima_fs.c
> +++ b/security/integrity/ima/ima_fs.c
> @@ -24,6 +24,13 @@
>   
>   #include "ima.h"
>   
> +/*
> + * Requests:
> + * 'A\n': stage the entire measurements list
> + * 'D\n': delete all staged measurements
> + */
> +#define STAGED_REQ_LENGTH 21
> +
>   static DEFINE_MUTEX(ima_write_mutex);
>   static DEFINE_MUTEX(ima_measure_mutex);
>   static long ima_measure_users;
> @@ -97,6 +104,11 @@ static void *ima_measurements_start(struct seq_file *m, loff_t *pos)
>   	return _ima_measurements_start(m, pos, &ima_measurements);
>   }
>   
> +static void *ima_measurements_staged_start(struct seq_file *m, loff_t *pos)
> +{
> +	return _ima_measurements_start(m, pos, &ima_measurements_staged);
> +}
> +
>   static void *_ima_measurements_next(struct seq_file *m, void *v, loff_t *pos,
>   				    struct list_head *head)
>   {
> @@ -118,6 +130,12 @@ static void *ima_measurements_next(struct seq_file *m, void *v, loff_t *pos)
>   	return _ima_measurements_next(m, v, pos, &ima_measurements);
>   }
>   
> +static void *ima_measurements_staged_next(struct seq_file *m, void *v,
> +					  loff_t *pos)
> +{
> +	return _ima_measurements_next(m, v, pos, &ima_measurements_staged);
> +}
> +
>   static void ima_measurements_stop(struct seq_file *m, void *v)
>   {
>   }
> @@ -283,6 +301,68 @@ static const struct file_operations ima_measurements_ops = {
>   	.release = ima_measurements_release,
>   };
>   
> +static const struct seq_operations ima_measurments_staged_seqops = {
> +	.start = ima_measurements_staged_start,
> +	.next = ima_measurements_staged_next,
> +	.stop = ima_measurements_stop,
> +	.show = ima_measurements_show
> +};
> +
> +static int ima_measurements_staged_open(struct inode *inode, struct file *file)
> +{
> +	return _ima_measurements_open(inode, file,
> +				      &ima_measurments_staged_seqops);
> +}
> +
> +static ssize_t ima_measurements_staged_write(struct file *file,
> +					     const char __user *buf,
> +					     size_t datalen, loff_t *ppos)
> +{
> +	char req[STAGED_REQ_LENGTH];
> +	int ret;
> +
> +	if (*ppos > 0 || datalen < 2 || datalen > STAGED_REQ_LENGTH)
> +		return -EINVAL;
> +
> +	if (copy_from_user(req, buf, datalen) != 0)
> +		return -EFAULT;
> +
> +	if (req[datalen - 1] != '\n')
> +		return -EINVAL;
> +
> +	req[datalen - 1] = '\0';
> +
> +	switch (req[0]) {
> +	case 'A':
> +		if (datalen != 2)
> +			return -EINVAL;
> +
> +		ret = ima_queue_stage();
> +		break;
> +	case 'D':
> +		if (datalen != 2)
> +			return -EINVAL;
> +
> +		ret = ima_queue_staged_delete_all();
> +		break;

I think the following two steps may not work because of a race condition:

step1: ret = ima_queue_stage(); //this will put all logs in active list into staged list;
step2: ret = ima_queue_staged_delete_all(); //this will delete all logs in staged list;

The race condition arises from the following sequence:
     1. The current active log list is LA1.
     2. The user agent reads the TPM quote QA1, which matches list LA1.
     3. A new event NewLog is added to the active log list, which becomes
        LA1+NewLog.
     4. The user agent calls ima_queue_stage(), which generates a staged
        list containing LA1+NewLog.
     5. The user agent calls ima_queue_staged_delete_all(), which also
        deletes the NewLog entry added in step 3.

The next attestation will then fail if it relies on the active log list in
the kernel.

Thanks,

Steven

> +	default:
> +		ret = -EINVAL;
> +	}
> +
> +	if (ret < 0)
> +		return ret;
> +
> +	return datalen;
> +}
> +
> +static const struct file_operations ima_measurements_staged_ops = {
> +	.open = ima_measurements_staged_open,
> +	.read = seq_read,
> +	.write = ima_measurements_staged_write,
> +	.llseek = seq_lseek,
> +	.release = ima_measurements_release,
> +};
> +
>   void ima_print_digest(struct seq_file *m, u8 *digest, u32 size)
>   {
>   	u32 i;
> @@ -356,6 +436,28 @@ static const struct file_operations ima_ascii_measurements_ops = {
>   	.release = ima_measurements_release,
>   };
>   
> +static const struct seq_operations ima_ascii_measurements_staged_seqops = {
> +	.start = ima_measurements_staged_start,
> +	.next = ima_measurements_staged_next,
> +	.stop = ima_measurements_stop,
> +	.show = ima_ascii_measurements_show
> +};
> +
> +static int ima_ascii_measurements_staged_open(struct inode *inode,
> +					      struct file *file)
> +{
> +	return _ima_measurements_open(inode, file,
> +				      &ima_ascii_measurements_staged_seqops);
> +}
> +
> +static const struct file_operations ima_ascii_measurements_staged_ops = {
> +	.open = ima_ascii_measurements_staged_open,
> +	.read = seq_read,
> +	.write = ima_measurements_staged_write,
> +	.llseek = seq_lseek,
> +	.release = ima_measurements_release,
> +};
> +
>   static ssize_t ima_read_policy(char *path)
>   {
>   	void *data = NULL;
> @@ -459,10 +561,21 @@ static const struct seq_operations ima_policy_seqops = {
>   };
>   #endif
>   
> -static int __init create_securityfs_measurement_lists(void)
> +static int __init create_securityfs_measurement_lists(bool staging)
>   {
> +	const struct file_operations *ascii_ops = &ima_ascii_measurements_ops;
> +	const struct file_operations *binary_ops = &ima_measurements_ops;
> +	mode_t permissions = S_IRUSR | S_IRGRP;
> +	const char *file_suffix = "";
>   	int count = NR_BANKS(ima_tpm_chip);
>   
> +	if (staging) {
> +		ascii_ops = &ima_ascii_measurements_staged_ops;
> +		binary_ops = &ima_measurements_staged_ops;
> +		file_suffix = "_staged";
> +		permissions |= (S_IWUSR | S_IWGRP);
> +	}
> +
>   	if (ima_sha1_idx >= NR_BANKS(ima_tpm_chip))
>   		count++;
>   
> @@ -473,29 +586,32 @@ static int __init create_securityfs_measurement_lists(void)
>   
>   		if (algo == HASH_ALGO__LAST)
>   			snprintf(file_name, sizeof(file_name),
> -				 "ascii_runtime_measurements_tpm_alg_%x",
> -				 ima_tpm_chip->allocated_banks[i].alg_id);
> +				 "ascii_runtime_measurements_tpm_alg_%x%s",
> +				 ima_tpm_chip->allocated_banks[i].alg_id,
> +				 file_suffix);
>   		else
>   			snprintf(file_name, sizeof(file_name),
> -				 "ascii_runtime_measurements_%s",
> -				 hash_algo_name[algo]);
> -		dentry = securityfs_create_file(file_name, S_IRUSR | S_IRGRP,
> +				 "ascii_runtime_measurements_%s%s",
> +				 hash_algo_name[algo], file_suffix);
> +		dentry = securityfs_create_file(file_name, permissions,
>   						ima_dir, (void *)(uintptr_t)i,
> -						&ima_ascii_measurements_ops);
> +						ascii_ops);
>   		if (IS_ERR(dentry))
>   			return PTR_ERR(dentry);
>   
>   		if (algo == HASH_ALGO__LAST)
>   			snprintf(file_name, sizeof(file_name),
> -				 "binary_runtime_measurements_tpm_alg_%x",
> -				 ima_tpm_chip->allocated_banks[i].alg_id);
> +				 "binary_runtime_measurements_tpm_alg_%x%s",
> +				 ima_tpm_chip->allocated_banks[i].alg_id,
> +				 file_suffix);
>   		else
>   			snprintf(file_name, sizeof(file_name),
> -				 "binary_runtime_measurements_%s",
> -				 hash_algo_name[algo]);
> -		dentry = securityfs_create_file(file_name, S_IRUSR | S_IRGRP,
> +				 "binary_runtime_measurements_%s%s",
> +				 hash_algo_name[algo], file_suffix);
> +
> +		dentry = securityfs_create_file(file_name, permissions,
>   						ima_dir, (void *)(uintptr_t)i,
> -						&ima_measurements_ops);
> +						binary_ops);
>   		if (IS_ERR(dentry))
>   			return PTR_ERR(dentry);
>   	}
> @@ -503,6 +619,23 @@ static int __init create_securityfs_measurement_lists(void)
>   	return 0;
>   }
>   
> +static int __init create_securityfs_staging_links(void)
> +{
> +	struct dentry *dentry;
> +
> +	dentry = securityfs_create_symlink("binary_runtime_measurements_staged",
> +		ima_dir, "binary_runtime_measurements_sha1_staged", NULL);
> +	if (IS_ERR(dentry))
> +		return PTR_ERR(dentry);
> +
> +	dentry = securityfs_create_symlink("ascii_runtime_measurements_staged",
> +		ima_dir, "ascii_runtime_measurements_sha1_staged", NULL);
> +	if (IS_ERR(dentry))
> +		return PTR_ERR(dentry);
> +
> +	return 0;
> +}
> +
>   /*
>    * ima_open_policy: sequentialize access to the policy file
>    */
> @@ -595,7 +728,13 @@ int __init ima_fs_init(void)
>   		goto out;
>   	}
>   
> -	ret = create_securityfs_measurement_lists();
> +	ret = create_securityfs_measurement_lists(false);
> +	if (ret == 0 && IS_ENABLED(CONFIG_IMA_STAGING)) {
> +		ret = create_securityfs_measurement_lists(true);
> +		if (ret == 0)
> +			ret = create_securityfs_staging_links();
> +	}
> +
>   	if (ret != 0)
>   		goto out;
>   
> diff --git a/security/integrity/ima/ima_kexec.c b/security/integrity/ima/ima_kexec.c
> index d7d0fb639d99..d5503dd5cc9b 100644
> --- a/security/integrity/ima/ima_kexec.c
> +++ b/security/integrity/ima/ima_kexec.c
> @@ -42,8 +42,8 @@ void ima_measure_kexec_event(const char *event_name)
>   	long len;
>   	int n;
>   
> -	buf_size = ima_get_binary_runtime_size(BINARY);
> -	len = atomic_long_read(&ima_num_entries[BINARY]);
> +	buf_size = ima_get_binary_runtime_size(BINARY_FULL);
> +	len = atomic_long_read(&ima_num_entries[BINARY_FULL]);
>   
>   	n = scnprintf(ima_kexec_event, IMA_KEXEC_EVENT_LEN,
>   		      "kexec_segment_size=%lu;ima_binary_runtime_size=%lu;"
> @@ -106,13 +106,26 @@ static int ima_dump_measurement_list(unsigned long *buffer_size, void **buffer,
>   
>   	memset(&khdr, 0, sizeof(khdr));
>   	khdr.version = 1;
> -	/* This is an append-only list, no need to hold the RCU read lock */
> -	list_for_each_entry_rcu(qe, &ima_measurements, later, true) {
> +	/* It can race with ima_queue_stage() and ima_queue_delete_staged(). */
> +	mutex_lock(&ima_extend_list_mutex);
> +
> +	list_for_each_entry_rcu(qe, &ima_measurements_staged, later,
> +				lockdep_is_held(&ima_extend_list_mutex)) {
>   		ret = ima_dump_measurement(&khdr, qe);
>   		if (ret < 0)
>   			break;
>   	}
>   
> +	list_for_each_entry_rcu(qe, &ima_measurements, later,
> +				lockdep_is_held(&ima_extend_list_mutex)) {
> +		if (!ret)
> +			ret = ima_dump_measurement(&khdr, qe);
> +		if (ret < 0)
> +			break;
> +	}
> +
> +	mutex_unlock(&ima_extend_list_mutex);
> +
>   	/*
>   	 * fill in reserved space with some buffer details
>   	 * (eg. version, buffer size, number of measurements)
> @@ -167,6 +180,7 @@ void ima_add_kexec_buffer(struct kimage *image)
>   		extra_memory = CONFIG_IMA_KEXEC_EXTRA_MEMORY_KB * 1024;
>   
>   	binary_runtime_size = ima_get_binary_runtime_size(BINARY) +
> +			      ima_get_binary_runtime_size(BINARY_STAGED) +
>   			      extra_memory;
>   
>   	if (binary_runtime_size >= ULONG_MAX - PAGE_SIZE)
> diff --git a/security/integrity/ima/ima_queue.c b/security/integrity/ima/ima_queue.c
> index b6d10dceb669..50519ed837d4 100644
> --- a/security/integrity/ima/ima_queue.c
> +++ b/security/integrity/ima/ima_queue.c
> @@ -26,6 +26,7 @@
>   static struct tpm_digest *digests;
>   
>   LIST_HEAD(ima_measurements);	/* list of all measurements */
> +LIST_HEAD(ima_measurements_staged); /* list of staged measurements */
>   #ifdef CONFIG_IMA_KEXEC
>   static unsigned long binary_runtime_size[BINARY__LAST];
>   #else
> @@ -45,11 +46,11 @@ atomic_long_t ima_num_violations = ATOMIC_LONG_INIT(0);
>   /* key: inode (before secure-hashing a file) */
>   struct hlist_head __rcu *ima_htable;
>   
> -/* mutex protects atomicity of extending measurement list
> +/* mutex protects atomicity of extending and staging measurement list
>    * and extending the TPM PCR aggregate. Since tpm_extend can take
>    * long (and the tpm driver uses a mutex), we can't use the spinlock.
>    */
> -static DEFINE_MUTEX(ima_extend_list_mutex);
> +DEFINE_MUTEX(ima_extend_list_mutex);
>   
>   /*
>    * Used internally by the kernel to suspend measurements.
> @@ -174,12 +175,16 @@ static int ima_add_digest_entry(struct ima_template_entry *entry,
>   				lockdep_is_held(&ima_extend_list_mutex));
>   
>   	atomic_long_inc(&ima_num_entries[BINARY]);
> +	atomic_long_inc(&ima_num_entries[BINARY_FULL]);
> +
>   	if (update_htable) {
>   		key = ima_hash_key(entry->digests[ima_hash_algo_idx].digest);
>   		hlist_add_head_rcu(&qe->hnext, &htable[key]);
>   	}
>   
>   	ima_update_binary_runtime_size(entry, BINARY);
> +	ima_update_binary_runtime_size(entry, BINARY_FULL);
> +
>   	return 0;
>   }
>   
> @@ -280,6 +285,94 @@ int ima_add_template_entry(struct ima_template_entry *entry, int violation,
>   	return result;
>   }
>   
> +int ima_queue_stage(void)
> +{
> +	int ret = 0;
> +
> +	mutex_lock(&ima_extend_list_mutex);
> +	if (!list_empty(&ima_measurements_staged)) {
> +		ret = -EEXIST;
> +		goto out_unlock;
> +	}
> +
> +	if (list_empty(&ima_measurements)) {
> +		ret = -ENOENT;
> +		goto out_unlock;
> +	}
> +
> +	list_replace(&ima_measurements, &ima_measurements_staged);
> +	INIT_LIST_HEAD(&ima_measurements);
> +
> +	atomic_long_set(&ima_num_entries[BINARY_STAGED],
> +			atomic_long_read(&ima_num_entries[BINARY]));
> +	atomic_long_set(&ima_num_entries[BINARY], 0);
> +
> +	if (IS_ENABLED(CONFIG_IMA_KEXEC)) {
> +		binary_runtime_size[BINARY_STAGED] =
> +					binary_runtime_size[BINARY];
> +		binary_runtime_size[BINARY] = 0;
> +	}
> +out_unlock:
> +	mutex_unlock(&ima_extend_list_mutex);
> +	return ret;
> +}
> +
> +static void ima_queue_delete(struct list_head *head);
> +
> +int ima_queue_staged_delete_all(void)
> +{
> +	LIST_HEAD(ima_measurements_trim);
> +
> +	mutex_lock(&ima_extend_list_mutex);
> +	if (list_empty(&ima_measurements_staged)) {
> +		mutex_unlock(&ima_extend_list_mutex);
> +		return -ENOENT;
> +	}
> +
> +	list_replace(&ima_measurements_staged, &ima_measurements_trim);
> +	INIT_LIST_HEAD(&ima_measurements_staged);
> +
> +	atomic_long_set(&ima_num_entries[BINARY_STAGED], 0);
> +
> +	if (IS_ENABLED(CONFIG_IMA_KEXEC))
> +		binary_runtime_size[BINARY_STAGED] = 0;
> +
> +	mutex_unlock(&ima_extend_list_mutex);
> +
> +	ima_queue_delete(&ima_measurements_trim);
> +	return 0;
> +}
> +
> +static void ima_queue_delete(struct list_head *head)
> +{
> +	struct ima_queue_entry *qe, *qe_tmp;
> +	unsigned int i;
> +
> +	list_for_each_entry_safe(qe, qe_tmp, head, later) {
> +		/*
> +		 * Safe to free template_data here without synchronize_rcu()
> +		 * because the only htable reader, ima_lookup_digest_entry(),
> +		 * accesses only entry->digests, not template_data. If new
> +		 * htable readers are added that access template_data, a
> +		 * synchronize_rcu() is required here.
> +		 */
> +		for (i = 0; i < qe->entry->template_desc->num_fields; i++) {
> +			kfree(qe->entry->template_data[i].data);
> +			qe->entry->template_data[i].data = NULL;
> +			qe->entry->template_data[i].len = 0;
> +		}
> +
> +		list_del(&qe->later);
> +
> +		/* No leak if condition is false, referenced by ima_htable. */
> +		if (IS_ENABLED(CONFIG_IMA_DISABLE_HTABLE)) {
> +			kfree(qe->entry->digests);
> +			kfree(qe->entry);
> +			kfree(qe);
> +		}
> +	}
> +}
> +
>   int ima_restore_measurement_entry(struct ima_template_entry *entry)
>   {
>   	int result = 0;



^ permalink raw reply

* Re: [PATCH v4 11/13] ima: Support staging and deleting N measurements entries
From: steven chen @ 2026-03-26 23:19 UTC (permalink / raw)
  To: Roberto Sassu, corbet, skhan, zohar, dmitry.kasatkin,
	eric.snowberg, paul, jmorris, serge
  Cc: linux-doc, linux-kernel, linux-integrity, linux-security-module,
	gregorylumen, nramas, Roberto Sassu
In-Reply-To: <20260326173011.1191815-12-roberto.sassu@huaweicloud.com>

On 3/26/2026 10:30 AM, Roberto Sassu wrote:
> From: Roberto Sassu <roberto.sassu@huawei.com>
>
> Add support for sending a value N between 1 and ULONG_MAX to the staging
> interface. This value represents the number of measurements that should be
> deleted from the current measurements list.
>
> This staging method allows the remote attestation agents to easily separate
> the measurements that were verified (staged and deleted) from those that
> weren't due to the race between taking a TPM quote and reading the
> measurements list.
>
> In order to minimize the locking time of ima_extend_list_mutex, deleting
> N entries is realized by staging the entire current measurements list
> (with the lock), by determining the N-th staged entry (without the lock),
> and by splicing the entries in excess back to the current measurements list
> (with the lock). Finally, the N entries are deleted (without the lock).
>
> Flushing the hash table is not supported for N entries, since it would
> require removing the N entries one by one from the hash table under the
> ima_extend_list_mutex lock, which would increase the locking time.
>
> The ima_extend_list_mutex lock is necessary in ima_dump_measurement_list()
> because ima_queue_staged_delete_partial() uses __list_cut_position() to
> modify ima_measurements_staged, for which no RCU-safe variant exists. For
> the staging with prompt flavor alone, list_replace_rcu() could have been
> used instead, but since both flavors share the same kexec serialization
> path, the mutex is required regardless.
>
> Link: https://github.com/linux-integrity/linux/issues/1
> Suggested-by: Steven Chen <chenste@linux.microsoft.com>
> Signed-off-by: Roberto Sassu <roberto.sassu@huawei.com>
> ---
>   security/integrity/ima/Kconfig     |  3 ++
>   security/integrity/ima/ima.h       |  1 +
>   security/integrity/ima/ima_fs.c    | 22 +++++++++-
>   security/integrity/ima/ima_queue.c | 70 ++++++++++++++++++++++++++++++
>   4 files changed, 95 insertions(+), 1 deletion(-)
>
> diff --git a/security/integrity/ima/Kconfig b/security/integrity/ima/Kconfig
> index e714726f3384..6ddb4e77bff5 100644
> --- a/security/integrity/ima/Kconfig
> +++ b/security/integrity/ima/Kconfig
> @@ -341,6 +341,9 @@ config IMA_STAGING
>   	  It allows user space to stage the measurements list for deletion and
>   	  to delete the staged measurements after confirmation.
>   
> +	  Or, alternatively, it allows user space to specify N measurements
> +	  entries to be deleted.
> +
>   	  On kexec, staging is reverted and staged measurements are prepended
>   	  to the current measurements list when measurements are copied to the
>   	  secondary kernel.
> diff --git a/security/integrity/ima/ima.h b/security/integrity/ima/ima.h
> index 699b735dec7d..de0693fce53c 100644
> --- a/security/integrity/ima/ima.h
> +++ b/security/integrity/ima/ima.h
> @@ -319,6 +319,7 @@ struct ima_template_desc *lookup_template_desc(const char *name);
>   bool ima_template_has_modsig(const struct ima_template_desc *ima_template);
>   int ima_queue_stage(void);
>   int ima_queue_staged_delete_all(void);
> +int ima_queue_staged_delete_partial(unsigned long req_value);
>   int ima_restore_measurement_entry(struct ima_template_entry *entry);
>   int ima_restore_measurement_list(loff_t bufsize, void *buf);
>   int ima_measurements_show(struct seq_file *m, void *v);
> diff --git a/security/integrity/ima/ima_fs.c b/security/integrity/ima/ima_fs.c
> index 39d9128e9f22..eb3f343c1138 100644
> --- a/security/integrity/ima/ima_fs.c
> +++ b/security/integrity/ima/ima_fs.c
> @@ -28,6 +28,7 @@
>    * Requests:
>    * 'A\n': stage the entire measurements list
>    * 'D\n': delete all staged measurements
> + * '[1, ULONG_MAX]\n' delete N measurements entries
>    */
>   #define STAGED_REQ_LENGTH 21
>   
> @@ -319,6 +320,7 @@ static ssize_t ima_measurements_staged_write(struct file *file,
>   					     size_t datalen, loff_t *ppos)
>   {
>   	char req[STAGED_REQ_LENGTH];
> +	unsigned long req_value;
>   	int ret;
>   
>   	if (*ppos > 0 || datalen < 2 || datalen > STAGED_REQ_LENGTH)
> @@ -346,7 +348,25 @@ static ssize_t ima_measurements_staged_write(struct file *file,
>   		ret = ima_queue_staged_delete_all();
>   		break;
>   	default:
> -		ret = -EINVAL;
> +		if (ima_flush_htable) {
> +			pr_debug("Deleting staged N measurements not supported when flushing the hash table is requested\n");
> +			return -EINVAL;
> +		}
> +
> +		ret = kstrtoul(req, 10, &req_value);
> +		if (ret < 0)
> +			return ret;
> +
> +		if (req_value == 0) {
> +			pr_debug("Must delete at least one entry\n");
> +			return -EINVAL;
> +		}
> +
> +		ret = ima_queue_stage();
> +		if (ret < 0)
> +			return ret;
> +
> +		ret = ima_queue_staged_delete_partial(req_value);
The default processing is the "Trim N" idea plus a performance improvement.

Here everything is done in one step, and this is what I said in v3.

[PATCH v3 1/3] ima: Remove ima_h_table structure 
<https://lore.kernel.org/linux-integrity/c61aeaa79929a98cb3a6d30835972891fac3570f.camel@linux.ibm.com/T/#t>


The two important parts of trimming are "trim N" and the performance improvement.

The performance improvement includes two parts:
     hash table staging
     active log list staging

I think "Trim N" plus the performance improvement is the right direction
to go.
Much of the code for the "stage" part of the two-step "stage and trim"
approach can be removed.

Also, a race condition may happen if user space does not hold the list for
the whole attestation period: staging, reading the list, attestation, and
trimming.

So, in order to reduce the user space lock time described above, "Trim T:N"
can be used, so that user space does not need to hold the list for long
during attestation.

For Trim T:N, T represents the total number of log entries trimmed since
system boot. Please refer to
https://lore.kernel.org/linux-integrity/20260205235849.7086-1-chenste@linux.microsoft.com/T/#t

Thanks,

Steven
>   	}
>   
>   	if (ret < 0)
> diff --git a/security/integrity/ima/ima_queue.c b/security/integrity/ima/ima_queue.c
> index f5c18acfbc43..4fb557d61a88 100644
> --- a/security/integrity/ima/ima_queue.c
> +++ b/security/integrity/ima/ima_queue.c
> @@ -371,6 +371,76 @@ int ima_queue_staged_delete_all(void)
>   	return 0;
>   }
>   
> +int ima_queue_staged_delete_partial(unsigned long req_value)
> +{
> +	unsigned long req_value_copy = req_value;
> +	unsigned long size_to_remove = 0, num_to_remove = 0;
> +	struct list_head *cut_pos = NULL;
> +	LIST_HEAD(ima_measurements_trim);
> +	struct ima_queue_entry *qe;
> +	int ret = 0;
> +
> +	/*
> +	 * Safe walk (no concurrent write), not under ima_extend_list_mutex
> +	 * for performance reasons.
> +	 */
> +	list_for_each_entry(qe, &ima_measurements_staged, later) {
> +		size_to_remove += get_binary_runtime_size(qe->entry);
> +		num_to_remove++;
> +
> +		if (--req_value_copy == 0) {
> +			/* qe->later always points to a valid list entry. */
> +			cut_pos = &qe->later;
> +			break;
> +		}
> +	}
> +
> +	/* Nothing to remove, undoing staging. */
> +	if (req_value_copy > 0) {
> +		size_to_remove = 0;
> +		num_to_remove = 0;
> +		ret = -ENOENT;
> +	}
> +
> +	mutex_lock(&ima_extend_list_mutex);
> +	if (list_empty(&ima_measurements_staged)) {
> +		mutex_unlock(&ima_extend_list_mutex);
> +		return -ENOENT;
> +	}
> +
> +	if (cut_pos != NULL)
> +		/*
> +		 * ima_dump_measurement_list() does not modify the list,
> +		 * cut_pos remains the same even if it was computed before
> +		 * the lock.
> +		 */
> +		__list_cut_position(&ima_measurements_trim,
> +				    &ima_measurements_staged, cut_pos);
> +
> +	atomic_long_sub(num_to_remove, &ima_num_entries[BINARY_STAGED]);
> +	atomic_long_add(atomic_long_read(&ima_num_entries[BINARY_STAGED]),
> +			&ima_num_entries[BINARY]);
> +	atomic_long_set(&ima_num_entries[BINARY_STAGED], 0);
> +
> +	if (IS_ENABLED(CONFIG_IMA_KEXEC)) {
> +		binary_runtime_size[BINARY_STAGED] -= size_to_remove;
> +		binary_runtime_size[BINARY] +=
> +					binary_runtime_size[BINARY_STAGED];
> +		binary_runtime_size[BINARY_STAGED] = 0;
> +	}
> +
> +	/*
> +	 * Splice (prepend) any remaining non-deleted staged entries to the
> +	 * active list (RCU not needed, there cannot be concurrent readers).
> +	 */
> +	list_splice(&ima_measurements_staged, &ima_measurements);
> +	INIT_LIST_HEAD(&ima_measurements_staged);
> +	mutex_unlock(&ima_extend_list_mutex);
> +
> +	ima_queue_delete(&ima_measurements_trim, false);
> +	return ret;
> +}
> +
>   static void ima_queue_delete(struct list_head *head, bool flush_htable)
>   {
>   	struct ima_queue_entry *qe, *qe_tmp;



^ permalink raw reply

* Re: [PATCH 0/7] lsm: Replace security_sb_mount with granular mount hooks
From: Song Liu @ 2026-03-27  0:31 UTC (permalink / raw)
  To: linux-security-module, linux-fsdevel, selinux, apparmor, paul,
	john.johansen, mic
  Cc: jmorris, serge, viro, brauner, jack, stephen.smalley.work,
	omosnace, gnoack, takedakn, penguin-kernel, herton, kernel-team
In-Reply-To: <20260318184400.3502908-1-song@kernel.org>

Hi folks, especially SELinux, AppArmor, and Landlock maintainers,

Could you please share your comments on this set? AFAICT, there are
no functional changes (other than fixing TOCTOU) to existing LSMs.
If there are no issues with these changes, can we land the set in 7.1
kernels?

Thanks,
Song

On Wed, Mar 18, 2026 at 11:44 AM Song Liu <song@kernel.org> wrote:
[...]

> All existing LSM behaviors are preserved:
>   AppArmor: same policy matching, TOCTOU fixed for bind/move
>   SELinux:  same permission checks (FILE__MOUNTON, FILESYSTEM__REMOUNT)
>   Landlock: same deny-all for sandboxed processes
>   Tomoyo:   same policy matching, TOCTOU fixed for bind/move, unused
>             data_page parameter removed
>
>
> This work is inspired by earlier discussions:
>
> [1] https://lore.kernel.org/bpf/20251127005011.1872209-1-song@kernel.org/
> [2] https://lore.kernel.org/linux-security-module/20250708230504.3994335-1-song@kernel.org/
>
>
> Song Liu (7):
>   lsm: Add granular mount hooks to replace security_sb_mount
>   apparmor: Remove redundant MS_MGC_MSK stripping in apparmor_sb_mount
>   apparmor: Convert from sb_mount to granular mount hooks
>   selinux: Convert from sb_mount to granular mount hooks
>   landlock: Convert from sb_mount to granular mount hooks
>   tomoyo: Convert from sb_mount to granular mount hooks
>   lsm: Remove security_sb_mount and security_move_mount

^ permalink raw reply

* Re: [PATCH 6/7] tomoyo: Convert from sb_mount to granular mount hooks
From: Song Liu @ 2026-03-27  0:40 UTC (permalink / raw)
  To: Tetsuo Handa, Christian Brauner, viro@zeniv.linux.org.uk
  Cc: Song Liu, paul@paul-moore.com, jmorris@namei.org,
	serge@hallyn.com, jack@suse.cz, john.johansen@canonical.com,
	stephen.smalley.work@gmail.com, omosnace@redhat.com,
	mic@digikod.net, gnoack@google.com, takedakn@nttdata.co.jp,
	herton@canonical.com, Kernel Team, selinux@vger.kernel.org,
	apparmor@lists.ubuntu.com, linux-fsdevel@vger.kernel.org,
	linux-security-module@vger.kernel.org
In-Reply-To: <6609e11e-90aa-4021-974e-e9937688dd49@I-love.SAKURA.ne.jp>

Hi Christian and Al,

On Mon, Mar 23, 2026 at 11:12 PM Tetsuo Handa
<penguin-kernel@i-love.sakura.ne.jp> wrote:
[...]
> diff --git a/fs/namespace.c b/fs/namespace.c
> index ba5baccdde67..621b8205a0af 100644
> --- a/fs/namespace.c
> +++ b/fs/namespace.c
> @@ -3777,7 +3777,7 @@ static bool mount_too_revealing(const struct super_block *sb, int *new_mnt_flags
>   * be added to the namespace tree.
>   */
>  static int do_new_mount_fc(struct fs_context *fc, const struct path *mountpoint,
> -                          unsigned int mnt_flags)
> +                          unsigned int mnt_flags, void *data, unsigned long flags)
>  {
>         struct super_block *sb;
>         struct vfsmount *mnt __free(mntput) = fc_mount(fc);
> @@ -3786,6 +3786,10 @@ static int do_new_mount_fc(struct fs_context *fc, const struct path *mountpoint,
>         if (IS_ERR(mnt))
>                 return PTR_ERR(mnt);
>
> +       error = security_mount_new(fc, mountpoint, mnt_flags, flags, data);
> +       if (error)
> +               return error;
> +
>         sb = fc->root->d_sb;
>         error = security_sb_kern_mount(sb);
>         if (unlikely(error))
> @@ -3857,9 +3861,7 @@ static int do_new_mount(const struct path *path, const char *fstype,
>                 err = -EPERM;
>
>         if (!err)
> -               err = security_mount_new(fc, path, mnt_flags, flags, data);
> -       if (!err)
> -               err = do_new_mount_fc(fc, path, mnt_flags);
> +               err = do_new_mount_fc(fc, path, mnt_flags, data, flags);
>
>         put_fs_context(fc);
>         return err;

Could you please comment on Tetsuo's proposal here? The core
change (moving security_mount_new after fc_mount) makes sense
to me. However, it seems to require a lot of code to support this
change.

Given that the current patchset already addresses the TOCTOU issues
of bind mount, etc., maybe we can land this set and address the
TOCTOU issue with new mounts in follow-up patches?

Thanks,
Song

^ permalink raw reply

* Re: [PATCH 0/7] lsm: Replace security_sb_mount with granular mount hooks
From: Paul Moore @ 2026-03-27  1:06 UTC (permalink / raw)
  To: Song Liu
  Cc: linux-security-module, linux-fsdevel, selinux, apparmor,
	john.johansen, mic, jmorris, serge, viro, brauner, jack,
	stephen.smalley.work, omosnace, gnoack, takedakn, penguin-kernel,
	herton, kernel-team
In-Reply-To: <CAPhsuW5h=-BVp6g2UtHTUO8PQtbiSmqDrn0BT3rbdN4BkhQpeA@mail.gmail.com>

On Thu, Mar 26, 2026 at 8:31 PM Song Liu <song@kernel.org> wrote:
>
> Hi folks, especially SELinux, AppArmor, and LandLock maintainers,
>
> Could you please share your comments on this set?

I think we are all aware of this patchset, but most of us are fairly
busy at the moment.  This patchset is in my queue to review, but it
isn't something I'm going to be able to review this week.

> If there are no issues with these changes, can we land the set in 7.1
> kernels?

Given that we are near the end of -rc5, that is highly unlikely.

https://github.com/LinuxSecurityModule/kernel?tab=readme-ov-file#kernel-development-process

-- 
paul-moore.com

^ permalink raw reply

* [PATCH RESEND] apparmor: Fix string overrun due to missing termination
From: Daniel J Blueman @ 2026-03-27 11:58 UTC (permalink / raw)
  To: John Johansen, Paul Moore, James Morris, Serge E. Hallyn,
	Thorsten Blum, apparmor, linux-security-module
  Cc: linux-kernel, Daniel J Blueman, stable

When booting Ubuntu 26.04 with Linux 7.0-rc4 on an ARM64 Qualcomm
Snapdragon X1 we see a string buffer overrun:

BUG: KASAN: slab-out-of-bounds in aa_dfa_match (security/apparmor/match.c:535)
Read of size 1 at addr ffff0008901cc000 by task snap-update-ns/2120

CPU: 5 UID: 60578 PID: 2120 Comm: snap-update-ns Not tainted 7.0.0-rc4+ #22 PREEMPTLAZY
Hardware name: LENOVO 83ED/LNVNB161216, BIOS NHCN60WW 09/11/2025
Call trace:
show_stack (arch/arm64/kernel/stacktrace.c:501) (C)
dump_stack_lvl (lib/dump_stack.c:122)
print_report (mm/kasan/report.c:379 mm/kasan/report.c:482)
kasan_report (mm/kasan/report.c:597)
__asan_report_load1_noabort (mm/kasan/report_generic.c:378)
aa_dfa_match (security/apparmor/match.c:535)
match_mnt_path_str (security/apparmor/mount.c:244 security/apparmor/mount.c:336)
match_mnt (security/apparmor/mount.c:371)
aa_bind_mount (security/apparmor/mount.c:447 (discriminator 4))
apparmor_sb_mount (security/apparmor/lsm.c:719 (discriminator 1))
security_sb_mount (security/security.c:1062 (discriminator 31))
path_mount (fs/namespace.c:4101)
__arm64_sys_mount (fs/namespace.c:4172 fs/namespace.c:4361 fs/namespace.c:4338 fs/namespace.c:4338)
invoke_syscall.constprop.0 (arch/arm64/kernel/syscall.c:35 arch/arm64/kernel/syscall.c:49)
el0_svc_common.constprop.0 (./include/linux/thread_info.h:142 (discriminator 2) arch/arm64/kernel/syscall.c:140 (discriminator 2))
do_el0_svc (arch/arm64/kernel/syscall.c:152)
el0_svc (arch/arm64/kernel/entry-common.c:80 arch/arm64/kernel/entry-common.c:725)
el0t_64_sync_handler (arch/arm64/kernel/entry-common.c:744)
el0t_64_sync (arch/arm64/kernel/entry.S:596)

Allocated by task 2120:
kasan_save_stack (mm/kasan/common.c:58)
kasan_save_track (./arch/arm64/include/asm/current.h:19 mm/kasan/common.c:70 mm/kasan/common.c:79)
kasan_save_alloc_info (mm/kasan/generic.c:571)
__kasan_kmalloc (mm/kasan/common.c:419)
__kmalloc_noprof (./include/linux/kasan.h:263 mm/slub.c:5260 mm/slub.c:5272)
aa_get_buffer (security/apparmor/lsm.c:2201)
aa_bind_mount (security/apparmor/mount.c:442)
apparmor_sb_mount (security/apparmor/lsm.c:719 (discriminator 1))
security_sb_mount (security/security.c:1062 (discriminator 31))
path_mount (fs/namespace.c:4101)
__arm64_sys_mount (fs/namespace.c:4172 fs/namespace.c:4361 fs/namespace.c:4338 fs/namespace.c:4338)
invoke_syscall.constprop.0 (arch/arm64/kernel/syscall.c:35 arch/arm64/kernel/syscall.c:49)
el0_svc_common.constprop.0 (./include/linux/thread_info.h:142 (discriminator 2) arch/arm64/kernel/syscall.c:140 (discriminator 2))
do_el0_svc (arch/arm64/kernel/syscall.c:152)
el0_svc (arch/arm64/kernel/entry-common.c:80 arch/arm64/kernel/entry-common.c:725)
el0t_64_sync_handler (arch/arm64/kernel/entry-common.c:744)
el0t_64_sync (arch/arm64/kernel/entry.S:596)

The buggy address belongs to the object at ffff0008901ca000
which belongs to the cache kmalloc-rnd-06-8k of size 8192
The buggy address is located 0 bytes to the right of
allocated 8192-byte region [ffff0008901ca000, ffff0008901cc000)

The buggy address belongs to the physical page:
page: refcount:0 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x9101c8
head: order:3 mapcount:0 entire_mapcount:0 nr_pages_mapped:-1 pincount:0
flags: 0x8000000000000040(head|zone=2)
page_type: f5(slab)
raw: 8000000000000040 ffff000800016c40 fffffdffe2d14e10 ffff000800015c70
raw: 0000000000000000 0000000800010001 00000000f5000000 0000000000000000
head: 8000000000000040 ffff000800016c40 fffffdffe2d14e10 ffff000800015c70
head: 0000000000000000 0000000800010001 00000000f5000000 0000000000000000
head: 8000000000000003 fffffdffe2407201 fffffdffffffffff 00000000ffffffff
head: ffffffffffffffff 0000000000000000 00000000ffffffff 0000000000000008
page dumped because: kasan: bad access detected

Memory state around the buggy address:
ffff0008901cbf00: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
ffff0008901cbf80: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
>ffff0008901cc000: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
^
ffff0008901cc080: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
ffff0008901cc100: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc

This was introduced by a previous incorrect conversion from strcpy(). Fix
it by adding the missing terminator.

Cc: stable@vger.kernel.org
Signed-off-by: Daniel J Blueman <daniel@quora.org>
Fixes: 93d4dbdc8da0 ("apparmor: Replace deprecated strcpy in d_namespace_path")
---
 security/apparmor/path.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/security/apparmor/path.c b/security/apparmor/path.c
index 65a0ca5cc1bd..2494e8101538 100644
--- a/security/apparmor/path.c
+++ b/security/apparmor/path.c
@@ -164,14 +164,16 @@ static int d_namespace_path(const struct path *path, char *buf, char **name,
 	}
 
 out:
-	/* Append "/" to directory paths, except for root "/" which
-	 * already ends in a slash.
+	/* Append "/" to directory paths and reterminate string, except for
+	 * root "/" which already ends in a slash.
 	 */
 	if (!error && isdir) {
 		bool is_root = (*name)[0] == '/' && (*name)[1] == '\0';
 
-		if (!is_root)
+		if (!is_root) {
 			buf[aa_g_path_max - 2] = '/';
+			buf[aa_g_path_max - 1] = '\0';
+		}
 	}
 
 	return error;
-- 
2.53.0


^ permalink raw reply related

* Re: [PATCH v4 09/13] ima: Add support for staging measurements with prompt
From: Roberto Sassu @ 2026-03-27 16:45 UTC (permalink / raw)
  To: steven chen, corbet, skhan, zohar, dmitry.kasatkin, eric.snowberg,
	paul, jmorris, serge
  Cc: linux-doc, linux-kernel, linux-integrity, linux-security-module,
	gregorylumen, nramas, Roberto Sassu
In-Reply-To: <ef9c296a-940a-4bb5-a0b9-184532cf4bb6@linux.microsoft.com>

On Thu, 2026-03-26 at 15:44 -0700, steven chen wrote:
> On 3/26/2026 10:30 AM, Roberto Sassu wrote:
> > From: Roberto Sassu <roberto.sassu@huawei.com>
> > 
> > Introduce the ability of staging the IMA measurement list and deleting them
> > with a prompt.
> > 
> > Staging means moving the current content of the measurement list to a
> > separate location, and allowing users to read and delete it. This causes
> > the measurement list to be atomically truncated before new measurements can
> > be added. Staging can be done only once at a time. In the event of kexec(),
> > staging is reverted and staged entries will be carried over to the new
> > kernel.
> > 
> > Introduce ascii_runtime_measurements_<algo>_staged and
> > binary_runtime_measurements_<algo>_staged interfaces to stage and delete
> > the measurements. Use 'echo A > <IMA interface>' and
> > 'echo D > <IMA interface>' to respectively stage and delete the entire
> > measurements list. Locking of these interfaces is also mediated with a call
> > to _ima_measurements_open() and with ima_measurements_release().
> > 
> > Implement the staging functionality by introducing the new global
> > measurements list ima_measurements_staged, and ima_queue_stage() and
> > ima_queue_staged_delete_all() to respectively move measurements from the
> > current measurements list to the staged one, and to move staged
> > measurements to the ima_measurements_trim list for deletion. Introduce
> > ima_queue_delete() to delete the measurements.
> > 
> > Finally, introduce the BINARY_STAGED and BINARY_FULL binary measurements
> > list types, to maintain the counters and the binary size of staged
> > measurements and the full measurements list (including entries that were
> > staged). BINARY still represents the current binary measurements list.
> > 
> > Use the binary size for the BINARY + BINARY_STAGED types in
> > ima_add_kexec_buffer(), since both measurements list types are copied to
> > the secondary kernel during kexec. Use BINARY_FULL in
> > ima_measure_kexec_event(), to generate a critical data record.
> > 
> > It should be noted that the BINARY_FULL counter is not passed through
> > kexec. Thus, the number of entries included in the kexec critical data
> > records refers to the entries since the previous kexec records.
> > 
> > Note: This code derives from the Alt-IMA Huawei project, whose license is
> >        GPL-2.0 OR MIT.
> > 
> > Link: https://github.com/linux-integrity/linux/issues/1
> > Suggested-by: Gregory Lumen <gregorylumen@linux.microsoft.com> (staging revert)
> > Signed-off-by: Roberto Sassu <roberto.sassu@huawei.com>
> > ---
> >   security/integrity/ima/Kconfig     |  13 +++
> >   security/integrity/ima/ima.h       |   8 +-
> >   security/integrity/ima/ima_fs.c    | 167 ++++++++++++++++++++++++++---
> >   security/integrity/ima/ima_kexec.c |  22 +++-
> >   security/integrity/ima/ima_queue.c |  97 ++++++++++++++++-
> >   5 files changed, 286 insertions(+), 21 deletions(-)
> > 
> > diff --git a/security/integrity/ima/Kconfig b/security/integrity/ima/Kconfig
> > index 976e75f9b9ba..e714726f3384 100644
> > --- a/security/integrity/ima/Kconfig
> > +++ b/security/integrity/ima/Kconfig
> > @@ -332,4 +332,17 @@ config IMA_KEXEC_EXTRA_MEMORY_KB
> >   	  If set to the default value of 0, an extra half page of memory for those
> >   	  additional measurements will be allocated.
> >   
> > +config IMA_STAGING
> > +	bool "Support for staging the measurements list"
> > +	default y
> > +	help
> > +	  Add support for staging the measurements list.
> > +
> > +	  It allows user space to stage the measurements list for deletion and
> > +	  to delete the staged measurements after confirmation.
> > +
> > +	  On kexec, staging is reverted and staged measurements are prepended
> > +	  to the current measurements list when measurements are copied to the
> > +	  secondary kernel.
> > +
> >   endif
> > diff --git a/security/integrity/ima/ima.h b/security/integrity/ima/ima.h
> > index 97b7d6024b5d..65db152a0a24 100644
> > --- a/security/integrity/ima/ima.h
> > +++ b/security/integrity/ima/ima.h
> > @@ -30,9 +30,11 @@ enum tpm_pcrs { TPM_PCR0 = 0, TPM_PCR8 = 8, TPM_PCR10 = 10 };
> >   
> >   /*
> >    * BINARY: current binary measurements list
> > + * BINARY_STAGED: staged binary measurements list
> > + * BINARY_FULL: binary measurements list since IMA init (lost after kexec)
> >    */
> >   enum binary_lists {
> > -	BINARY, BINARY__LAST
> > +	BINARY, BINARY_STAGED, BINARY_FULL, BINARY__LAST
> >   };
> >   
> >   /* digest size for IMA, fits SHA1 or MD5 */
> > @@ -125,6 +127,7 @@ struct ima_queue_entry {
> >   	struct ima_template_entry *entry;
> >   };
> >   extern struct list_head ima_measurements;	/* list of all measurements */
> > +extern struct list_head ima_measurements_staged; /* list of staged meas. */
> >   
> >   /* Some details preceding the binary serialized measurement list */
> >   struct ima_kexec_hdr {
> > @@ -314,6 +317,8 @@ struct ima_template_desc *ima_template_desc_current(void);
> >   struct ima_template_desc *ima_template_desc_buf(void);
> >   struct ima_template_desc *lookup_template_desc(const char *name);
> >   bool ima_template_has_modsig(const struct ima_template_desc *ima_template);
> > +int ima_queue_stage(void);
> > +int ima_queue_staged_delete_all(void);
> >   int ima_restore_measurement_entry(struct ima_template_entry *entry);
> >   int ima_restore_measurement_list(loff_t bufsize, void *buf);
> >   int ima_measurements_show(struct seq_file *m, void *v);
> > @@ -334,6 +339,7 @@ extern spinlock_t ima_queue_lock;
> >   extern atomic_long_t ima_num_entries[BINARY__LAST];
> >   extern atomic_long_t ima_num_violations;
> >   extern struct hlist_head __rcu *ima_htable;
> > +extern struct mutex ima_extend_list_mutex;
> >   
> >   static inline unsigned int ima_hash_key(u8 *digest)
> >   {
> > diff --git a/security/integrity/ima/ima_fs.c b/security/integrity/ima/ima_fs.c
> > index 7709a4576322..39d9128e9f22 100644
> > --- a/security/integrity/ima/ima_fs.c
> > +++ b/security/integrity/ima/ima_fs.c
> > @@ -24,6 +24,13 @@
> >   
> >   #include "ima.h"
> >   
> > +/*
> > + * Requests:
> > + * 'A\n': stage the entire measurements list
> > + * 'D\n': delete all staged measurements
> > + */
> > +#define STAGED_REQ_LENGTH 21
> > +
> >   static DEFINE_MUTEX(ima_write_mutex);
> >   static DEFINE_MUTEX(ima_measure_mutex);
> >   static long ima_measure_users;
> > @@ -97,6 +104,11 @@ static void *ima_measurements_start(struct seq_file *m, loff_t *pos)
> >   	return _ima_measurements_start(m, pos, &ima_measurements);
> >   }
> >   
> > +static void *ima_measurements_staged_start(struct seq_file *m, loff_t *pos)
> > +{
> > +	return _ima_measurements_start(m, pos, &ima_measurements_staged);
> > +}
> > +
> >   static void *_ima_measurements_next(struct seq_file *m, void *v, loff_t *pos,
> >   				    struct list_head *head)
> >   {
> > @@ -118,6 +130,12 @@ static void *ima_measurements_next(struct seq_file *m, void *v, loff_t *pos)
> >   	return _ima_measurements_next(m, v, pos, &ima_measurements);
> >   }
> >   
> > +static void *ima_measurements_staged_next(struct seq_file *m, void *v,
> > +					  loff_t *pos)
> > +{
> > +	return _ima_measurements_next(m, v, pos, &ima_measurements_staged);
> > +}
> > +
> >   static void ima_measurements_stop(struct seq_file *m, void *v)
> >   {
> >   }
> > @@ -283,6 +301,68 @@ static const struct file_operations ima_measurements_ops = {
> >   	.release = ima_measurements_release,
> >   };
> >   
> > +static const struct seq_operations ima_measurments_staged_seqops = {
> > +	.start = ima_measurements_staged_start,
> > +	.next = ima_measurements_staged_next,
> > +	.stop = ima_measurements_stop,
> > +	.show = ima_measurements_show
> > +};
> > +
> > +static int ima_measurements_staged_open(struct inode *inode, struct file *file)
> > +{
> > +	return _ima_measurements_open(inode, file,
> > +				      &ima_measurments_staged_seqops);
> > +}
> > +
> > +static ssize_t ima_measurements_staged_write(struct file *file,
> > +					     const char __user *buf,
> > +					     size_t datalen, loff_t *ppos)
> > +{
> > +	char req[STAGED_REQ_LENGTH];
> > +	int ret;
> > +
> > +	if (*ppos > 0 || datalen < 2 || datalen > STAGED_REQ_LENGTH)
> > +		return -EINVAL;
> > +
> > +	if (copy_from_user(req, buf, datalen) != 0)
> > +		return -EFAULT;
> > +
> > +	if (req[datalen - 1] != '\n')
> > +		return -EINVAL;
> > +
> > +	req[datalen - 1] = '\0';
> > +
> > +	switch (req[0]) {
> > +	case 'A':
> > +		if (datalen != 2)
> > +			return -EINVAL;
> > +
> > +		ret = ima_queue_stage();
> > +		break;
> > +	case 'D':
> > +		if (datalen != 2)
> > +			return -EINVAL;
> > +
> > +		ret = ima_queue_staged_delete_all();
> > +		break;
> 
> I think the following two steps may not work because of a race condition:
> 
> step1: ret = ima_queue_stage(); //this will put all logs in active list into staged list;
> step2: ret = ima_queue_staged_delete_all(); //this will delete all logs in staged list;
> 
> The race condition occurs as follows:
>      1. the current active log list is LA1;
>      2. the user agent reads the TPM quote QA1, which matches list LA1;
>      3. a new event NewLog is added to the active log list (LA1+NewLog);
>      4. the user agent calls ima_queue_stage(), which generates a staged
>         list including LA1+NewLog;
>      5. the user agent calls ima_queue_staged_delete_all();
>         the new log NewLog from step 3 is also deleted.

Please refer to the documentation patch which explains the intended
workflow of this approach (Remote Attestation Agent Workflow).

Roberto

> The next attestation will then fail if it uses the active log list in the
> kernel.
> 
> Thanks,
> 
> Steven
> 
> > +	default:
> > +		ret = -EINVAL;
> > +	}
> > +
> > +	if (ret < 0)
> > +		return ret;
> > +
> > +	return datalen;
> > +}
> > +
> > +static const struct file_operations ima_measurements_staged_ops = {
> > +	.open = ima_measurements_staged_open,
> > +	.read = seq_read,
> > +	.write = ima_measurements_staged_write,
> > +	.llseek = seq_lseek,
> > +	.release = ima_measurements_release,
> > +};
> > +
> >   void ima_print_digest(struct seq_file *m, u8 *digest, u32 size)
> >   {
> >   	u32 i;
> > @@ -356,6 +436,28 @@ static const struct file_operations ima_ascii_measurements_ops = {
> >   	.release = ima_measurements_release,
> >   };
> >   
> > +static const struct seq_operations ima_ascii_measurements_staged_seqops = {
> > +	.start = ima_measurements_staged_start,
> > +	.next = ima_measurements_staged_next,
> > +	.stop = ima_measurements_stop,
> > +	.show = ima_ascii_measurements_show
> > +};
> > +
> > +static int ima_ascii_measurements_staged_open(struct inode *inode,
> > +					      struct file *file)
> > +{
> > +	return _ima_measurements_open(inode, file,
> > +				      &ima_ascii_measurements_staged_seqops);
> > +}
> > +
> > +static const struct file_operations ima_ascii_measurements_staged_ops = {
> > +	.open = ima_ascii_measurements_staged_open,
> > +	.read = seq_read,
> > +	.write = ima_measurements_staged_write,
> > +	.llseek = seq_lseek,
> > +	.release = ima_measurements_release,
> > +};
> > +
> >   static ssize_t ima_read_policy(char *path)
> >   {
> >   	void *data = NULL;
> > @@ -459,10 +561,21 @@ static const struct seq_operations ima_policy_seqops = {
> >   };
> >   #endif
> >   
> > -static int __init create_securityfs_measurement_lists(void)
> > +static int __init create_securityfs_measurement_lists(bool staging)
> >   {
> > +	const struct file_operations *ascii_ops = &ima_ascii_measurements_ops;
> > +	const struct file_operations *binary_ops = &ima_measurements_ops;
> > +	mode_t permissions = S_IRUSR | S_IRGRP;
> > +	const char *file_suffix = "";
> >   	int count = NR_BANKS(ima_tpm_chip);
> >   
> > +	if (staging) {
> > +		ascii_ops = &ima_ascii_measurements_staged_ops;
> > +		binary_ops = &ima_measurements_staged_ops;
> > +		file_suffix = "_staged";
> > +		permissions |= (S_IWUSR | S_IWGRP);
> > +	}
> > +
> >   	if (ima_sha1_idx >= NR_BANKS(ima_tpm_chip))
> >   		count++;
> >   
> > @@ -473,29 +586,32 @@ static int __init create_securityfs_measurement_lists(void)
> >   
> >   		if (algo == HASH_ALGO__LAST)
> >   			snprintf(file_name, sizeof(file_name),
> > -				 "ascii_runtime_measurements_tpm_alg_%x",
> > -				 ima_tpm_chip->allocated_banks[i].alg_id);
> > +				 "ascii_runtime_measurements_tpm_alg_%x%s",
> > +				 ima_tpm_chip->allocated_banks[i].alg_id,
> > +				 file_suffix);
> >   		else
> >   			snprintf(file_name, sizeof(file_name),
> > -				 "ascii_runtime_measurements_%s",
> > -				 hash_algo_name[algo]);
> > -		dentry = securityfs_create_file(file_name, S_IRUSR | S_IRGRP,
> > +				 "ascii_runtime_measurements_%s%s",
> > +				 hash_algo_name[algo], file_suffix);
> > +		dentry = securityfs_create_file(file_name, permissions,
> >   						ima_dir, (void *)(uintptr_t)i,
> > -						&ima_ascii_measurements_ops);
> > +						ascii_ops);
> >   		if (IS_ERR(dentry))
> >   			return PTR_ERR(dentry);
> >   
> >   		if (algo == HASH_ALGO__LAST)
> >   			snprintf(file_name, sizeof(file_name),
> > -				 "binary_runtime_measurements_tpm_alg_%x",
> > -				 ima_tpm_chip->allocated_banks[i].alg_id);
> > +				 "binary_runtime_measurements_tpm_alg_%x%s",
> > +				 ima_tpm_chip->allocated_banks[i].alg_id,
> > +				 file_suffix);
> >   		else
> >   			snprintf(file_name, sizeof(file_name),
> > -				 "binary_runtime_measurements_%s",
> > -				 hash_algo_name[algo]);
> > -		dentry = securityfs_create_file(file_name, S_IRUSR | S_IRGRP,
> > +				 "binary_runtime_measurements_%s%s",
> > +				 hash_algo_name[algo], file_suffix);
> > +
> > +		dentry = securityfs_create_file(file_name, permissions,
> >   						ima_dir, (void *)(uintptr_t)i,
> > -						&ima_measurements_ops);
> > +						binary_ops);
> >   		if (IS_ERR(dentry))
> >   			return PTR_ERR(dentry);
> >   	}
> > @@ -503,6 +619,23 @@ static int __init create_securityfs_measurement_lists(void)
> >   	return 0;
> >   }
> >   
> > +static int __init create_securityfs_staging_links(void)
> > +{
> > +	struct dentry *dentry;
> > +
> > +	dentry = securityfs_create_symlink("binary_runtime_measurements_staged",
> > +		ima_dir, "binary_runtime_measurements_sha1_staged", NULL);
> > +	if (IS_ERR(dentry))
> > +		return PTR_ERR(dentry);
> > +
> > +	dentry = securityfs_create_symlink("ascii_runtime_measurements_staged",
> > +		ima_dir, "ascii_runtime_measurements_sha1_staged", NULL);
> > +	if (IS_ERR(dentry))
> > +		return PTR_ERR(dentry);
> > +
> > +	return 0;
> > +}
> > +
> >   /*
> >    * ima_open_policy: sequentialize access to the policy file
> >    */
> > @@ -595,7 +728,13 @@ int __init ima_fs_init(void)
> >   		goto out;
> >   	}
> >   
> > -	ret = create_securityfs_measurement_lists();
> > +	ret = create_securityfs_measurement_lists(false);
> > +	if (ret == 0 && IS_ENABLED(CONFIG_IMA_STAGING)) {
> > +		ret = create_securityfs_measurement_lists(true);
> > +		if (ret == 0)
> > +			ret = create_securityfs_staging_links();
> > +	}
> > +
> >   	if (ret != 0)
> >   		goto out;
> >   
> > diff --git a/security/integrity/ima/ima_kexec.c b/security/integrity/ima/ima_kexec.c
> > index d7d0fb639d99..d5503dd5cc9b 100644
> > --- a/security/integrity/ima/ima_kexec.c
> > +++ b/security/integrity/ima/ima_kexec.c
> > @@ -42,8 +42,8 @@ void ima_measure_kexec_event(const char *event_name)
> >   	long len;
> >   	int n;
> >   
> > -	buf_size = ima_get_binary_runtime_size(BINARY);
> > -	len = atomic_long_read(&ima_num_entries[BINARY]);
> > +	buf_size = ima_get_binary_runtime_size(BINARY_FULL);
> > +	len = atomic_long_read(&ima_num_entries[BINARY_FULL]);
> >   
> >   	n = scnprintf(ima_kexec_event, IMA_KEXEC_EVENT_LEN,
> >   		      "kexec_segment_size=%lu;ima_binary_runtime_size=%lu;"
> > @@ -106,13 +106,26 @@ static int ima_dump_measurement_list(unsigned long *buffer_size, void **buffer,
> >   
> >   	memset(&khdr, 0, sizeof(khdr));
> >   	khdr.version = 1;
> > -	/* This is an append-only list, no need to hold the RCU read lock */
> > -	list_for_each_entry_rcu(qe, &ima_measurements, later, true) {
> > +	/* It can race with ima_queue_stage() and ima_queue_delete_staged(). */
> > +	mutex_lock(&ima_extend_list_mutex);
> > +
> > +	list_for_each_entry_rcu(qe, &ima_measurements_staged, later,
> > +				lockdep_is_held(&ima_extend_list_mutex)) {
> >   		ret = ima_dump_measurement(&khdr, qe);
> >   		if (ret < 0)
> >   			break;
> >   	}
> >   
> > +	list_for_each_entry_rcu(qe, &ima_measurements, later,
> > +				lockdep_is_held(&ima_extend_list_mutex)) {
> > +		if (!ret)
> > +			ret = ima_dump_measurement(&khdr, qe);
> > +		if (ret < 0)
> > +			break;
> > +	}
> > +
> > +	mutex_unlock(&ima_extend_list_mutex);
> > +
> >   	/*
> >   	 * fill in reserved space with some buffer details
> >   	 * (eg. version, buffer size, number of measurements)
> > @@ -167,6 +180,7 @@ void ima_add_kexec_buffer(struct kimage *image)
> >   		extra_memory = CONFIG_IMA_KEXEC_EXTRA_MEMORY_KB * 1024;
> >   
> >   	binary_runtime_size = ima_get_binary_runtime_size(BINARY) +
> > +			      ima_get_binary_runtime_size(BINARY_STAGED) +
> >   			      extra_memory;
> >   
> >   	if (binary_runtime_size >= ULONG_MAX - PAGE_SIZE)
> > diff --git a/security/integrity/ima/ima_queue.c b/security/integrity/ima/ima_queue.c
> > index b6d10dceb669..50519ed837d4 100644
> > --- a/security/integrity/ima/ima_queue.c
> > +++ b/security/integrity/ima/ima_queue.c
> > @@ -26,6 +26,7 @@
> >   static struct tpm_digest *digests;
> >   
> >   LIST_HEAD(ima_measurements);	/* list of all measurements */
> > +LIST_HEAD(ima_measurements_staged); /* list of staged measurements */
> >   #ifdef CONFIG_IMA_KEXEC
> >   static unsigned long binary_runtime_size[BINARY__LAST];
> >   #else
> > @@ -45,11 +46,11 @@ atomic_long_t ima_num_violations = ATOMIC_LONG_INIT(0);
> >   /* key: inode (before secure-hashing a file) */
> >   struct hlist_head __rcu *ima_htable;
> >   
> > -/* mutex protects atomicity of extending measurement list
> > +/* mutex protects atomicity of extending and staging measurement list
> >    * and extending the TPM PCR aggregate. Since tpm_extend can take
> >    * long (and the tpm driver uses a mutex), we can't use the spinlock.
> >    */
> > -static DEFINE_MUTEX(ima_extend_list_mutex);
> > +DEFINE_MUTEX(ima_extend_list_mutex);
> >   
> >   /*
> >    * Used internally by the kernel to suspend measurements.
> > @@ -174,12 +175,16 @@ static int ima_add_digest_entry(struct ima_template_entry *entry,
> >   				lockdep_is_held(&ima_extend_list_mutex));
> >   
> >   	atomic_long_inc(&ima_num_entries[BINARY]);
> > +	atomic_long_inc(&ima_num_entries[BINARY_FULL]);
> > +
> >   	if (update_htable) {
> >   		key = ima_hash_key(entry->digests[ima_hash_algo_idx].digest);
> >   		hlist_add_head_rcu(&qe->hnext, &htable[key]);
> >   	}
> >   
> >   	ima_update_binary_runtime_size(entry, BINARY);
> > +	ima_update_binary_runtime_size(entry, BINARY_FULL);
> > +
> >   	return 0;
> >   }
> >   
> > @@ -280,6 +285,94 @@ int ima_add_template_entry(struct ima_template_entry *entry, int violation,
> >   	return result;
> >   }
> >   
> > +int ima_queue_stage(void)
> > +{
> > +	int ret = 0;
> > +
> > +	mutex_lock(&ima_extend_list_mutex);
> > +	if (!list_empty(&ima_measurements_staged)) {
> > +		ret = -EEXIST;
> > +		goto out_unlock;
> > +	}
> > +
> > +	if (list_empty(&ima_measurements)) {
> > +		ret = -ENOENT;
> > +		goto out_unlock;
> > +	}
> > +
> > +	list_replace(&ima_measurements, &ima_measurements_staged);
> > +	INIT_LIST_HEAD(&ima_measurements);
> > +
> > +	atomic_long_set(&ima_num_entries[BINARY_STAGED],
> > +			atomic_long_read(&ima_num_entries[BINARY]));
> > +	atomic_long_set(&ima_num_entries[BINARY], 0);
> > +
> > +	if (IS_ENABLED(CONFIG_IMA_KEXEC)) {
> > +		binary_runtime_size[BINARY_STAGED] =
> > +					binary_runtime_size[BINARY];
> > +		binary_runtime_size[BINARY] = 0;
> > +	}
> > +out_unlock:
> > +	mutex_unlock(&ima_extend_list_mutex);
> > +	return ret;
> > +}
> > +
> > +static void ima_queue_delete(struct list_head *head);
> > +
> > +int ima_queue_staged_delete_all(void)
> > +{
> > +	LIST_HEAD(ima_measurements_trim);
> > +
> > +	mutex_lock(&ima_extend_list_mutex);
> > +	if (list_empty(&ima_measurements_staged)) {
> > +		mutex_unlock(&ima_extend_list_mutex);
> > +		return -ENOENT;
> > +	}
> > +
> > +	list_replace(&ima_measurements_staged, &ima_measurements_trim);
> > +	INIT_LIST_HEAD(&ima_measurements_staged);
> > +
> > +	atomic_long_set(&ima_num_entries[BINARY_STAGED], 0);
> > +
> > +	if (IS_ENABLED(CONFIG_IMA_KEXEC))
> > +		binary_runtime_size[BINARY_STAGED] = 0;
> > +
> > +	mutex_unlock(&ima_extend_list_mutex);
> > +
> > +	ima_queue_delete(&ima_measurements_trim);
> > +	return 0;
> > +}
> > +
> > +static void ima_queue_delete(struct list_head *head)
> > +{
> > +	struct ima_queue_entry *qe, *qe_tmp;
> > +	unsigned int i;
> > +
> > +	list_for_each_entry_safe(qe, qe_tmp, head, later) {
> > +		/*
> > +		 * Safe to free template_data here without synchronize_rcu()
> > +		 * because the only htable reader, ima_lookup_digest_entry(),
> > +		 * accesses only entry->digests, not template_data. If new
> > +		 * htable readers are added that access template_data, a
> > +		 * synchronize_rcu() is required here.
> > +		 */
> > +		for (i = 0; i < qe->entry->template_desc->num_fields; i++) {
> > +			kfree(qe->entry->template_data[i].data);
> > +			qe->entry->template_data[i].data = NULL;
> > +			qe->entry->template_data[i].len = 0;
> > +		}
> > +
> > +		list_del(&qe->later);
> > +
> > +		/* No leak if condition is false, referenced by ima_htable. */
> > +		if (IS_ENABLED(CONFIG_IMA_DISABLE_HTABLE)) {
> > +			kfree(qe->entry->digests);
> > +			kfree(qe->entry);
> > +			kfree(qe);
> > +		}
> > +	}
> > +}
> > +
> >   int ima_restore_measurement_entry(struct ima_template_entry *entry)
> >   {
> >   	int result = 0;
> 


^ permalink raw reply

* Re: [PATCH v3 4/9] lsm: framework for BPF integrity verification
From: Song Liu @ 2026-03-27 16:46 UTC (permalink / raw)
  To: Blaise Boscaccy
  Cc: Jonathan Corbet, Paul Moore, James Morris, Serge E. Hallyn,
	Mickaël Salaün, Günther Noack,
	Dr. David Alan Gilbert, Andrew Morton, James.Bottomley, dhowells,
	Fan Wu, Ryan Foster, Randy Dunlap, linux-security-module,
	linux-doc, linux-kernel, bpf
In-Reply-To: <20260326060655.2550595-5-bboscaccy@linux.microsoft.com>

On Wed, Mar 25, 2026 at 11:07 PM Blaise Boscaccy
<bboscaccy@linux.microsoft.com> wrote:
[...]
> The first new callback, bpf_prog_load_integrity(), located within the
> security_bpf_prog_load() hook, is necessary to ensure that the integrity
> verification callbacks are executed before any of the existing LSMs
> are executed via the bpf_prog_load() callback.  Reusing the existing
> bpf_prog_load() callback for integrity verification could result in LSMs
> not having access to the integrity verification results when asked to
> authorize the BPF program load in the bpf_prog_load() callback.
>
> The new LSM hook, security_bpf_prog_load_post_integrity(), is intended
> to be called from within LSMs performing BPF program integrity
> verification.  It is used to report the verdict of the integrity
> verification to other LSMs enforcing access control policy on BPF
> program loads.  LSMs enforcing such access controls should register a
> bpf_prog_load_post_integrity() callback to receive integrity verdicts.

bpf_prog_load_post_integrity() is weird. Some questions about it:

1. Is it possible to call it from other LSMs (not hornet)? Specifically, is it
   possible to call it from BPF LSM?
2. This set does not include any LSMs that attach functions to
   bpf_prog_load_post_integrity. This is against the new LSM hook policy.
   I guess the plan is to add LSM users in follow up patches? Could you
   please include at least some of such code in this patchset? This will
   help folks understand the use case.

Thanks,
Song

[...]

^ permalink raw reply

* [PATCH v8 00/12] landlock: UNIX connect() control by pathname and scope
From: Günther Noack @ 2026-03-27 16:48 UTC (permalink / raw)
  To: Mickaël Salaün, John Johansen, Paul Moore, James Morris,
	Serge E . Hallyn
  Cc: Günther Noack, linux-security-module, Tingmao Wang,
	Justin Suess, Samasth Norway Ananda, Matthieu Buffet,
	Mikhail Ivanov, konstantin.meskhidze, Demi Marie Obenour,
	Alyssa Ross, Jann Horn, Tahera Fahimi, Sebastian Andrzej Siewior,
	Kuniyuki Iwashima, Georgia Garcia, Simon Horman, netdev,
	Alexander Viro, Christian Brauner

Hello!

This patch set introduces a filesystem-based Landlock restriction
mechanism for connecting to UNIX domain sockets (or addressing them
with sendmsg(2)).  It introduces the filesystem access right
LANDLOCK_ACCESS_FS_RESOLVE_UNIX.

For the connection-oriented SOCK_STREAM and SOCK_SEQPACKET type
sockets, the access right makes the connect(2) operation fail with
EACCES, if denied.

SOCK_DGRAM-type UNIX sockets can be used both with connect(2), or by
passing an explicit recipient address with every sendmsg(2)
invocation.  In the latter case, the Landlock check is done when an
explicit recipient address is passed to sendmsg(2) and can make
sendmsg(2) return EACCES.  When UNIX datagram sockets are connected
with connect(2), a fixed recipient address is associated with the
socket and the check happens during connect(2) and may return EACCES.

When LANDLOCK_ACCESS_FS_RESOLVE_UNIX is handled within a Landlock
domain, this domain will only allow connect(2) and sendmsg(2) to
server sockets that were created within the same domain.  Or, to
phrase it the other way around: Unless it is allow-listed with a
LANDLOCK_PATH_BENEATH rule, the newly created domain denies connect(2)
and sendmsg(2) actions that are directed *outwards* of that domain.
In that regard, LANDLOCK_ACCESS_FS_RESOLVE_UNIX has the same semantics
as one of the "scoped" access rights.

== Motivation

Currently, landlocked processes can connect to named UNIX sockets
through the BSD socket API described in unix(7), by invoking socket(2)
followed by connect(2) with a suitable struct sockaddr_un holding the
socket's filename.  This is a surprising gap in Landlock's sandboxing
capabilities for users (e.g. in [1]) and it can be used to escape a
sandbox when a Unix service offers command execution (various such
scenarios were listed by Tingmao Wang in [2]).

The original feature request is at [4].

== Alternatives and Related Work

=== Alternative: Use existing LSM hooks

We have carefully and seriously considered the use of existing LSM
hooks, but still came to the conclusion that a new LSM hook is better
suited in this case:

The existing hooks security_unix_stream_connect(),
security_unix_may_send() and security_socket_connect() do not give
access to the resolved filesystem path.

* Resolving the filesystem path in the struct sockaddr_un again within
  a Landlock hook would produce a TOCTOU race, so this is not an option.
* We would therefore need to wire through the resolved struct path
  from unix_find_bsd() to one of the existing LSM hooks which get
  called later.  This would be a more substantial change to af_unix.c.

The struct path that is available in the listening-side struct sock
can be read through the existing hooks, but it is not an option to use
this information: As the listening socket may have been bound from
within a different namespace, the path that was used for that is, in
the general case, not meaningful for a sandboxed process.  In
particular, it is not possible to use this path (or prefixes thereof)
when constructing a sandbox policy in the client-side process.

Paul Moore also chimed in in support of adding a new hook, with the
rationale that the simplest change to the LSM hook interface has
traditionally proven to be the most robust. [11]

More details are on the Github issue at [6] and on the LKML at [9].

In the discussion of the V2 review, started by Christian Brauner
[10], we have further explored the approach of reusing the existing
LSM hooks but still ended up leaning on the side of introducing a new
hook, with Paul Moore and me (gnoack) arguing for that option.

Further insights about the LSM hook were shared in the V3 review by
Tingmao Wang [12], who spotted additional requirements due to the two
approaches being merged into one patch set.  The summary of that
discussion is in [13].

=== Related work: Scope Control for Pathname Unix Sockets

The motivation for this patch is the same as in Tingmao Wang's patch
set for "scoped" control for pathname Unix sockets [2], originally
proposed in the Github feature request [5].

In [14], we have settled on the decision to merge the two patch sets
into this one, whose primary way of controlling connect(2) is
LANDLOCK_ACCESS_FS_RESOLVE_UNIX, but where this flag additionally has
the semantics of only restricting this unix(7) IPC *outwards* of the
created Landlock domain, in line with the logic that exists for the
existing "scoped" flags already.

By having LANDLOCK_ACCESS_FS_RESOLVE_UNIX implement "scoping"
semantics, we can avoid introducing two separate interacting flags for
now, but we retain the option of introducing
LANDLOCK_SCOPE_PATHNAME_UNIX_SOCKET at a later point in time, should
such a flag be needed to express additional rules.

== Credits

The feature was originally suggested by Jann Horn in [7].

Tingmao Wang and Demi Marie Obenour have taken the initiative to
revive this discussion again in [1], [4] and [5].

Tingmao Wang has sent the patch set for the scoped access control for
pathname Unix sockets [2] and has contributed substantial insights
during the code review, shaping the form of the LSM hook and agreeing
to merge the pathname and scoped-flag patch sets.

Justin Suess has sent the patch for the LSM hook in [8] and
subsequently through this patch set.

Christian Brauner and Paul Moore have contributed to the design of the
new LSM hook, discussing the tradeoffs in [10].

Sebastian Andrzej Siewior and Kuniyuki Iwashima have helped with
locking questions in the networking subsystem. [15] [16]

Ryan Sullivan has started on an initial implementation and has brought
up relevant discussion points on the Github issue at [4].

As maintainer of Landlock, Mickaël Salaün has done the main review so
far and particularly pointed out ways in which the UNIX connect()
patch sets interact with each other and what we need to look for with
regards to UAPI consistency as Landlock evolves.

[1] https://lore.kernel.org/landlock/515ff0f4-2ab3-46de-8d1e-5c66a93c6ede@gmail.com/
[2] Tingmao Wang's "Implement scope control for pathname Unix sockets"
    https://lore.kernel.org/all/cover.1767115163.git.m@maowtm.org/
[3] https://lore.kernel.org/all/20251230.bcae69888454@gnoack.org/
[4] Github issue for FS-based control for named Unix sockets:
    https://github.com/landlock-lsm/linux/issues/36
[5] Github issue for scope-based restriction of named Unix sockets:
    https://github.com/landlock-lsm/linux/issues/51
[6] https://github.com/landlock-lsm/linux/issues/36#issuecomment-2950632277
[7] https://lore.kernel.org/linux-security-module/CAG48ez3NvVnonOqKH4oRwRqbSOLO0p9djBqgvxVwn6gtGQBPcw@mail.gmail.com/
[8] Patch for the LSM hook:
    https://lore.kernel.org/all/20251231213314.2979118-1-utilityemal77@gmail.com/
[9] https://lore.kernel.org/all/20260108.64bd7391e1ae@gnoack.org/
[10] https://lore.kernel.org/all/20260113-kerngesund-etage-86de4a21da24@brauner/
[11] https://lore.kernel.org/all/CAHC9VhQHZCe0LMx4xzSo-h1SWY489U4frKYnxu4YVrcJN3x7nA@mail.gmail.com/
[12] https://lore.kernel.org/all/e6b6b069-384c-4c45-a56b-fa54b26bc72a@maowtm.org/
[13] https://lore.kernel.org/all/aYMenaSmBkAsFowd@google.com/
[14] https://lore.kernel.org/all/20260205.Kiech3gupee1@digikod.net/
[15] https://lore.kernel.org/all/20260310151907.VYySCtJp@linutronix.de/
[16] https://lore.kernel.org/all/CAAVpQUC95mSjX1vRK===pubHofcYqbkNE7goYKiu6vha5GYAFw@mail.gmail.com/

---

== Patch set history

V1: https://lore.kernel.org/all/20260101134102.25938-1-gnoack3000@gmail.com/
V2: https://lore.kernel.org/all/20260110143300.71048-2-gnoack3000@gmail.com/
V3: https://lore.kernel.org/all/20260119203457.97676-2-gnoack3000@gmail.com/
V4: https://lore.kernel.org/all/20260208231017.114343-1-gnoack3000@gmail.com/
V5: https://lore.kernel.org/all/20260215105158.28132-1-gnoack3000@gmail.com/
V6: https://lore.kernel.org/all/20260315222150.121952-1-gnoack3000@gmail.com/
V7: https://lore.kernel.org/all/20260323165654.193957-1-gnoack3000@gmail.com/

Changes in V8:

* Added a fix for the m68k GCC 15 compiler failure
* Reorder BUILD_BUG_ON commit to be directly after the implementation commit
* Documentation: Use the current month (March 2026) in uapi doc comment

Changes in V7:

* Implementation:
  * LSM hook: Small header file layout restructurings, typos (Justin)
  * Hold unix_state_lock across the usage of the other Landlock domain
    This prevents a UAF; spotted by Mickaël and Sebastian in code review 🏆
  * Ignore the return value from landlock_init_layer_masks() -
    it is not needed in this specific case
  * Add a BUILD_BUG_ON to unmask_scoped_access() like in
    domain_is_scoped().  Revise the BUILD_BUG_ON logic and bring both
    implementations in line.
* Documentation and commentary:
  * Mention access_mask_t change from u16 to u32 in commit message
  * Improve documentation for unmask_scoped_access()
  * Various typos and smaller documentation fixes, caught in code review
  * Add ABI version remark to landlock.h
  * Add comment to explain why returning 0 on SOCK_DEAD is correct -
    This is not obvious in the code and requires understanding the callers
  * In kernel docs, use "case 6 ... 8" in a place

Changes in V6:

* Implementation:
  * Move the LSM hook call after the check that checks for the other
    end's matching socket type.  (Justin Suess)
  * Lock with unix_state_lock() and check SOCK_DEAD.
  * Remove unnecessary layer_access_masks_empty() call (and its
    implementation).
* Documentation:
  * Squash docs with design rationale into main implementation commit,
    and cross-reference it from the header docs.
  * Clarify that denials result in EACCES and that this is consistent
    with other filesystem access rights.
* Minor:
  * Use mem_is_zero() in is_layer_masks_allowed() (minor cleanup)
  * Omit unnecessary __attribute__((fallthrough)) usages
  * Remove comment at the end of a line in a place.
* Selftests:
  * sun_path population fixes
  * coredump test: Set EUID to 0 (needed for UML-based selftests)
    Link[1]: https://lore.kernel.org/all/20260218.ohth8theu8Yi@digikod.net/

Changes in V5:

This change primarily adds tests, changing the testing approach for
the main test to use the scoped_domains fixture as in Tingmao's patch
set [2], and adding tests for the audit and coredump cases.

* Selftests:
  * Replaced the main selftest with one based on scoped_domains
  * Added audit test
  * Added test for the coredump case
  * Added a follow-up commit that simplifies ruleset enforcement
* Kernel code:
  * Mark coredump check as unlikely (per Justin's review)
  * Drop check for socket type (per Mickaël's review)

Changes in V4:

Since this version, this patch set subsumes the scoping semantics from
Tingmao Wang's "Scope Control" patch set [2], per discussion with
Tingmao Wang and Mickaël Salaün in [14] and in the thread leading up
to it.

Now, LANDLOCK_SCOPE_PATHNAME_UNIX_SOCKET only restricts connect(2) and
sendmsg(2) *outwards* of the domain where it is restricted, *with the
same semantics as a "scoped" flag*.

 * Implement a layer-mask based version of domain_is_scoped():
   unmask_scoped_access().  Rationale: domain_is_scoped() returns
   early, which we can't do in the layer masks based variant.  The two
   variants are similar enough.
 * LSM hook: Replace 'type' argument with 'sk' argument,
   per discussion in [12] and [13].
 * Bump ABI version to 9 (pessimistically assuming that we won't make
   it for 7.0)
 * Documentation fixes in header file and in Documentation/
 * selftests: more test variants, now also parameterizing whether the
   server socket gets created within the Landlock domain or before that
 * selftests: use EXPECT_EQ() for test cleanup

Changes in V3:
 * LSM hook: rename it to security_unix_find() (Justin Suess)
   (resolving the previously open question about the LSM hook name)
   Related discussions:
   https://lore.kernel.org/all/20260112.Wufar9coosoo@digikod.net/
   https://lore.kernel.org/all/CAHC9VhSRiHwLEWfFkQdPEwgB4AXKbXzw_+3u=9hPpvUTnu02Bg@mail.gmail.com/
 * Reunite the three UNIX resolving access rights back into one
   (resolving the previously open question about the access right
   structuring) Related discussion:
   https://lore.kernel.org/all/20260112.Wufar9coosoo@digikod.net/
 * Sample tool: Add new UNIX lookup access rights to ACCESS_FILE

Changes in V2:
 * Send Justin Suess's LSM hook patch together with the Landlock
   implementation
 * LSM hook: Pass type and flags parameters to the hook, to make the
   access right more generally usable across LSMs, per suggestion from
   Paul Moore (Implemented by Justin)
 * Split the access right into the three types of UNIX domain sockets:
   SOCK_STREAM, SOCK_DGRAM and SOCK_SEQPACKET.
 * selftests: More exhaustive tests.
 * Removed a minor commit from V1 which adds a missing close(fd) to a
   test (it is already in the mic-next branch)

Günther Noack (11):
  landlock: Use mem_is_zero() in is_layer_masks_allowed()
  landlock: Replace union access_masks_all with helper functions
  landlock: Control pathname UNIX domain socket resolution by path
  landlock: Clarify BUILD_BUG_ON check in scoping logic
  samples/landlock: Add support for named UNIX domain socket
    restrictions
  selftests/landlock: Replace access_fs_16 with ACCESS_ALL in fs_test
  selftests/landlock: Test LANDLOCK_ACCESS_FS_RESOLVE_UNIX
  selftests/landlock: Audit test for LANDLOCK_ACCESS_FS_RESOLVE_UNIX
  selftests/landlock: Check that coredump sockets stay unrestricted
  selftests/landlock: fs_test: Simplify ruleset creation and enforcement
  landlock: Document FS access right for pathname UNIX sockets

Justin Suess (1):
  lsm: Add LSM hook security_unix_find

 Documentation/security/landlock.rst          |   42 +-
 Documentation/userspace-api/landlock.rst     |   16 +-
 include/linux/lsm_hook_defs.h                |    5 +
 include/linux/security.h                     |   11 +
 include/uapi/linux/landlock.h                |   21 +
 net/unix/af_unix.c                           |   10 +-
 samples/landlock/sandboxer.c                 |   12 +-
 security/landlock/access.h                   |   23 +-
 security/landlock/audit.c                    |    1 +
 security/landlock/cred.h                     |   10 +-
 security/landlock/fs.c                       |  135 +-
 security/landlock/limits.h                   |    2 +-
 security/landlock/ruleset.h                  |   13 +-
 security/landlock/syscalls.c                 |    2 +-
 security/landlock/task.c                     |    9 +-
 security/security.c                          |   20 +
 tools/testing/selftests/landlock/base_test.c |    2 +-
 tools/testing/selftests/landlock/fs_test.c   | 1345 ++++++++++--------
 18 files changed, 1025 insertions(+), 654 deletions(-)

Range-diff against v7:
 -:  ------------ >  1:  dec515e7d7bf lsm: Add LSM hook security_unix_find
 -:  ------------ >  2:  aaa334660a52 landlock: Use mem_is_zero() in is_layer_masks_allowed()
 -:  ------------ >  3:  79dd5036482d landlock: Replace union access_masks_all with helper functions
 1:  4d455134c5d9 !  4:  b39e336967de landlock: Control pathname UNIX domain socket resolution by path
    @@ Commit message
         Signed-off-by: Günther Noack <gnoack3000@gmail.com>

      ## Documentation/security/landlock.rst ##
    +@@ Documentation/security/landlock.rst: Landlock LSM: kernel documentation
    + ==================================
    + 
    + :Author: Mickaël Salaün
    +-:Date: September 2025
    ++:Date: March 2026
    + 
    + Landlock's goal is to create scoped access-control (i.e. sandboxing).  To
    + harden a whole system, this feature should be available to any process,
     @@ Documentation/security/landlock.rst: this is required to keep access controls consistent over the whole system, and
      this avoids unattended bypasses through file descriptor passing (i.e. confused
      deputy attack).
    @@ Documentation/security/landlock.rst: this is required to keep access controls co
      =====

    + ## Documentation/userspace-api/landlock.rst ##
    +@@ Documentation/userspace-api/landlock.rst: Landlock: unprivileged access control
    + =====================================
    + 
    + :Author: Mickaël Salaün
    +-:Date: January 2026
    ++:Date: March 2026
    + 
    + The goal of Landlock is to enable restriction of ambient rights (e.g. global
    + filesystem or network access) for a set of processes.  Because Landlock
    +
      ## include/uapi/linux/landlock.h ##
     @@ include/uapi/linux/landlock.h: struct landlock_net_port_attr {
       *
 3:  00c90045f470 =  5:  3c8678a4a482 landlock: Clarify BUILD_BUG_ON check in scoping logic
 2:  70354dfcefcb =  6:  d0c8731fb751 samples/landlock: Add support for named UNIX domain socket restrictions
 4:  b8e6477b2f9a =  7:  49fa86251f1b selftests/landlock: Replace access_fs_16 with ACCESS_ALL in fs_test
 5:  66554c5b5b9c =  8:  69b368c23083 selftests/landlock: Test LANDLOCK_ACCESS_FS_RESOLVE_UNIX
 6:  fab3d3d71215 =  9:  7306a1da2947 selftests/landlock: Audit test for LANDLOCK_ACCESS_FS_RESOLVE_UNIX
 7:  4e8a07344278 = 10:  e63e98c44e58 selftests/landlock: Check that coredump sockets stay unrestricted
 8:  fcaef2882dbd = 11:  497bcce69260 selftests/landlock: fs_test: Simplify ruleset creation and enforcement
 9:  362a0e8f84a0 = 12:  c4716fc8e131 landlock: Document FS access right for pathname UNIX sockets
-- 
2.53.0

^ permalink raw reply

* [PATCH v8 01/12] lsm: Add LSM hook security_unix_find
From: Günther Noack @ 2026-03-27 16:48 UTC (permalink / raw)
  To: Mickaël Salaün, John Johansen, Paul Moore, James Morris,
	Serge E . Hallyn
  Cc: Günther Noack, Tingmao Wang, Justin Suess,
	linux-security-module, Samasth Norway Ananda, Matthieu Buffet,
	Mikhail Ivanov, konstantin.meskhidze, Demi Marie Obenour,
	Alyssa Ross, Jann Horn, Tahera Fahimi, Sebastian Andrzej Siewior,
	Kuniyuki Iwashima, Georgia Garcia, Simon Horman, netdev,
	Alexander Viro, Christian Brauner
In-Reply-To: <20260327164838.38231-1-gnoack3000@gmail.com>

From: Justin Suess <utilityemal77@gmail.com>

Add an LSM hook security_unix_find.

This hook is called to check the path of a named UNIX socket before a
connection is initiated. The peer socket may be inspected as well.

Why existing hooks are unsuitable:

Existing socket hooks, security_unix_stream_connect(),
security_unix_may_send(), and security_socket_connect() don't provide
TOCTOU-free / namespace independent access to the paths of sockets.

(1) We cannot resolve the path from the struct sockaddr in existing hooks.
This requires another path lookup. A change in the path between the
two lookups will cause a TOCTOU bug.

(2) We cannot use the struct path from the listening socket, because it
may be bound to a path in a different namespace than the caller,
resulting in a path that cannot be referenced at policy creation time.

Consumers of the hook wishing to reference @other are responsible
for acquiring the unix_state_lock and checking for the SOCK_DEAD flag
therein, ensuring the socket hasn't died since lookup.

Cc: Günther Noack <gnoack3000@gmail.com>
Cc: Tingmao Wang <m@maowtm.org>
Cc: Mickaël Salaün <mic@digikod.net>
Cc: Paul Moore <paul@paul-moore.com>
Signed-off-by: Justin Suess <utilityemal77@gmail.com>
Signed-off-by: Günther Noack <gnoack3000@gmail.com>
---
 include/linux/lsm_hook_defs.h |  5 +++++
 include/linux/security.h      | 11 +++++++++++
 net/unix/af_unix.c            | 10 +++++++---
 security/security.c           | 20 ++++++++++++++++++++
 4 files changed, 43 insertions(+), 3 deletions(-)

diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h
index 8c42b4bde09c..7a0fd3dbfa29 100644
--- a/include/linux/lsm_hook_defs.h
+++ b/include/linux/lsm_hook_defs.h
@@ -317,6 +317,11 @@ LSM_HOOK(int, 0, post_notification, const struct cred *w_cred,
 LSM_HOOK(int, 0, watch_key, struct key *key)
 #endif /* CONFIG_SECURITY && CONFIG_KEY_NOTIFICATIONS */
 
+#if defined(CONFIG_SECURITY_NETWORK) && defined(CONFIG_SECURITY_PATH)
+LSM_HOOK(int, 0, unix_find, const struct path *path, struct sock *other,
+	 int flags)
+#endif /* CONFIG_SECURITY_NETWORK && CONFIG_SECURITY_PATH */
+
 #ifdef CONFIG_SECURITY_NETWORK
 LSM_HOOK(int, 0, unix_stream_connect, struct sock *sock, struct sock *other,
 	 struct sock *newsk)
diff --git a/include/linux/security.h b/include/linux/security.h
index 83a646d72f6f..99a33d8eb28d 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -1931,6 +1931,17 @@ static inline int security_mptcp_add_subflow(struct sock *sk, struct sock *ssk)
 }
 #endif	/* CONFIG_SECURITY_NETWORK */
 
+#if defined(CONFIG_SECURITY_NETWORK) && defined(CONFIG_SECURITY_PATH)
+
+int security_unix_find(const struct path *path, struct sock *other, int flags);
+
+#else /* CONFIG_SECURITY_NETWORK && CONFIG_SECURITY_PATH */
+static inline int security_unix_find(const struct path *path, struct sock *other, int flags)
+{
+	return 0;
+}
+#endif /* CONFIG_SECURITY_NETWORK && CONFIG_SECURITY_PATH */
+
 #ifdef CONFIG_SECURITY_INFINIBAND
 int security_ib_pkey_access(void *sec, u64 subnet_prefix, u16 pkey);
 int security_ib_endport_manage_subnet(void *sec, const char *name, u8 port_num);
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 3756a93dc63a..5ef3c2e31757 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -1231,11 +1231,15 @@ static struct sock *unix_find_bsd(struct sockaddr_un *sunaddr, int addr_len,
 		goto path_put;
 
 	err = -EPROTOTYPE;
-	if (sk->sk_type == type)
-		touch_atime(&path);
-	else
+	if (sk->sk_type != type)
 		goto sock_put;
 
+	err = security_unix_find(&path, sk, flags);
+	if (err)
+		goto sock_put;
+
+	touch_atime(&path);
+
 	path_put(&path);
 
 	return sk;
diff --git a/security/security.c b/security/security.c
index 67af9228c4e9..28ccea205874 100644
--- a/security/security.c
+++ b/security/security.c
@@ -4731,6 +4731,26 @@ int security_mptcp_add_subflow(struct sock *sk, struct sock *ssk)
 
 #endif	/* CONFIG_SECURITY_NETWORK */
 
+#if defined(CONFIG_SECURITY_NETWORK) && defined(CONFIG_SECURITY_PATH)
+/**
+ * security_unix_find() - Check if a named AF_UNIX socket can connect
+ * @path: path of the socket being connected to
+ * @other: peer sock
+ * @flags: flags associated with the socket
+ *
+ * This hook is called to check permissions before connecting to a named
+ * AF_UNIX socket. The caller does not hold any locks on @other.
+ *
+ * Return: Returns 0 if permission is granted.
+ */
+int security_unix_find(const struct path *path, struct sock *other, int flags)
+{
+	return call_int_hook(unix_find, path, other, flags);
+}
+EXPORT_SYMBOL(security_unix_find);
+
+#endif	/* CONFIG_SECURITY_NETWORK && CONFIG_SECURITY_PATH */
+
 #ifdef CONFIG_SECURITY_INFINIBAND
 /**
  * security_ib_pkey_access() - Check if access to an IB pkey is allowed
-- 
2.53.0


^ permalink raw reply related

* [PATCH v8 02/12] landlock: Use mem_is_zero() in is_layer_masks_allowed()
From: Günther Noack @ 2026-03-27 16:48 UTC (permalink / raw)
  To: Mickaël Salaün, John Johansen
  Cc: Günther Noack, linux-security-module, Tingmao Wang,
	Justin Suess, Samasth Norway Ananda, Matthieu Buffet,
	Mikhail Ivanov, konstantin.meskhidze, Demi Marie Obenour,
	Alyssa Ross, Jann Horn, Tahera Fahimi, Sebastian Andrzej Siewior,
	Kuniyuki Iwashima, Georgia Garcia
In-Reply-To: <20260327164838.38231-1-gnoack3000@gmail.com>

This is equivalent, but expresses the intent a bit more clearly.

Signed-off-by: Günther Noack <gnoack3000@gmail.com>
---
 security/landlock/fs.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/security/landlock/fs.c b/security/landlock/fs.c
index a03ec664c78e..97065d51685a 100644
--- a/security/landlock/fs.c
+++ b/security/landlock/fs.c
@@ -564,7 +564,7 @@ static void test_no_more_access(struct kunit *const test)
 
 static bool is_layer_masks_allowed(const struct layer_access_masks *masks)
 {
-	return !memchr_inv(&masks->access, 0, sizeof(masks->access));
+	return mem_is_zero(&masks->access, sizeof(masks->access));
 }
 
 /*
-- 
2.53.0


^ permalink raw reply related

* [PATCH v8 03/12] landlock: Replace union access_masks_all with helper functions
From: Günther Noack @ 2026-03-27 16:48 UTC (permalink / raw)
  To: Mickaël Salaün, John Johansen
  Cc: Günther Noack, kernel test robot, linux-security-module,
	Tingmao Wang, Justin Suess, Samasth Norway Ananda,
	Matthieu Buffet, Mikhail Ivanov, konstantin.meskhidze,
	Demi Marie Obenour, Alyssa Ross, Jann Horn, Tahera Fahimi,
	Sebastian Andrzej Siewior, Kuniyuki Iwashima, Georgia Garcia
In-Reply-To: <20260327164838.38231-1-gnoack3000@gmail.com>

* Stop using a union for access_masks_all.
* Expose helper functions for intersection checks and union operations.

The memory layout of bitfields is only loosely defined by the C
standard, so our static assertion that expects a fixed size was
brittle, and it broke on some compilers when we attempted to add a
17th file system access right.

Reported-by: kernel test robot <lkp@intel.com>
Closes: https://lore.kernel.org/oe-kbuild-all/202603261438.jBx2DGNe-lkp@intel.com/
Signed-off-by: Günther Noack <gnoack3000@gmail.com>
---
 security/landlock/access.h  | 21 ++++++++++++++-------
 security/landlock/cred.h    | 10 ++--------
 security/landlock/ruleset.h | 13 ++++---------
 3 files changed, 20 insertions(+), 24 deletions(-)

diff --git a/security/landlock/access.h b/security/landlock/access.h
index 42c95747d7bd..277b6ed7f7bb 100644
--- a/security/landlock/access.h
+++ b/security/landlock/access.h
@@ -52,14 +52,21 @@ struct access_masks {
 	access_mask_t scope : LANDLOCK_NUM_SCOPE;
 };
 
-union access_masks_all {
-	struct access_masks masks;
-	u32 all;
-};
+/* Checks whether two access masks have any common bit set. */
+static inline bool access_masks_intersect(const struct access_masks a,
+					  const struct access_masks b)
+{
+	return (a.fs & b.fs) || (a.net & b.net) || (a.scope & b.scope);
+}
 
-/* Makes sure all fields are covered. */
-static_assert(sizeof(typeof_member(union access_masks_all, masks)) ==
-	      sizeof(typeof_member(union access_masks_all, all)));
+/* ORs the bits of @src into @dst. */
+static inline void access_masks_merge(struct access_masks *dst,
+				      const struct access_masks src)
+{
+	dst->fs |= src.fs;
+	dst->net |= src.net;
+	dst->scope |= src.scope;
+}
 
 /**
  * struct layer_access_masks - A boolean matrix of layers and access rights
diff --git a/security/landlock/cred.h b/security/landlock/cred.h
index f287c56b5fd4..207a6db1c086 100644
--- a/security/landlock/cred.h
+++ b/security/landlock/cred.h
@@ -123,9 +123,6 @@ landlock_get_applicable_subject(const struct cred *const cred,
 				const struct access_masks masks,
 				size_t *const handle_layer)
 {
-	const union access_masks_all masks_all = {
-		.masks = masks,
-	};
 	const struct landlock_ruleset *domain;
 	ssize_t layer_level;
 
@@ -138,11 +135,8 @@ landlock_get_applicable_subject(const struct cred *const cred,
 
 	for (layer_level = domain->num_layers - 1; layer_level >= 0;
 	     layer_level--) {
-		union access_masks_all layer = {
-			.masks = domain->access_masks[layer_level],
-		};
-
-		if (layer.all & masks_all.all) {
+		if (access_masks_intersect(domain->access_masks[layer_level],
+					   masks)) {
 			if (handle_layer)
 				*handle_layer = layer_level;
 
diff --git a/security/landlock/ruleset.h b/security/landlock/ruleset.h
index 889f4b30301a..9f8b33815c2c 100644
--- a/security/landlock/ruleset.h
+++ b/security/landlock/ruleset.h
@@ -229,18 +229,13 @@ static inline void landlock_get_ruleset(struct landlock_ruleset *const ruleset)
 static inline struct access_masks
 landlock_union_access_masks(const struct landlock_ruleset *const domain)
 {
-	union access_masks_all matches = {};
+	struct access_masks matches = {};
 	size_t layer_level;
 
-	for (layer_level = 0; layer_level < domain->num_layers; layer_level++) {
-		union access_masks_all layer = {
-			.masks = domain->access_masks[layer_level],
-		};
+	for (layer_level = 0; layer_level < domain->num_layers; layer_level++)
+		access_masks_merge(&matches, domain->access_masks[layer_level]);
 
-		matches.all |= layer.all;
-	}
-
-	return matches.masks;
+	return matches;
 }
 
 static inline void
-- 
2.53.0


^ permalink raw reply related

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox