* [PATCH v5 02/13] ima: Replace static htable queue with dynamically allocated array
From: Roberto Sassu @ 2026-04-29 16:03 UTC (permalink / raw)
To: corbet, skhan, zohar, dmitry.kasatkin, eric.snowberg, paul,
jmorris, serge
Cc: linux-doc, linux-kernel, linux-integrity, linux-security-module,
gregorylumen, chenste, nramas, Roberto Sassu
In-Reply-To: <20260429160319.4162918-1-roberto.sassu@huaweicloud.com>
From: Roberto Sassu <roberto.sassu@huawei.com>
The IMA hash table is a fixed-size array of hlist_head buckets:
struct hlist_head ima_htable[IMA_MEASURE_HTABLE_SIZE];
IMA_MEASURE_HTABLE_SIZE is (1 << IMA_HASH_BITS) = 1024 buckets, each a
struct hlist_head (one pointer, 8 bytes on 64-bit). That is 8 KiB allocated
in BSS for every kernel, regardless of whether IMA is ever used, and
regardless of how many measurements are actually made.
Replace the fixed-size array with an RCU-protected pointer to a dynamically
allocated array that is initialized in ima_init_htable(), which is called
from ima_init() during early boot. ima_init_htable() calls the static
function ima_alloc_replace_htable() which, other than initializing the hash
table the first time, can also hot-swap the existing hash table with a
blank one.
The allocation in ima_alloc_replace_htable() uses kcalloc() so the buckets
are zero-initialised (equivalent to HLIST_HEAD_INIT { .first = NULL }).
Callers of ima_alloc_replace_htable() must call synchronize_rcu() and free
the returned hash table.
Finally, access the hash table with rcu_dereference() in
ima_lookup_digest_entry() (reader side) and with
rcu_dereference_protected() in ima_add_digest_entry() (writer side).
No functional change: bucket count, hash function, and all locking remain
identical.
Link: https://github.com/linux-integrity/linux/issues/1
Signed-off-by: Roberto Sassu <roberto.sassu@huawei.com>
---
security/integrity/ima/ima.h | 3 +-
security/integrity/ima/ima_init.c | 5 ++++
security/integrity/ima/ima_queue.c | 48 ++++++++++++++++++++++++++----
3 files changed, 50 insertions(+), 6 deletions(-)
diff --git a/security/integrity/ima/ima.h b/security/integrity/ima/ima.h
index 51a8a582df56..94bf890628e5 100644
--- a/security/integrity/ima/ima.h
+++ b/security/integrity/ima/ima.h
@@ -311,6 +311,7 @@ bool ima_template_has_modsig(const struct ima_template_desc *ima_template);
int ima_restore_measurement_entry(struct ima_template_entry *entry);
int ima_restore_measurement_list(loff_t bufsize, void *buf);
int ima_measurements_show(struct seq_file *m, void *v);
+int __init ima_init_htable(void);
unsigned long ima_get_binary_runtime_size(void);
int ima_init_template(void);
void ima_init_template_list(void);
@@ -326,7 +327,7 @@ extern spinlock_t ima_queue_lock;
extern atomic_long_t ima_num_entries;
extern atomic_long_t ima_num_violations;
-extern struct hlist_head ima_htable[IMA_MEASURE_HTABLE_SIZE];
+extern struct hlist_head __rcu *ima_htable;
static inline unsigned int ima_hash_key(u8 *digest)
{
diff --git a/security/integrity/ima/ima_init.c b/security/integrity/ima/ima_init.c
index a2f34f2d8ad7..7e0aa09a12e6 100644
--- a/security/integrity/ima/ima_init.c
+++ b/security/integrity/ima/ima_init.c
@@ -140,6 +140,11 @@ int __init ima_init(void)
rc = ima_init_digests();
if (rc != 0)
return rc;
+
+ rc = ima_init_htable();
+ if (rc != 0)
+ return rc;
+
rc = ima_add_boot_aggregate(); /* boot aggregate must be first entry */
if (rc != 0)
return rc;
diff --git a/security/integrity/ima/ima_queue.c b/security/integrity/ima/ima_queue.c
index 1f6515f7d015..41f4941ceaad 100644
--- a/security/integrity/ima/ima_queue.c
+++ b/security/integrity/ima/ima_queue.c
@@ -38,9 +38,7 @@ atomic_long_t ima_num_entries = ATOMIC_LONG_INIT(0);
atomic_long_t ima_num_violations = ATOMIC_LONG_INIT(0);
/* key: inode (before secure-hashing a file) */
-struct hlist_head ima_htable[IMA_MEASURE_HTABLE_SIZE] = {
- [0 ... IMA_MEASURE_HTABLE_SIZE - 1] = HLIST_HEAD_INIT
-};
+struct hlist_head __rcu *ima_htable;
/* mutex protects atomicity of extending measurement list
* and extending the TPM PCR aggregate. Since tpm_extend can take
@@ -54,17 +52,53 @@ static DEFINE_MUTEX(ima_extend_list_mutex);
*/
static bool ima_measurements_suspended;
+/* Callers must call synchronize_rcu() and free the hash table. */
+static struct hlist_head *ima_alloc_replace_htable(void)
+{
+ struct hlist_head *old_htable, *new_htable;
+
+ /* Initializing to zeros is equivalent to call HLIST_HEAD_INIT. */
+ new_htable = kcalloc(IMA_MEASURE_HTABLE_SIZE, sizeof(struct hlist_head),
+ GFP_KERNEL);
+ if (!new_htable)
+ return ERR_PTR(-ENOMEM);
+
+ old_htable = rcu_replace_pointer(ima_htable, new_htable,
+ lockdep_is_held(&ima_extend_list_mutex));
+
+ return old_htable;
+}
+
+int __init ima_init_htable(void)
+{
+ struct hlist_head *old_htable;
+
+ mutex_lock(&ima_extend_list_mutex);
+ old_htable = ima_alloc_replace_htable();
+ mutex_unlock(&ima_extend_list_mutex);
+
+ if (IS_ERR(old_htable))
+ return PTR_ERR(old_htable);
+
+ /* Synchronize_rcu() and kfree() not necessary, only for robustness. */
+ synchronize_rcu();
+ kfree(old_htable);
+ return 0;
+}
+
/* lookup up the digest value in the hash table, and return the entry */
static struct ima_queue_entry *ima_lookup_digest_entry(u8 *digest_value,
int pcr)
{
struct ima_queue_entry *qe, *ret = NULL;
+ struct hlist_head *htable;
unsigned int key;
int rc;
key = ima_hash_key(digest_value);
rcu_read_lock();
- hlist_for_each_entry_rcu(qe, &ima_htable[key], hnext) {
+ htable = rcu_dereference(ima_htable);
+ hlist_for_each_entry_rcu(qe, &htable[key], hnext) {
rc = memcmp(qe->entry->digests[ima_hash_algo_idx].digest,
digest_value, hash_digest_size[ima_hash_algo]);
if ((rc == 0) && (qe->entry->pcr == pcr)) {
@@ -104,6 +138,7 @@ static int ima_add_digest_entry(struct ima_template_entry *entry,
bool update_htable)
{
struct ima_queue_entry *qe;
+ struct hlist_head *htable;
unsigned int key;
qe = kmalloc_obj(*qe);
@@ -116,10 +151,13 @@ static int ima_add_digest_entry(struct ima_template_entry *entry,
INIT_LIST_HEAD(&qe->later);
list_add_tail_rcu(&qe->later, &ima_measurements);
+ htable = rcu_dereference_protected(ima_htable,
+ lockdep_is_held(&ima_extend_list_mutex));
+
atomic_long_inc(&ima_num_entries);
if (update_htable) {
key = ima_hash_key(entry->digests[ima_hash_algo_idx].digest);
- hlist_add_head_rcu(&qe->hnext, &ima_htable[key]);
+ hlist_add_head_rcu(&qe->hnext, &htable[key]);
}
if (binary_runtime_size != ULONG_MAX) {
--
2.43.0
^ permalink raw reply related
* [PATCH v5 00/13] ima: Introduce staging mechanism
From: Roberto Sassu @ 2026-04-29 16:03 UTC (permalink / raw)
To: corbet, skhan, zohar, dmitry.kasatkin, eric.snowberg, paul,
jmorris, serge
Cc: linux-doc, linux-kernel, linux-integrity, linux-security-module,
gregorylumen, chenste, nramas, Roberto Sassu
From: Roberto Sassu <roberto.sassu@huawei.com>
Introduction
============
The IMA measurements list is currently stored in the kernel memory.
Memory occupation grows linearly with the number of entries, and can
become a problem especially in environments with reduced resources.
While there is an advantage in keeping the IMA measurements list in
kernel memory, so that it is always available for reading from the
securityfs interfaces, storing it elsewhere would make it possible to
free precious memory for other kernel components.
Storing the IMA measurements list outside the kernel does not introduce
security issues, since its integrity is anyway protected by the TPM.
Hence, the new IMA staging mechanism is introduced to allow user space
to remove the desired portion of the measurements list from the kernel.
Usage
=====
The IMA staging mechanism can be enabled from the kernel configuration
with the CONFIG_IMA_STAGING option.
If it is enabled, IMA duplicates the current measurements interfaces
(both binary and ASCII), by adding the _staged file suffix. Both the
original and the staging interfaces gain the write permission for the
root user and group, but require the process to have CAP_SYS_ADMIN set.
The staging mechanism supports two flavors.
Staging with prompt
~~~~~~~~~~~~~~~~~~~
The current measurements list is moved to a temporary staging area, and
staged measurements are deleted upon confirmation.
This staging process is achieved with the following steps.
1. echo A > <original interface>: the user requests IMA to stage the
entire measurements list;
2. cat <_staged interface>: the user reads the staged measurements;
3. echo D > <_staged interface>: the user requests IMA to delete
staged measurements.
Staging and deleting
~~~~~~~~~~~~~~~~~~~~
N measurements are staged to a temporary staging area, and immediately
deleted without further confirmation.
This staging process is achieved with the following steps.
1. cat <original interface>: the user reads the current measurements
list and determines what the value N for staging should be;
2. echo N > <original interface>: the user requests IMA to delete N
measurements from the current measurements list.
Management of Staged Measurements
=================================
Since with the staging mechanism measurement entries are removed from
the kernel, the user needs to save the staged ones in a storage and
concatenate them together, so that it can present them to remote
attestation agents as if staging was never done.
Patch set content
=================
Patches 1-8 are preparatory patches to quickly replace the hash table,
maintain separate counters for the different measurements list types,
mediate access to the measurements list interface, and simplify the staging
patches.
Patch 9 introduces the staging with prompt flavor. Patch 10 makes it
possible to flush the hash table when deleting all the staged measurements.
Patch 11 introduces the staging and deleting flavor. Patch 12 prevents
measurement entries from being stored twice if there is contention between
the measurements interfaces and kexec. Patch 13 adds the documentation of
the staging mechanism.
Changelog
=========
v4:
- Add write permission to the original measurement interface, and move
the A and N staging commands to that interface
- Better explain the two staging flavors and highlight that the staging
and deleting flavor only stages measurements internally
- Rename ima_queue_staged_delete_partial() to ima_queue_delete_partial()
- Replace ima_staged_measurements_prepended with per measurements list
flag to avoid copying staged and active list measurements twice
- Optimize the staging and deleting flavor by locklessly determining the
cut position in the active list, and immediately deleting entries
without explicit staging and splicing (suggested by Steven Chen)
v3:
- Add Kconfig option to enable the staging mechanism (suggested by Mimi)
- Change the meaning of BINARY_STAGED to be just the staged measurements
- Separate the two staging flavors in two different functions:
ima_queue_staged_delete_all() for staging with prompt,
ima_queue_staged_delete_partial() for staging and deleting
- Delete N entries without staging first (suggested by Mimi)
- Avoid duplicate staged entries if there is contention between the
measurements list interfaces and kexec
v2:
- New patch to move measurements and violation counters outside the
ima_h_table structure
- New patch to quickly replace the hash table
- Forbid partial deletion when flushing hash table (suggested by Mimi)
- Ignore ima_flush_htable if CONFIG_IMA_DISABLE_HTABLE is enabled
- BINARY_SIZE_* renamed to BINARY_* for better clarity
- Removed ima_measurements_staged_exist and testing list empty instead
- ima_queue_stage_trim() and ima_queue_delete_staged_trimmed() renamed to
ima_queue_stage() and ima_queue_delete_staged()
- New delete interval [1, ULONG_MAX - 1]
- Rename ima_measure_lock to ima_measure_mutex
- Move seq_open() and seq_release() outside the ima_measure_mutex lock
- Drop ima_measurements_staged_read() and use seq_read() instead
- Optimize create_securityfs_measurement_lists() changes
- New file name format with _staged suffix at the end of the file name
- Use _rcu list variant in ima_dump_measurement_list()
- Remove support for direct trimming and splice the remaining entries to
the active list (suggested by Mimi)
- Hot swap the hash table if flushing is requested
v1:
- Support for direct trimming without staging
- Support unstaging on kexec (requested by Gregory Lumen)
Roberto Sassu (13):
ima: Remove ima_h_table structure
ima: Replace static htable queue with dynamically allocated array
ima: Introduce per binary measurements list type ima_num_entries
counter
ima: Introduce per binary measurements list type binary_runtime_size
value
ima: Introduce _ima_measurements_start() and _ima_measurements_next()
ima: Mediate open/release method of the measurements list
ima: Use snprintf() in create_securityfs_measurement_lists
ima: Introduce ima_dump_measurement()
ima: Add support for staging measurements with prompt
ima: Add support for flushing the hash table when staging measurements
ima: Support staging and deleting N measurements entries
ima: Return error on deleting measurements already copied during kexec
doc: security: Add documentation of the IMA staging mechanism
.../admin-guide/kernel-parameters.txt | 4 +
Documentation/security/IMA-staging.rst | 163 +++++++++
Documentation/security/index.rst | 1 +
MAINTAINERS | 2 +
security/integrity/ima/Kconfig | 16 +
security/integrity/ima/ima.h | 32 +-
security/integrity/ima/ima_api.c | 2 +-
security/integrity/ima/ima_fs.c | 315 ++++++++++++++++--
security/integrity/ima/ima_init.c | 5 +
security/integrity/ima/ima_kexec.c | 53 ++-
security/integrity/ima/ima_queue.c | 283 ++++++++++++++--
11 files changed, 803 insertions(+), 73 deletions(-)
create mode 100644 Documentation/security/IMA-staging.rst
--
2.43.0
^ permalink raw reply
* Re: [RFC PATCH v2 1/4] security: ima: call ima_init() again at late_initcall_sync for defered TPM
From: Roberto Sassu @ 2026-04-29 13:33 UTC (permalink / raw)
To: Paul Moore, Mimi Zohar
Cc: Yeoreum Yun, roberto.sassu, Jonathan McDowell,
linux-security-module, linux-kernel, linux-integrity,
linux-arm-kernel, kvmarm, jmorris, serge, dmitry.kasatkin,
eric.snowberg, jarkko, jgg, sudeep.holla, maz, oupton, joey.gouly,
suzuki.poulose, yuzenghui, catalin.marinas, will, noodles,
sebastianene
In-Reply-To: <CAHC9VhS_WgwhW_NDO91LoTeSzdieGqbwqnwPq8KpavH1_Lwi7g@mail.gmail.com>
On Mon, 2026-04-27 at 21:31 -0400, Paul Moore wrote:
> On Fri, Apr 24, 2026 at 6:49 PM Mimi Zohar <zohar@linux.ibm.com> wrote:
> > On Fri, 2026-04-24 at 18:10 -0400, Paul Moore wrote:
> > > (I'm assuming you meant initcall and not syscall above, but if you're
> > > talking about something else, please let me know.)
> > >
> > > Saying that you aren't comfortable moving IMA initialization to
> > > late-sync is inconsistent with allowing IMA initialization to be
> > > deferred to late-sync. Either it is okay to initialize IMA in
> > > late-sync or it isn't. You must pick one.
> >
> > Yes, we're discussing late_initcall and late_initcall_sync.
> >
> > I prefer to look at it as being pragmatic. I'd rather err on the side of caution
> > and not move the syscall to late_initcall_sync, than move it.
>
> If you were truly erring on the side of caution you wouldn't allow
> late-sync initialization without knowing if it was safe or not.
> Determine whether IMA initialization is safe at late-sync. If it is
> safe, move the init to late-sync; if not, keep it at late and figure
> out another mechanism to sync with the TPM availability. If needed,
> you could probably use the LSM notifier to enable the TPM driver to
> signal when it is up and running.
Yes, I agree with you: either we transition or we don't.
However, all of this looks very fragile and easy to break. If we
want to be on the safe side, we can use any notification mechanism that
is suitable, but at the same time, on the IMA side, we need to deny any
file access that would require a measurement until the TPM comes up.
If you accept this, I don't have any problem with moving to late_sync.
Roberto
^ permalink raw reply
* [linus:master] [proc] 599bbba5a3: kernel-selftests.mm.ksft_mkdirty.sh.mkdirty.fail
From: kernel test robot @ 2026-04-29 8:26 UTC (permalink / raw)
To: Linus Torvalds, Mark Brown
Cc: oe-lkp, lkp, linux-kernel, Vova Tokarev, linux-security-module,
oliver.sang
Hello,
by this commit, we noticed the expected config diff:
==================== PARENT FIRST_BAD KCONFIGS fdcbb1bc06508eb7ad961b3876b16382ae678ef8 ====================
--- /pkg/linux/x86_64-rhel-9.4-kselftests/gcc-14/fdcbb1bc06508eb7ad961b3876b16382ae678ef8/.config 2026-04-26 22:54:11.801204874 +0200
+++ /pkg/linux/x86_64-rhel-9.4-kselftests/gcc-14/599bbba5a36f6de57ab14c373c25881e2b5273f5/.config 2026-04-26 22:01:12.692502621 +0200
@@ -8959,8 +8959,8 @@ CONFIG_ENCRYPTED_KEYS=y
CONFIG_KEY_DH_OPERATIONS=y
CONFIG_KEY_NOTIFICATIONS=y
# CONFIG_SECURITY_DMESG_RESTRICT is not set
-CONFIG_PROC_MEM_ALWAYS_FORCE=y
-# CONFIG_PROC_MEM_FORCE_PTRACE is not set
+# CONFIG_PROC_MEM_ALWAYS_FORCE is not set
+CONFIG_PROC_MEM_FORCE_PTRACE=y
# CONFIG_PROC_MEM_NO_FORCE is not set
CONFIG_SECURITY=y
CONFIG_HAS_SECURITY_AUDIT=y
then we saw the two cases below fail upon this commit:
=========================================================================================
tbox_group/testcase/rootfs/kconfig/compiler/group/sc_nr_hugepages:
igk-rpl-d03/kernel-selftests/debian-13-x86_64-20250902.cgz/x86_64-rhel-9.4-kselftests/gcc-14/mm/2
fdcbb1bc06508eb7 599bbba5a36f6de57ab14c373c2
---------------- ---------------------------
fail:runs %reproduction fail:runs
| | |
:9 67% 6:6 kernel-selftests.mm.ksft_ksm.sh.fail
:9 67% 6:6 kernel-selftests.mm.ksft_mkdirty.sh.fail
we also noticed there is a fix commit in linux-next/master now, but we still
saw the same failures in our tests when testing directly upon it.
below full report FYI.
kernel test robot noticed "kernel-selftests.mm.ksft_mkdirty.sh.mkdirty.fail" on:
commit: 599bbba5a36f6de57ab14c373c25881e2b5273f5 ("proc: make PROC_MEM_FORCE_PTRACE the Kconfig default")
https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git master
[test failed on linus/master 254f49634ee16a731174d2ae34bc50bd5f45e731]
[test failed on linux-next/master 7080e32d3f09d8688c4a87d81bdcc71f7f606b16]
[test failed on fix commit a3907a3169d09ebaeef9631ab6a4534314545ef7]
in testcase: kernel-selftests
version: kernel-selftests-x86_64-9f2693489ef8-1_20260201
with following parameters:
group: mm
sc_nr_hugepages: 2
config: x86_64-rhel-9.4-kselftests
compiler: gcc-14
test machine: 16 threads Intel(R) Core(TM) i7-13620H (Raptor Lake) with 32G memory
(please refer to attached dmesg/kmsg for entire log/backtrace)
If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <oliver.sang@intel.com>
| Closes: https://lore.kernel.org/oe-lkp/202604290921.ac2bf5ff-lkp@intel.com
below is from commit 599bbba5a3:
# timeout set to 900
# selftests: mm: ksft_ksm.sh
# TAP version 13
# # -----------------------------
# # running ./ksm_tests -H -s 100
# # -----------------------------
# # Number of normal pages: 0
# # Number of huge pages: 50
# # Total size: 100 MiB
# # Total time: 0.196202091 s
# # Average speed: 509.679 MiB/s
# # [PASS]
# ok 1 ksm_tests -H -s 100
# # -----------------------------
# # running ./ksm_tests -P -s 100
# # -----------------------------
# # Total size: 100 MiB
# # Total time: 0.201963596 s
# # Average speed: 495.139 MiB/s
# # [PASS]
# ok 2 ksm_tests -P -s 100
# # ----------------------------
# # running ./ksm_tests -M -p 10
# # ----------------------------
# # OK
# # [PASS]
# ok 3 ksm_tests -M -p 10
# # ----------------------
# # running ./ksm_tests -U
# # ----------------------
# # OK
# # [PASS]
# ok 4 ksm_tests -U
# # ---------------------------------
# # running ./ksm_tests -Z -p 10 -z 0
# # ---------------------------------
# # OK
# # [PASS]
# ok 5 ksm_tests -Z -p 10 -z 0
# # ---------------------------------
# # running ./ksm_tests -Z -p 10 -z 1
# # ---------------------------------
# # OK
# # [PASS]
# ok 6 ksm_tests -Z -p 10 -z 1
# # ------------------------------
# # running ./ksm_functional_tests
# # ------------------------------
# # TAP version 13
# # 1..10
# # # [RUN] test_unmerge
# # ok 1 Pages were unmerged
# # # [RUN] test_unmerge_zero_pages
# # ok 2 KSM zero pages were unmerged
# # # [RUN] test_unmerge_discarded
# # ok 3 Pages were unmerged
# # # [RUN] test_unmerge_uffd_wp
# # ok 4 Pages were unmerged
# # # [RUN] test_prot_none
# # not ok 5 ptrace write failed
# # # [RUN] test_prctl
# # ok 6 Setting/clearing PR_SET_MEMORY_MERGE works
# # # [RUN] test_prctl_fork
# # ok 7 PR_SET_MEMORY_MERGE value is inherited
# # # [RUN] test_prctl_fork_exec
# # ok 8 PR_SET_MEMORY_MERGE value is inherited
# # # [RUN] test_prctl_unmerge
# # ok 9 Pages were unmerged
# # # [RUN] test_fork_ksm_merging_page_count
# # ok 10 ksm_merging_pages is not inherited after fork
# # Bail out! 1 out of 10 tests failed
# # # Totals: pass:9 fail:1 xfail:0 xpass:0 skip:0 error:0
# # [FAIL]
# not ok 7 ksm_functional_tests # exit=1
# hwpoison_inject
# # SUMMARY: PASS=6 SKIP=0 FAIL=1
# 1..7
not ok 7 selftests: mm: ksft_ksm.sh # exit=1
...
# timeout set to 900
# selftests: mm: ksft_mkdirty.sh
# TAP version 13
# # -----------------
# # running ./mkdirty
# # -----------------
# # # [INFO] detected THP size: 2048 KiB
# # TAP version 13
# # 1..6
# # # [INFO] PTRACE write access
# # not ok 1 write() failed
# # # [INFO] PTRACE write access to THP
# # not ok 2 write() failed
# # # [INFO] Page migration
# # ok 3 SIGSEGV generated, page not modified
# # # [INFO] Page migration of THP
# # ok 4 SIGSEGV generated, page not modified
# # # [INFO] PTE-mapping a THP
# # ok 5 SIGSEGV generated, page not modified
# # # [INFO] UFFDIO_COPY
# # ok 6 SIGSEGV generated, page not modified
# # Bail out! 2 out of 6 tests failed
# # # Totals: pass:4 fail:2 xfail:0 xpass:0 skip:0 error:0
# # [FAIL]
# not ok 1 mkdirty # exit=1
# hwpoison_inject
# # SUMMARY: PASS=0 SKIP=0 FAIL=1
# 1..1
not ok 14 selftests: mm: ksft_mkdirty.sh # exit=1
below is from
[test failed on fix commit a3907a3169d09ebaeef9631ab6a4534314545ef7]
# timeout set to 900
# selftests: mm: ksft_ksm.sh
# TAP version 13
# # -----------------------------
# # running ./ksm_tests -H -s 100
# # -----------------------------
# # Number of normal pages: 0
# # Number of huge pages: 50
# # Total size: 100 MiB
# # Total time: 0.201459173 s
# # Average speed: 496.378 MiB/s
# # [PASS]
# ok 1 ksm_tests -H -s 100
# # -----------------------------
# # running ./ksm_tests -P -s 100
# # -----------------------------
# # Total size: 100 MiB
# # Total time: 0.197418993 s
# # Average speed: 506.537 MiB/s
# # [PASS]
# ok 2 ksm_tests -P -s 100
# # ----------------------------
# # running ./ksm_tests -M -p 10
# # ----------------------------
# # OK
# # [PASS]
# ok 3 ksm_tests -M -p 10
# # ----------------------
# # running ./ksm_tests -U
# # ----------------------
# # OK
# # [PASS]
# ok 4 ksm_tests -U
# # ---------------------------------
# # running ./ksm_tests -Z -p 10 -z 0
# # ---------------------------------
# # OK
# # [PASS]
# ok 5 ksm_tests -Z -p 10 -z 0
# # ---------------------------------
# # running ./ksm_tests -Z -p 10 -z 1
# # ---------------------------------
# # OK
# # [PASS]
# ok 6 ksm_tests -Z -p 10 -z 1
# # ------------------------------
# # running ./ksm_functional_tests
# # ------------------------------
# # TAP version 13
# # 1..10
# # # [RUN] test_unmerge
# # ok 1 Pages were unmerged
# # # [RUN] test_unmerge_zero_pages
# # ok 2 KSM zero pages were unmerged
# # # [RUN] test_unmerge_discarded
# # ok 3 Pages were unmerged
# # # [RUN] test_unmerge_uffd_wp
# # ok 4 Pages were unmerged
# # # [RUN] test_prot_none
# # not ok 5 ptrace write failed
# # # [RUN] test_prctl
# # ok 6 Setting/clearing PR_SET_MEMORY_MERGE works
# # # [RUN] test_prctl_fork
# # ok 7 PR_SET_MEMORY_MERGE value is inherited
# # # [RUN] test_prctl_fork_exec
# # ok 8 PR_SET_MEMORY_MERGE value is inherited
# # # [RUN] test_prctl_unmerge
# # ok 9 Pages were unmerged
# # # [RUN] test_fork_ksm_merging_page_count
# # ok 10 ksm_merging_pages is not inherited after fork
# # Bail out! 1 out of 10 tests failed
# # # Totals: pass:9 fail:1 xfail:0 xpass:0 skip:0 error:0
# # [FAIL]
# not ok 7 ksm_functional_tests # exit=1
# hwpoison_inject
# # SUMMARY: PASS=6 SKIP=0 FAIL=1
# 1..7
not ok 7 selftests: mm: ksft_ksm.sh # exit=1
...
# timeout set to 900
# selftests: mm: ksft_mkdirty.sh
# TAP version 13
# # -----------------
# # running ./mkdirty
# # -----------------
# # # [INFO] detected THP size: 2048 KiB
# # TAP version 13
# # 1..6
# # # [INFO] PTRACE write access
# # not ok 1 write() failed
# # # [INFO] PTRACE write access to THP
# # not ok 2 write() failed
# # # [INFO] Page migration
# # ok 3 SIGSEGV generated, page not modified
# # # [INFO] Page migration of THP
# # ok 4 SIGSEGV generated, page not modified
# # # [INFO] PTE-mapping a THP
# # ok 5 SIGSEGV generated, page not modified
# # # [INFO] UFFDIO_COPY
# # ok 6 SIGSEGV generated, page not modified
# # Bail out! 2 out of 6 tests failed
# # # Totals: pass:4 fail:2 xfail:0 xpass:0 skip:0 error:0
# # [FAIL]
# not ok 1 mkdirty # exit=1
# hwpoison_inject
# # SUMMARY: PASS=0 SKIP=0 FAIL=1
# 1..1
not ok 14 selftests: mm: ksft_mkdirty.sh # exit=1
The kernel config and materials to reproduce are available at:
https://download.01.org/0day-ci/archive/20260429/202604290921.ac2bf5ff-lkp@intel.com
--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
^ permalink raw reply
* [PATCH 04/14] security/Kconfig.hardening: Remove tautological condition from CC_HAS_RANDSTRUCT
From: Nathan Chancellor @ 2026-04-29 2:59 UTC (permalink / raw)
To: Nathan Chancellor, Nicolas Schier, Bill Wendling, Justin Stitt,
Nick Desaulniers
Cc: linux-kernel, llvm, linux-kbuild, Kees Cook, Gustavo A. R. Silva,
linux-hardening, linux-security-module
In-Reply-To: <20260428-bump-minimum-supported-llvm-version-to-17-v1-0-81d9b2e8ee75@kernel.org>
Now that the minimum supported version of LLVM for building the kernel
has been raised to 17.0.1, the '!Clang || Clang >= 16' dependency for
CONFIG_CC_HAS_RANDSTRUCT is always true, so it can be removed.
Signed-off-by: Nathan Chancellor <nathan@kernel.org>
---
Cc: Kees Cook <kees@kernel.org>
Cc: Gustavo A. R. Silva <gustavoars@kernel.org>
Cc: linux-hardening@vger.kernel.org
Cc: linux-security-module@vger.kernel.org
---
security/Kconfig.hardening | 3 ---
1 file changed, 3 deletions(-)
diff --git a/security/Kconfig.hardening b/security/Kconfig.hardening
index e4f23c08a17a..b90cf9ed4642 100644
--- a/security/Kconfig.hardening
+++ b/security/Kconfig.hardening
@@ -274,9 +274,6 @@ endmenu
config CC_HAS_RANDSTRUCT
def_bool $(cc-option,-frandomize-layout-seed-file=/dev/null)
- # Randstruct was first added in Clang 15, but it isn't safe to use until
- # Clang 16 due to https://github.com/llvm/llvm-project/issues/60349
- depends on !CC_IS_CLANG || CLANG_VERSION >= 160000
choice
prompt "Randomize layout of sensitive kernel structures"
--
2.54.0
^ permalink raw reply related
* [PATCH 03/14] security/Kconfig.hardening: Remove tautological condition from FORTIFY_SOURCE
From: Nathan Chancellor @ 2026-04-29 2:59 UTC (permalink / raw)
To: Nathan Chancellor, Nicolas Schier, Bill Wendling, Justin Stitt,
Nick Desaulniers
Cc: linux-kernel, llvm, linux-kbuild, Kees Cook, Gustavo A. R. Silva,
linux-hardening, linux-security-module
In-Reply-To: <20260428-bump-minimum-supported-llvm-version-to-17-v1-0-81d9b2e8ee75@kernel.org>
Now that the minimum supported version of LLVM for building the kernel
has been raised to 17.0.1, the '!X86_32 || !Clang || Clang >= 16'
dependency of CONFIG_FORTIFY_SOURCE is always true, so it can be
removed.
Signed-off-by: Nathan Chancellor <nathan@kernel.org>
---
Cc: Kees Cook <kees@kernel.org>
Cc: Gustavo A. R. Silva <gustavoars@kernel.org>
Cc: linux-hardening@vger.kernel.org
Cc: linux-security-module@vger.kernel.org
---
security/Kconfig.hardening | 2 --
1 file changed, 2 deletions(-)
diff --git a/security/Kconfig.hardening b/security/Kconfig.hardening
index a0461d648396..e4f23c08a17a 100644
--- a/security/Kconfig.hardening
+++ b/security/Kconfig.hardening
@@ -213,8 +213,6 @@ menu "Bounds checking"
config FORTIFY_SOURCE
bool "Harden common str/mem functions against buffer overflows"
depends on ARCH_HAS_FORTIFY_SOURCE
- # https://github.com/llvm/llvm-project/issues/53645
- depends on !X86_32 || !CC_IS_CLANG || CLANG_VERSION >= 160000
help
Detect overflows of buffers in common string and memory functions
where the compiler can determine and validate the buffer sizes.
--
2.54.0
^ permalink raw reply related
* [PATCH 02/14] security/Kconfig.hardening: Remove tautological condition from CC_HAS_ZERO_CALL_USED_REGS
From: Nathan Chancellor @ 2026-04-29 2:59 UTC (permalink / raw)
To: Nathan Chancellor, Nicolas Schier, Bill Wendling, Justin Stitt,
Nick Desaulniers
Cc: linux-kernel, llvm, linux-kbuild, Kees Cook, Gustavo A. R. Silva,
linux-hardening, linux-security-module
In-Reply-To: <20260428-bump-minimum-supported-llvm-version-to-17-v1-0-81d9b2e8ee75@kernel.org>
Now that the minimum supported version of LLVM for building the kernel
has been raised to 17.0.1, the '!Clang || Clang > 15.0.6' dependency for
CONFIG_CC_HAS_ZERO_CALL_USED_REGS is always true, so it can be removed.
Signed-off-by: Nathan Chancellor <nathan@kernel.org>
---
Cc: Kees Cook <kees@kernel.org>
Cc: Gustavo A. R. Silva <gustavoars@kernel.org>
Cc: linux-hardening@vger.kernel.org
Cc: linux-security-module@vger.kernel.org
---
security/Kconfig.hardening | 3 ---
1 file changed, 3 deletions(-)
diff --git a/security/Kconfig.hardening b/security/Kconfig.hardening
index 86f8768c63d4..a0461d648396 100644
--- a/security/Kconfig.hardening
+++ b/security/Kconfig.hardening
@@ -189,9 +189,6 @@ config INIT_ON_FREE_DEFAULT_ON
config CC_HAS_ZERO_CALL_USED_REGS
def_bool $(cc-option,-fzero-call-used-regs=used-gpr)
- # https://github.com/ClangBuiltLinux/linux/issues/1766
- # https://github.com/llvm/llvm-project/issues/59242
- depends on !CC_IS_CLANG || CLANG_VERSION > 150006
config ZERO_CALL_USED_REGS
bool "Enable register zeroing on function exit"
--
2.54.0
^ permalink raw reply related
* [PATCH 00/14] Bump minimum version of LLVM for building the kernel to 17.0.1
From: Nathan Chancellor @ 2026-04-29 2:59 UTC (permalink / raw)
To: Nathan Chancellor, Nicolas Schier, Bill Wendling, Justin Stitt,
Nick Desaulniers
Cc: linux-kernel, llvm, linux-kbuild, Jonathan Corbet, Shuah Khan,
linux-doc, Kees Cook, Gustavo A. R. Silva, linux-hardening,
linux-security-module, Rong Xu, Han Shen, Russell King,
Arnd Bergmann, linux-arm-kernel, Paul Walmsley, Palmer Dabbelt,
Albert Ou, Alexandre Ghiti, linux-riscv, Thomas Gleixner,
Ingo Molnar, Borislav Petkov, Dave Hansen, x86, H. Peter Anvin,
Peter Zijlstra, Ard Biesheuvel
The current minimum version of LLVM for building the kernel is 15.0.0.
However, there are two deficiencies compared to GCC that were fixed in
LLVM 17 that are starting to become more noticeable.
The first was a bug in LLVM's scope checker [1], where all labels in a
function were validated as potential targets of an asm goto statement,
even if they were not listed in the asm goto statement as targets. This
becomes particularly problematic when the cleanup attribute is used, as
asm goto(... : label_a);
...
label_a:
...
int var __free(foo);
asm goto(... : label_b);
...
label_b:
...
will trigger an error since the scope checker will complain that the
cleanup variable would be skipped when jumping from the first asm goto
to label_b (which obviously cannot happen). This issue was the catalyst
for commit e2ffa15b9baa ("kbuild: Disable CC_HAS_ASM_GOTO_OUTPUT on
clang < 17"). Unfortunately, this issue is reproducible with regular asm
goto in addition to asm goto with outputs, so that change was not
entirely sufficient to avoid the issue altogether. As asm goto has
effectively been required since commit a0a12c3ed057 ("asm goto:
eradicate CC_HAS_ASM_GOTO") and the usage of the cleanup attribute
continues to grow across the tree, raising the minimum to a version that
avoids this issue altogether is a better long term solution than
attempting to work around it at every spot where it happens.
The second issue is an incompatibility with GCC 8.1+ around variables
marked with const being valid constant expressions for _Static_assert
and other macros [2]. With GCC 8.1 being the minimum supported version
since commit 118c40b7b503 ("kbuild: require gcc-8 and binutils-2.30"),
this incompatibility becomes more of a maintenance burden since only
clang-15 and clang-16 are affected by it.
Looking at the clang version of various major distributions through
Docker images, no one should be left behind as a result of this bump, as
the old ones cannot clear the current minimum of 15.0.0.
archlinux:latest clang version 22.1.3
debian:oldoldstable-slim Debian clang version 11.0.1-2
debian:oldstable-slim Debian clang version 14.0.6
debian:stable-slim Debian clang version 19.1.7 (3+b1)
debian:testing-slim Debian clang version 21.1.8 (3+b1)
debian:unstable-slim Debian clang version 21.1.8 (7+b1)
fedora:42 clang version 20.1.8 (Fedora 20.1.8-4.fc42)
fedora:latest clang version 21.1.8 (Fedora 21.1.8-4.fc43)
fedora:44 clang version 22.1.1 (Fedora 22.1.1-2.fc44)
fedora:rawhide clang version 22.1.3 (Fedora 22.1.3-1.fc45)
opensuse/leap:latest clang version 17.0.6
opensuse/tumbleweed:latest clang version 21.1.8
ubuntu:jammy Ubuntu clang version 14.0.0-1ubuntu1.1
ubuntu:noble Ubuntu clang version 18.1.3 (1ubuntu1)
ubuntu:questing Ubuntu clang version 20.1.8 (0ubuntu4)
ubuntu:resolute Ubuntu clang version 21.1.8 (6ubuntu1)
17.0.1 is chosen as the minimum instead of 17.0.0 to ensure that the
particular version of LLVM 17 has the two aforementioned bugs fixed, as
the second was fixed during the 17.0.0 release candidate phase and it
was not until LLVM 18 that LLVM adopted the scheme of x.0.0 being a
prerelease version and x.1.0 being a release version [3] to help with
scenarios such as this.
The first patch in the series does the actual bump. The remaining
patches are cleanups of workarounds for various issues that are no
longer needed with the bump.
I plan to take this via the Kbuild tree for 7.2, please provide Acks as
necessary.
[1]: https://github.com/llvm/llvm-project/commit/f023f5cdb2e6c19026f04a15b5a935c041835d14
[2]: https://github.com/llvm/llvm-project/commit/0b2d5b967d98375793897295d651f58f6fbd3034
[3]: https://github.com/llvm/llvm-project/commit/4532617ae420056bf32f6403dde07fb99d276a49
---
Nathan Chancellor (14):
kbuild: Bump minimum version of LLVM for building the kernel to 17.0.1
security/Kconfig.hardening: Remove tautological condition from CC_HAS_ZERO_CALL_USED_REGS
security/Kconfig.hardening: Remove tautological condition from FORTIFY_SOURCE
security/Kconfig.hardening: Remove tautological condition from CC_HAS_RANDSTRUCT
arch/Kconfig: Remove tautological conditions from HAS_LTO_CLANG
arch/Kconfig: Remove tautological condition from AUTOFDO_CLANG
ARM: Drop tautological ld.lld conditions from ARCH_MULTI_V4{,T}
riscv: Remove tautological condition from selection of ARCH_SUPPORTS_CFI
riscv: Drop tautological condition from TOOLCHAIN_NEEDS_OLD_ISA_SPEC
scripts/Makefile.warn: Drop -Wformat handling for clang < 16
x86/build: Drop unused '-ffreestanding' addition to KBUILD_CFLAGS
x86/module: Revert "Deal with GOT based stack cookie load on Clang < 17"
x86/entry/vdso32: Remove conditional omission of '.cfi_offset eflags'
kbuild: Remove check for broken scoping with clang < 17 in CC_HAS_ASM_GOTO_OUTPUT
Documentation/process/changes.rst | 2 +-
arch/Kconfig | 5 +----
arch/arm/Kconfig.platforms | 4 ----
arch/riscv/Kconfig | 16 +++++++---------
arch/x86/Makefile | 5 -----
arch/x86/entry/vdso/vdso32/sigreturn.S | 10 ----------
arch/x86/include/asm/elf.h | 5 ++---
arch/x86/kernel/module.c | 15 ---------------
init/Kconfig | 3 ---
scripts/Makefile.warn | 10 ----------
scripts/min-tool-version.sh | 2 +-
security/Kconfig.hardening | 8 --------
12 files changed, 12 insertions(+), 73 deletions(-)
---
base-commit: 254f49634ee16a731174d2ae34bc50bd5f45e731
change-id: 20260422-bump-minimum-supported-llvm-version-to-17-b4638a58b043
Best regards,
--
Nathan Chancellor <nathan@kernel.org>
^ permalink raw reply
* [PATCH 1/1] yama: clean-up ptrace relations upon activating YAMA_SCOPE_NO_ATTACH
From: Ethan Ferguson @ 2026-04-28 19:28 UTC (permalink / raw)
To: kees, paul, jmorris, serge
Cc: linux-security-module, linux-kernel, Ethan Ferguson
In-Reply-To: <20260428192818.1035760-1-ethan.ferguson@zetier.com>
Clean up ptracer_relations upon YAMA_SCOPE_NO_ATTACH, and prevent
further modification by processes.
Signed-off-by: Ethan Ferguson <ethan.ferguson@zetier.com>
---
security/yama/yama_lsm.c | 23 ++++++++++++++++++-----
1 file changed, 18 insertions(+), 5 deletions(-)
diff --git a/security/yama/yama_lsm.c b/security/yama/yama_lsm.c
index cef3776cf3b2..3b7c5384e6bc 100644
--- a/security/yama/yama_lsm.c
+++ b/security/yama/yama_lsm.c
@@ -26,6 +26,7 @@
#define YAMA_SCOPE_NO_ATTACH 3
static int ptrace_scope = YAMA_SCOPE_RELATIONAL;
+static int max_scope = YAMA_SCOPE_NO_ATTACH;
/* describe a ptrace relationship for potential exception */
struct ptrace_relation {
@@ -119,7 +120,7 @@ static void yama_relation_cleanup(struct work_struct *work)
spin_lock(&ptracer_relations_lock);
rcu_read_lock();
list_for_each_entry_rcu(relation, &ptracer_relations, node) {
- if (relation->invalid) {
+ if (relation->invalid || ptrace_scope == max_scope) {
list_del_rcu(&relation->node);
kfree_rcu(relation, rcu);
}
@@ -204,7 +205,8 @@ static void yama_ptracer_del(struct task_struct *tracer,
*/
static void yama_task_free(struct task_struct *task)
{
- yama_ptracer_del(task, task);
+ if (ptrace_scope <= max_scope)
+ yama_ptracer_del(task, task);
}
/**
@@ -224,6 +226,9 @@ static int yama_task_prctl(int option, unsigned long arg2, unsigned long arg3,
int rc = -ENOSYS;
struct task_struct *myself;
+ if (ptrace_scope == max_scope)
+ return -EPERM;
+
switch (option) {
case PR_SET_PTRACER:
/* Since a thread can call prctl(), find the group leader
@@ -432,6 +437,7 @@ static struct security_hook_list yama_hooks[] __ro_after_init = {
static int yama_dointvec_minmax(const struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
+ int ret;
struct ctl_table table_copy;
if (write && !capable(CAP_SYS_PTRACE))
@@ -442,10 +448,17 @@ static int yama_dointvec_minmax(const struct ctl_table *table, int write,
if (*(int *)table_copy.data == *(int *)table_copy.extra2)
table_copy.extra1 = table_copy.extra2;
- return proc_dointvec_minmax(&table_copy, write, buffer, lenp, ppos);
-}
+ ret = proc_dointvec_minmax(&table_copy, write, buffer, lenp, ppos);
+ if (ret < 0)
+ return ret;
-static int max_scope = YAMA_SCOPE_NO_ATTACH;
+ /* If max_scope was just activated in this call */
+ if (*(int *)table_copy.data == *(int *)table_copy.extra2 &&
+ table_copy.extra1 != table_copy.extra2)
+ schedule_work(&yama_relation_work);
+
+ return 0;
+}
static const struct ctl_table yama_sysctl_table[] = {
{
--
2.43.0
^ permalink raw reply related
* [PATCH 0/1] yama: clean-up ptrace relations upon activating YAMA_SCOPE_NO_ATTACH
From: Ethan Ferguson @ 2026-04-28 19:28 UTC (permalink / raw)
To: kees, paul, jmorris, serge
Cc: linux-security-module, linux-kernel, Ethan Ferguson
Once yama's ptrace_scope gets set to its max value (currently
YAMA_SCOPE_NO_ATTACH), all ptrace actions will forever be denied.
However, processes may still add ptrace relations, and the memory
to store these relations is still allocated, even though it is never
used again.
This patch cleans up all memory related to ptracer_relations upon
YAMA_SCOPE_NO_ATTACH, and additionally disallows further modification
of ptracer_relations from processes.
Ethan Ferguson (1):
yama: clean-up ptrace relations upon activating YAMA_SCOPE_NO_ATTACH
security/yama/yama_lsm.c | 23 ++++++++++++++++++-----
1 file changed, 18 insertions(+), 5 deletions(-)
base-commit: cf2f06f7152d
--
2.43.0
^ permalink raw reply
* Re: [PATCH ported/repost v2] security,fs,nfs,net: update security_inode_listsecurity() interface
From: Paul Moore @ 2026-04-28 19:26 UTC (permalink / raw)
To: selinux, linux-security-module, linux-fsdevel, linux-nfs
Cc: stephen.smalley.work
In-Reply-To: <20260428192119.226244-2-paul@paul-moore.com>
On Tue, Apr 28, 2026 at 3:21 PM Paul Moore <paul@paul-moore.com> wrote:
>
> From: Stephen Smalley <stephen.smalley.work@gmail.com>
>
> Update the security_inode_listsecurity() interface to allow
> use of the xattr_list_one() helper and update the hook
> implementations.
>
> Link: https://lore.kernel.org/selinux/20250424152822.2719-1-stephen.smalley.work@gmail.com
> Signed-off-by: Stephen Smalley <stephen.smalley.work@gmail.com>
> [PM: forward porting to bring this patch up to v7.1-rc1+]
> Signed-off-by: Paul Moore <paul@paul-moore.com>
> ---
> fs/nfs/nfs4proc.c | 7 ++-----
> fs/xattr.c | 11 +++++++----
> include/linux/lsm_hook_defs.h | 4 ++--
> include/linux/security.h | 5 +++--
> security/security.c | 16 ++++++++--------
> security/selinux/hooks.c | 10 +++-------
> security/smack/smack_lsm.c | 13 ++++---------
> 7 files changed, 29 insertions(+), 37 deletions(-)
With the security_inode_listsecurity() cleanup shipping in Linux v7.0,
I wanted to get this patch ready for the next merge window. As
expected, some borderline non-trivial porting was needed, so I'm
posting the ported version in case anyone wants to review the patch
again. If I don't hear anything over the next few days, I'll plan to
merge this into lsm/dev later this week.
The SELinux test suite runs clean for both local and NFS test runs.
--
paul-moore.com
^ permalink raw reply
* [PATCH ported/repost v2] security,fs,nfs,net: update security_inode_listsecurity() interface
From: Paul Moore @ 2026-04-28 19:21 UTC (permalink / raw)
To: selinux, linux-security-module, linux-fsdevel, linux-nfs
Cc: stephen.smalley.work
In-Reply-To: <20250428195022.24587-2-stephen.smalley.work@gmail.com>
From: Stephen Smalley <stephen.smalley.work@gmail.com>
Update the security_inode_listsecurity() interface to allow
use of the xattr_list_one() helper and update the hook
implementations.
Link: https://lore.kernel.org/selinux/20250424152822.2719-1-stephen.smalley.work@gmail.com
Signed-off-by: Stephen Smalley <stephen.smalley.work@gmail.com>
[PM: forward porting to bring this patch up to v7.1-rc1+]
Signed-off-by: Paul Moore <paul@paul-moore.com>
---
fs/nfs/nfs4proc.c | 7 ++-----
fs/xattr.c | 11 +++++++----
include/linux/lsm_hook_defs.h | 4 ++--
include/linux/security.h | 5 +++--
security/security.c | 16 ++++++++--------
security/selinux/hooks.c | 10 +++-------
security/smack/smack_lsm.c | 13 ++++---------
7 files changed, 29 insertions(+), 37 deletions(-)
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index a9b8d482d289..a16342056ae5 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -10562,13 +10562,10 @@ static ssize_t nfs4_listxattr(struct dentry *dentry, char *list, size_t size)
left -= error;
}
- error2 = security_inode_listsecurity(d_inode(dentry), list, left);
+ error2 = security_inode_listsecurity(d_inode(dentry), &list, &left);
if (error2 < 0)
return error2;
- if (list) {
- list += error2;
- left -= error2;
- }
+ error2 = size - error - left;
error3 = nfs4_listxattr_nfs4_user(d_inode(dentry), list, left);
if (error3 < 0)
diff --git a/fs/xattr.c b/fs/xattr.c
index 09ecbaaa1660..0bc3b47e6936 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -510,9 +510,12 @@ vfs_listxattr(struct dentry *dentry, char *list, size_t size)
if (inode->i_op->listxattr) {
error = inode->i_op->listxattr(dentry, list, size);
} else {
- error = security_inode_listsecurity(inode, list, size);
- if (size && error > size)
- error = -ERANGE;
+ ssize_t remaining = size;
+
+ error = security_inode_listsecurity(inode, &list, &remaining);
+ if (error)
+ return error;
+ error = size - remaining;
}
return error;
}
@@ -1540,7 +1543,7 @@ ssize_t simple_xattr_list(struct inode *inode, struct simple_xattrs *xattrs,
if (err)
return err;
- err = security_inode_listsecurity(inode, buffer, remaining_size);
+ err = security_inode_listsecurity(inode, &buffer, &remaining_size);
if (err < 0)
return err;
diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h
index 2b8dfb35caed..65c9609ec207 100644
--- a/include/linux/lsm_hook_defs.h
+++ b/include/linux/lsm_hook_defs.h
@@ -176,8 +176,8 @@ LSM_HOOK(int, -EOPNOTSUPP, inode_getsecurity, struct mnt_idmap *idmap,
struct inode *inode, const char *name, void **buffer, bool alloc)
LSM_HOOK(int, -EOPNOTSUPP, inode_setsecurity, struct inode *inode,
const char *name, const void *value, size_t size, int flags)
-LSM_HOOK(int, 0, inode_listsecurity, struct inode *inode, char *buffer,
- size_t buffer_size)
+LSM_HOOK(int, 0, inode_listsecurity, struct inode *inode, char **buffer,
+ ssize_t *remaining_size)
LSM_HOOK(void, LSM_RET_VOID, inode_getlsmprop, struct inode *inode,
struct lsm_prop *prop)
LSM_HOOK(int, 0, inode_copy_up, struct dentry *src, struct cred **new)
diff --git a/include/linux/security.h b/include/linux/security.h
index 41d7367cf403..153e9043058f 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -459,7 +459,7 @@ int security_inode_getsecurity(struct mnt_idmap *idmap,
struct inode *inode, const char *name,
void **buffer, bool alloc);
int security_inode_setsecurity(struct inode *inode, const char *name, const void *value, size_t size, int flags);
-int security_inode_listsecurity(struct inode *inode, char *buffer, size_t buffer_size);
+int security_inode_listsecurity(struct inode *inode, char **buffer, ssize_t *remaining_size);
void security_inode_getlsmprop(struct inode *inode, struct lsm_prop *prop);
int security_inode_copy_up(struct dentry *src, struct cred **new);
int security_inode_copy_up_xattr(struct dentry *src, const char *name);
@@ -1097,7 +1097,8 @@ static inline int security_inode_setsecurity(struct inode *inode, const char *na
return -EOPNOTSUPP;
}
-static inline int security_inode_listsecurity(struct inode *inode, char *buffer, size_t buffer_size)
+static inline int security_inode_listsecurity(struct inode *inode,
+ char **buffer, ssize_t *remaining_size)
{
return 0;
}
diff --git a/security/security.c b/security/security.c
index 4e999f023651..71aea8fdf014 100644
--- a/security/security.c
+++ b/security/security.c
@@ -2258,22 +2258,22 @@ int security_inode_setsecurity(struct inode *inode, const char *name,
/**
* security_inode_listsecurity() - List the xattr security label names
* @inode: inode
- * @buffer: buffer
- * @buffer_size: size of buffer
+ * @buffer: pointer to buffer
+ * @remaining_size: pointer to remaining size of buffer
*
* Copy the extended attribute names for the security labels associated with
- * @inode into @buffer. The maximum size of @buffer is specified by
- * @buffer_size. @buffer may be NULL to request the size of the buffer
- * required.
+ * @inode into *(@buffer). The remaining size of @buffer is specified by
+ * *(@remaining_size). *(@buffer) may be NULL to request the size of the
+ * buffer required. Updates *(@buffer) and *(@remaining_size).
*
- * Return: Returns number of bytes used/required on success.
+ * Return: Returns 0 on success, or -errno on failure.
*/
int security_inode_listsecurity(struct inode *inode,
- char *buffer, size_t buffer_size)
+ char **buffer, ssize_t *remaining_size)
{
if (unlikely(IS_PRIVATE(inode)))
return 0;
- return call_int_hook(inode_listsecurity, inode, buffer, buffer_size);
+ return call_int_hook(inode_listsecurity, inode, buffer, remaining_size);
}
EXPORT_SYMBOL(security_inode_listsecurity);
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 97801966bf32..4ae736755557 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -3684,16 +3684,12 @@ static int selinux_inode_setsecurity(struct inode *inode, const char *name,
return 0;
}
-static int selinux_inode_listsecurity(struct inode *inode, char *buffer, size_t buffer_size)
+static int selinux_inode_listsecurity(struct inode *inode, char **buffer,
+ ssize_t *remaining_size)
{
- const int len = sizeof(XATTR_NAME_SELINUX);
-
if (!selinux_initialized())
return 0;
-
- if (buffer && len <= buffer_size)
- memcpy(buffer, XATTR_NAME_SELINUX, len);
- return len;
+ return xattr_list_one(buffer, remaining_size, XATTR_NAME_SELINUX);
}
static void selinux_inode_getlsmprop(struct inode *inode, struct lsm_prop *prop)
diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c
index 3f9ae05039a2..ff115068c5c0 100644
--- a/security/smack/smack_lsm.c
+++ b/security/smack/smack_lsm.c
@@ -1665,17 +1665,12 @@ static int smack_inode_getsecurity(struct mnt_idmap *idmap,
* smack_inode_listsecurity - list the Smack attributes
* @inode: the object
* @buffer: where they go
- * @buffer_size: size of buffer
+ * @remaining_size: size of buffer
*/
-static int smack_inode_listsecurity(struct inode *inode, char *buffer,
- size_t buffer_size)
+static int smack_inode_listsecurity(struct inode *inode, char **buffer,
+ ssize_t *remaining_size)
{
- int len = sizeof(XATTR_NAME_SMACK);
-
- if (buffer != NULL && len <= buffer_size)
- memcpy(buffer, XATTR_NAME_SMACK, len);
-
- return len;
+ return xattr_list_one(buffer, remaining_size, XATTR_NAME_SMACK);
}
/**
--
2.54.0
^ permalink raw reply related
* Re: [PATCH bpf-next 1/2] bpf: add bpf_init_inode_xattr kfunc for atomic inode labeling
From: Kumar Kartikeya Dwivedi @ 2026-04-28 16:33 UTC (permalink / raw)
To: Matt Bobrowski
Cc: Song Liu, David Windsor, Alexander Viro, Christian Brauner,
Alexei Starovoitov, Daniel Borkmann, Andrii Nakryiko,
Eduard Zingerman, KP Singh, Paul Moore, James Morris,
Serge E. Hallyn, Jan Kara, John Fastabend, Martin KaFai Lau,
Yonghong Song, Jiri Olsa, linux-fsdevel, linux-kernel, bpf,
linux-security-module
In-Reply-To: <afCUADdbNrpbEPMa@google.com>
On Tue, 28 Apr 2026 at 13:03, Matt Bobrowski <mattbobrowski@google.com> wrote:
>
> On Mon, Apr 27, 2026 at 04:33:18PM +0200, Kumar Kartikeya Dwivedi wrote:
> > On Mon, 27 Apr 2026 at 16:21, Song Liu <song@kernel.org> wrote:
> > >
> > > On Mon, Apr 27, 2026 at 11:11 AM Matt Bobrowski
> > > <mattbobrowski@google.com> wrote:
> > > >
> > > > On Mon, Apr 27, 2026 at 05:32:47AM +0200, Kumar Kartikeya Dwivedi wrote:
> > > > > On Mon, 27 Apr 2026 at 05:24, David Windsor <dwindsor@gmail.com> wrote:
> > > > > >
> > > > > > On Sun, Apr 26, 2026 at 10:57 PM Kumar Kartikeya Dwivedi
> > > > > > <memxor@gmail.com> wrote:
> > > > > > >
> > > > > > > On Mon, 27 Apr 2026 at 02:16, David Windsor <dwindsor@gmail.com> wrote:
> > > > > > > >
> > > > > > > > Add bpf_init_inode_xattr() kfunc for BPF LSM programs to atomically set
> > > > > > > > xattrs via inode_init_security hook using lsm_get_xattr_slot().
> > > > > > > >
> > > > > > > > lsm_get_xattr_slot() claims a slot by writing to xattr_count, which BPF
> > > > > > > > programs cannot do: hook arguments are not directly writable from BPF.
> > > > > > > > To hide this, the BPF-facing API is just bpf_init_inode_xattr(name,
> > > > > > > > value), and the verifier transparently rewrites each call into
> > > > > > > > bpf_init_inode_xattr_impl(xattrs, xattr_count, name, value). xattrs and
> > > > > > > > xattr_count are extracted from the hook context, which the verifier
> > > > > > > > spills to the stack at program entry since R1 is clobbered during normal
> > > > > > > > execution.
> > > > > > > >
> > > > > > > > A previous attempt [1] required a kmalloc string output protocol for
> > > > > > > > the xattr name. Since commit 6bcdfd2cac55 ("security: Allow all LSMs to
> > > > > > > > provide xattrs for inode_init_security hook") [2], the xattr name is no
> > > > > > > > longer allocated; it is a static constant. We take advantage of this by
> > > > > > > > passing the name directly. Because we rely on the hook-specific ctx
> > > > > > > > layout, the kfunc is restricted to lsm/inode_init_security.
> > > > > > > >
> > > > > > > > Link: https://kernsec.org/pipermail/linux-security-module-archive/2022-October/034878.html [1]
> > > > > > > > Link: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=6bcdfd2cac55 [2]
> > > > > > > > Suggested-by: Song Liu <song@kernel.org>
> > > > > > > > Signed-off-by: David Windsor <dwindsor@gmail.com>
> > > > > > > > ---
> > > > > > >
> > > > > > > The explanation and code make no sense to me. Why not pass xattrs and
> > > > > > > xattr_count directly as arguments, even if you choose to restrict the
> > > > > > > kfunc to a specific hook? Why does the verifier core need the hack to
> > > > > > > spill the context and extract the two arguments?
> > > > > > >
> > > > > >
> > > > > > xattr_count is an output parameter; we cannot currently write to it in
> > > > > > bpf as there is no verifier support for writing to int *. xattrs and
> > > > > > xattr_count can be fixed up by the verifier, so we only require the
> > > > > > user to pass the name and value.
> > > > >
> > > > > Sure, but the kfunc can. Did you try passing them in directly?
> > > > > If that doesn't work for some reason, we should fix it instead.
> > > >
> > > > Hm, perhaps this fixup approach might be the simplest in order to
> > > > assure the needed safety?
> > >
> > > +1. I think this is the best approach I can think of.
> >
> > We're not going to add more and more special cases to the verifier.
> > The whole approach is unscalable.
>
> Totally fair of you to push back here. I'm also in agreement with you on
> the fact that extending the BPF verifier with such special casing
> doesn't scale all that well.
>
> > If the concern is that int xattr_count passed for xattrs can be
> > unrelated int pointer obtained from elsewhere, can we pack the xattrs
> > and xattr_count into a struct and pass it as an argument to the LSM?
> > Then the pair struct can be passed in directly, ensuring both
> > originate from the arguments passed to the LSM. That should eliminate
> > concerns about either being out of sync if obtained from different
> > sources.
>
> This could work, but we'd also need to modify all the other
> pre-existing hook implementations along with the core
> security_inode_init_security() LSM hook itself. I don't think that'd
> be an issue. The biggest hurdle here I think would be convincing the
> LSM maintainers themselves.
Yeah, when these parameters were introduced, we changed all LSMs, so I
don't see why we cannot adjust things again to benefit this use case.
>
> > Even if we wanted to ensure argument provenance was stuff loaded from
> > context, the right solution would be some kfunc flag that constrains
> > the argument to be derived by following the ctx pointer, not whatever
> > is done in this patch.
>
> OK, so it is provenance-like tracking which you were initially kinda
> alluding to here. Currently, I don't believe that PTR_TO_CTX is
> preserved upon any subsequent R1 (ctx) dereferences, so we'd need to
> think about how this type could be preserved such that we can enforce
> this kinda constraint (__ctx) at the time which the new BPF kfunc is
> called. Do you have any ideas on how to do this?
I think we'll have to track in the register whether the PTR_TO_BTF_ID
came from a PTR_TO_CTX load. That said, I still prefer changing the
prototype to pack the array and its output size parameter together. It
is even clearer to have a well named type than int *xattr_count in the
prototype.
^ permalink raw reply
* Re: [RFC PATCH v2 1/4] security: ima: call ima_init() again at late_initcall_sync for defered TPM
From: Yeoreum Yun @ 2026-04-28 13:21 UTC (permalink / raw)
To: Paul Moore
Cc: Mimi Zohar, roberto.sassu, Jonathan McDowell,
linux-security-module, linux-kernel, linux-integrity,
linux-arm-kernel, kvmarm, jmorris, serge, dmitry.kasatkin,
eric.snowberg, jarkko, jgg, sudeep.holla, maz, oupton, joey.gouly,
suzuki.poulose, yuzenghui, catalin.marinas, will, noodles,
sebastianene
In-Reply-To: <CAHC9VhS_WgwhW_NDO91LoTeSzdieGqbwqnwPq8KpavH1_Lwi7g@mail.gmail.com>
Hi Paul,
> On Fri, Apr 24, 2026 at 6:49 PM Mimi Zohar <zohar@linux.ibm.com> wrote:
> > On Fri, 2026-04-24 at 18:10 -0400, Paul Moore wrote:
> > > (I'm assuming you meant initcall and not syscall above, but if you're
> > > talking about something else, please let me know.)
> > >
> > > Saying that you aren't comfortable moving IMA initialization to
> > > late-sync is inconsistent with allowing IMA initialization to be
> > > deferred to late-sync. Either it is okay to initialize IMA in
> > > late-sync or it isn't. You must pick one.
> >
> > Yes, we're discussing late_initcall and late_initcall_sync.
> >
> > I prefer to look at it as being pragmatic. I'd rather err on the side of caution
> > and not move the syscall to late_initcall_sync, than move it.
>
> If you were truly erring on the side of caution you wouldn't allow
> late-sync initialization without knowing if it was safe or not.
> Determine whether IMA initialization is safe at late-sync. If it is
> safe, move the init to late-sync; if not, keep it at late and figure
> out another mechanism to sync with the TPM availability. If needed,
> you could probably use the LSM notifier to enable the TPM driver to
> signal when it is up and running.
I don't think the LSM notifier would be a good fit, since it is a one-time
notification for initialisation and it wouldn't properly tell
whether the TPM is present in the system or not, unless the
ima_init() functions are rewritten to distinguish "TPM deferred" from
"TPM doesn't exist" in the system (e.g. boot-aggregate log creation).
One question, though.
In the end, for systems where the TPM has already been probed by late_initcall(),
init_ima() continues to be called at late_initcall(), while the above approach
is introduced for systems where the TPM is not properly initialized by that point.
If init_ima(), which used to be called at late_initcall(),
were instead called at late_initcall_sync(), could this break system integration?
In my view, both late_initcall and late_initcall_sync run during the do_basic_setup() phase,
so it doesn’t seem like this would cause tampering or affect things like the creation of the boot-aggregate log.
Is there any particular reason why init_ima() must be called specifically at late_initcall()?
Thanks.
--
Sincerely,
Yeoreum Yun
^ permalink raw reply
* Re: [PATCH bpf-next 1/2] bpf: add bpf_init_inode_xattr kfunc for atomic inode labeling
From: Matt Bobrowski @ 2026-04-28 11:03 UTC (permalink / raw)
To: Kumar Kartikeya Dwivedi
Cc: Song Liu, David Windsor, Alexander Viro, Christian Brauner,
Alexei Starovoitov, Daniel Borkmann, Andrii Nakryiko,
Eduard Zingerman, KP Singh, Paul Moore, James Morris,
Serge E. Hallyn, Jan Kara, John Fastabend, Martin KaFai Lau,
Yonghong Song, Jiri Olsa, linux-fsdevel, linux-kernel, bpf,
linux-security-module
In-Reply-To: <CAP01T75SBw6NNvBKyPW11JSYY2oh449yoBsWi_GOBR5Kq1ykmw@mail.gmail.com>
On Mon, Apr 27, 2026 at 04:33:18PM +0200, Kumar Kartikeya Dwivedi wrote:
> On Mon, 27 Apr 2026 at 16:21, Song Liu <song@kernel.org> wrote:
> >
> > On Mon, Apr 27, 2026 at 11:11 AM Matt Bobrowski
> > <mattbobrowski@google.com> wrote:
> > >
> > > On Mon, Apr 27, 2026 at 05:32:47AM +0200, Kumar Kartikeya Dwivedi wrote:
> > > > On Mon, 27 Apr 2026 at 05:24, David Windsor <dwindsor@gmail.com> wrote:
> > > > >
> > > > > On Sun, Apr 26, 2026 at 10:57 PM Kumar Kartikeya Dwivedi
> > > > > <memxor@gmail.com> wrote:
> > > > > >
> > > > > > On Mon, 27 Apr 2026 at 02:16, David Windsor <dwindsor@gmail.com> wrote:
> > > > > > >
> > > > > > > Add bpf_init_inode_xattr() kfunc for BPF LSM programs to atomically set
> > > > > > > xattrs via inode_init_security hook using lsm_get_xattr_slot().
> > > > > > >
> > > > > > > lsm_get_xattr_slot() claims a slot by writing to xattr_count, which BPF
> > > > > > > programs cannot do: hook arguments are not directly writable from BPF.
> > > > > > > To hide this, the BPF-facing API is just bpf_init_inode_xattr(name,
> > > > > > > value), and the verifier transparently rewrites each call into
> > > > > > > bpf_init_inode_xattr_impl(xattrs, xattr_count, name, value). xattrs and
> > > > > > > xattr_count are extracted from the hook context, which the verifier
> > > > > > > spills to the stack at program entry since R1 is clobbered during normal
> > > > > > > execution.
> > > > > > >
> > > > > > > A previous attempt [1] required a kmalloc string output protocol for
> > > > > > > the xattr name. Since commit 6bcdfd2cac55 ("security: Allow all LSMs to
> > > > > > > provide xattrs for inode_init_security hook") [2], the xattr name is no
> > > > > > > longer allocated; it is a static constant. We take advantage of this by
> > > > > > > passing the name directly. Because we rely on the hook-specific ctx
> > > > > > > layout, the kfunc is restricted to lsm/inode_init_security.
> > > > > > >
> > > > > > > Link: https://kernsec.org/pipermail/linux-security-module-archive/2022-October/034878.html [1]
> > > > > > > Link: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=6bcdfd2cac55 [2]
> > > > > > > Suggested-by: Song Liu <song@kernel.org>
> > > > > > > Signed-off-by: David Windsor <dwindsor@gmail.com>
> > > > > > > ---
> > > > > >
> > > > > > The explanation and code make no sense to me. Why not pass xattrs and
> > > > > > xattr_count directly as arguments, even if you choose to restrict the
> > > > > > kfunc to a specific hook? Why does the verifier core need the hack to
> > > > > > spill the context and extract the two arguments?
> > > > > >
> > > > >
> > > > > xattr_count is an output parameter; we cannot currently write to it in
> > > > > bpf as there is no verifier support for writing to int *. xattrs and
> > > > > xattr_count can be fixed up by the verifier, so we only require the
> > > > > user to pass the name and value.
> > > >
> > > > Sure, but the kfunc can. Did you try passing them in directly?
> > > > If that doesn't work for some reason, we should fix it instead.
> > >
> > > Hm, perhaps this fixup approach might be the simplest in order to
> > > assure the needed safety?
> >
> > +1. I think this is the best approach I can think of.
>
> We're not going to add more and more special cases to the verifier.
> The whole approach is unscalable.
Totally fair of you to push back here. I'm also in agreement with you on
the fact that extending the BPF verifier with such special casing
doesn't scale all that well.
> If the concern is that int xattr_count passed for xattrs can be
> unrelated int pointer obtained from elsewhere, can we pack the xattrs
> and xattr_count into a struct and pass it as an argument to the LSM?
> Then the pair struct can be passed in directly, ensuring both
> originate from the arguments passed to the LSM. That should eliminate
> concerns about either being out of sync if obtained from different
> sources.
This could work, but we'd also need to modify all the other
pre-existing hook implementations along with the core
security_inode_init_security() LSM hook itself. I don't think that'd
be an issue. The biggest hurdle here I think would be convincing the
LSM maintainers themselves.
> Even if we wanted to ensure argument provenance was stuff loaded from
> context, the right solution would be some kfunc flag that constrains
> the argument to be derived by following the ctx pointer, not whatever
> is done in this patch.
OK, so it is provenance-like tracking which you were initially kinda
alluding to here. Currently, I don't believe that PTR_TO_CTX is
preserved upon any subsequent R1 (ctx) dereferences, so we'd need to
think about how this type could be preserved such that we can enforce
this kind of constraint (__ctx) at the time the new BPF kfunc is
called. Do you have any ideas on how to do this?
^ permalink raw reply
* Re: [RFC PATCH v2 1/4] security: ima: call ima_init() again at late_initcall_sync for defered TPM
From: Paul Moore @ 2026-04-28 1:31 UTC (permalink / raw)
To: Yeoreum Yun
Cc: Mimi Zohar, roberto.sassu, Jonathan McDowell,
linux-security-module, linux-kernel, linux-integrity,
linux-arm-kernel, kvmarm, jmorris, serge, dmitry.kasatkin,
eric.snowberg, jarkko, jgg, sudeep.holla, maz, oupton, joey.gouly,
suzuki.poulose, yuzenghui, catalin.marinas, will, noodles,
sebastianene
In-Reply-To: <aexIwJpno3iPIdRD@e129823.arm.com>
On Sat, Apr 25, 2026 at 12:53 AM Yeoreum Yun <yeoreum.yun@arm.com> wrote:
> > > I understand the need to ensure that the TPM is available, but if it
> > > isn't safe to wait to initialize IMA at late_initcall_sync() then it
> > > would seem like this is a bad option and we need another mechanism to
> > > synchronize IMA with TPM devices. If it is safe to initalize IMA in
> > > late_initcall_sync(), just do that and be done with it.
> >
> > Within the same initcall level there is no way of ordering the initialization.
> > Yeorum attempted to address the ordering issue in commit 0e0546eabcd6
> > ("firmware: arm_ffa: Change initcall level of ffa_init() to rootfs_initcall"),
> > which is being reverted in this patch set.
> >
> > Ordering within an initcall level needs to be fixed, but for now retrying at
> > late_initcall_sync works for some, hopefully most, cases.
>
> Ordering within an initcall level is not good idea.
Agreed. That's why we have the different initcall levels.
--
paul-moore.com
^ permalink raw reply
* Re: [RFC PATCH v2 1/4] security: ima: call ima_init() again at late_initcall_sync for defered TPM
From: Paul Moore @ 2026-04-28 1:31 UTC (permalink / raw)
To: Mimi Zohar
Cc: Yeoreum Yun, roberto.sassu, Jonathan McDowell,
linux-security-module, linux-kernel, linux-integrity,
linux-arm-kernel, kvmarm, jmorris, serge, dmitry.kasatkin,
eric.snowberg, jarkko, jgg, sudeep.holla, maz, oupton, joey.gouly,
suzuki.poulose, yuzenghui, catalin.marinas, will, noodles,
sebastianene
In-Reply-To: <1e51c2fd090e5ceb07b1d09e50650c70fd3ccdb1.camel@linux.ibm.com>
On Fri, Apr 24, 2026 at 6:49 PM Mimi Zohar <zohar@linux.ibm.com> wrote:
> On Fri, 2026-04-24 at 18:10 -0400, Paul Moore wrote:
> > (I'm assuming you meant initcall and not syscall above, but if you're
> > talking about something else, please let me know.)
> >
> > Saying that you aren't comfortable moving IMA initialization to
> > late-sync is inconsistent with allowing IMA initialization to be
> > deferred to late-sync. Either it is okay to initialize IMA in
> > late-sync or it isn't. You must pick one.
>
> Yes, we're discussing late_initcall and late_initcall_sync.
>
> I prefer to look at it as being pragmatic. I'd rather err on the side of caution
> and not move the syscall to late_initcall_sync, than move it.
If you were truly erring on the side of caution you wouldn't allow
late-sync initialization without knowing if it was safe or not.
Determine whether IMA initialization is safe at late-sync. If it is
safe, move the init to late-sync; if not, keep it at late and figure
out another mechanism to sync with the TPM availability. If needed,
you could probably use the LSM notifier to enable the TPM driver to
signal when it is up and running.
--
paul-moore.com
^ permalink raw reply
* Re: [PATCH] selinux: don't reserve xattr slot when we won't fill it
From: Paul Moore @ 2026-04-27 23:32 UTC (permalink / raw)
To: David Windsor, Stephen Smalley
Cc: Ondrej Mosnacek, selinux, linux-security-module, linux-kernel
In-Reply-To: <20260426232349.844289-1-dwindsor@gmail.com>
On Apr 26, 2026 David Windsor <dwindsor@gmail.com> wrote:
>
> Move lsm_get_xattr_slot() below the SBLABEL_MNT check so we don't leave
> a NULL-named slot in the array when returning -EOPNOTSUPP; filesystem
> initxattrs() callbacks stop iterating at the first NULL ->name, silently
> dropping xattrs installed by later LSMs.
>
> Signed-off-by: David Windsor <dwindsor@gmail.com>
> ---
> security/selinux/hooks.c | 3 ++-
> 1 file changed, 2 insertions(+), 1 deletion(-)
Good catch, thanks. This seems like a stable candidate so I've merged
it into selinux/stable-7.1 and we'll likely send it up to Linus later this
week.
> diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
> index 97801966bf32..4ff118a9395f 100644
> --- a/security/selinux/hooks.c
> +++ b/security/selinux/hooks.c
> @@ -2966,7 +2966,7 @@ static int selinux_inode_init_security(struct inode *inode, struct inode *dir,
> {
> const struct cred_security_struct *crsec = selinux_cred(current_cred());
> struct superblock_security_struct *sbsec;
> - struct xattr *xattr = lsm_get_xattr_slot(xattrs, xattr_count);
> + struct xattr *xattr;
> u32 newsid, clen;
> u16 newsclass;
> int rc;
> @@ -2992,6 +2992,7 @@ static int selinux_inode_init_security(struct inode *inode, struct inode *dir,
> !(sbsec->flags & SBLABEL_MNT))
> return -EOPNOTSUPP;
>
> + xattr = lsm_get_xattr_slot(xattrs, xattr_count);
> if (xattr) {
> rc = security_sid_to_context_force(newsid,
> &context, &clen);
>
> base-commit: 254f49634ee16a731174d2ae34bc50bd5f45e731
> --
> 2.53.0
--
paul-moore.com
^ permalink raw reply
* Re: [RFC PATCH v1 01/11] security: add LSM blob and hooks for namespaces
From: Paul Moore @ 2026-04-27 21:46 UTC (permalink / raw)
To: Christian Brauner
Cc: Mickaël Salaün, Günther Noack, Serge E . Hallyn,
Justin Suess, Lennart Poettering, Mikhail Ivanov,
Nicolas Bouchinet, Shervin Oloumi, Tingmao Wang, kernel-team,
linux-fsdevel, linux-kernel, linux-security-module
In-Reply-To: <20260427-belegen-euren-997f91347820@brauner>
On Mon, Apr 27, 2026 at 10:57 AM Christian Brauner <brauner@kernel.org> wrote:
> On Fri, Apr 24, 2026 at 03:28:44PM -0400, Paul Moore wrote:
> > On Fri, Apr 24, 2026 at 2:56 PM Mickaël Salaün <mic@digikod.net> wrote:
> > > On Wed, Apr 22, 2026 at 08:19:59PM -0400, Paul Moore wrote:
> > > > On Thu, Mar 12, 2026 at 6:05 AM Mickaël Salaün <mic@digikod.net> wrote:
> > > > >
> > > > > From: Christian Brauner <brauner@kernel.org>
> > > > >
> > > > > All namespace types now share the same ns_common infrastructure. Extend
> > > > > this to include a security blob so LSMs can start managing namespaces
> > > > > uniformly without having to add one-off hooks or security fields to
> > > > > every individual namespace type.
> > > > >
> > > > > Add a ns_security pointer to ns_common and the corresponding lbs_ns
> > > > > blob size to lsm_blob_sizes. Allocation and freeing hooks are called
> > > > > from the common __ns_common_init() and __ns_common_free() paths so
> > > > > every namespace type gets covered in one go. All information about the
> > > > > namespace type and the appropriate casting helpers to get at the
> > > > > containing namespace are available via ns_common making it
> > > > > straightforward for LSMs to differentiate when they need to.
> > > > >
> > > > > A namespace_install hook is called from validate_ns() during setns(2)
> > > > > giving LSMs a chance to enforce policy on namespace transitions.
> > > > >
> > > > > Individual namespace types can still have their own specialized security
> > > > > hooks when needed. This is just the common baseline that makes it easy
> > > > > to track and manage namespaces from the security side without requiring
> > > > > every namespace type to reinvent the wheel.
> > > > >
> > > > > Cc: Günther Noack <gnoack@google.com>
> > > > > Cc: Paul Moore <paul@paul-moore.com>
> > > > > Cc: Serge E. Hallyn <serge@hallyn.com>
> > > > > Signed-off-by: Christian Brauner <brauner@kernel.org>
> > > > > Link: https://lore.kernel.org/r/20260216-work-security-namespace-v1-1-075c28758e1f@kernel.org
> > > > > ---
> > > > > include/linux/lsm_hook_defs.h | 3 ++
> > > > > include/linux/lsm_hooks.h | 1 +
> > > > > include/linux/ns/ns_common_types.h | 3 ++
> > > > > include/linux/security.h | 20 ++++++++
> > > > > kernel/nscommon.c | 12 +++++
> > > > > kernel/nsproxy.c | 8 +++-
> > > > > security/lsm_init.c | 2 +
> > > > > security/security.c | 76 ++++++++++++++++++++++++++++++
> > > > > 8 files changed, 124 insertions(+), 1 deletion(-)
> >
> > ...
> >
> > > > > diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c
> > > > > index 259c4b4f1eeb..f0b30d1907e7 100644
> > > > > --- a/kernel/nsproxy.c
> > > > > +++ b/kernel/nsproxy.c
> > > > > @@ -379,7 +379,13 @@ static int prepare_nsset(unsigned flags, struct nsset *nsset)
> > > > >
> > > > > static inline int validate_ns(struct nsset *nsset, struct ns_common *ns)
> > > > > {
> > > > > - return ns->ops->install(nsset, ns);
> > > > > + int ret;
> > > > > +
> > > > > + ret = ns->ops->install(nsset, ns);
> > > > > + if (ret)
> > > > > + return ret;
> > > > > +
> > > > > + return security_namespace_install(nsset, ns);
> > > > > }
> > > >
> > > > Do we also want a security_namespace_switch() called from within
> > > > switch_task_namespaces()? Of course LSMs would not be able to fail or
> > > > return an error at that point, but it seems reasonable that LSMs might
> > > > want to update LSM state associated with the current task once the
> > > > namespaces have been changed. This is similar to all the "_post_" LSM
> > > > hooks we have for various operations in the VFS and network layers.
> > >
> > > What cannot be infered from security_namespace_install()?
> >
> > We don't actually know if the namespace is attached to a process until
> > we get to switch_task_namespaces().
> >
> > Now that I'm looking at this again, why is the
> > security_namespace_install() call placed after the ns->ops->install()
> > call? From an access control perspective we want the LSM hook before
>
> See https://lore.kernel.org/20260325-filmverleih-auffressen-e897fcf8d3f2@brauner
> where I requested the order to be changed.
So ... does anyone not want this moved? It's time to speak up :)
--
paul-moore.com
^ permalink raw reply
* Re: [PATCH v2 0/4] Firmware LSM hook
From: Leon Romanovsky @ 2026-04-27 19:09 UTC (permalink / raw)
To: Jason Gunthorpe
Cc: Paul Moore, Roberto Sassu, KP Singh, Matt Bobrowski,
Alexei Starovoitov, Daniel Borkmann, John Fastabend,
Andrii Nakryiko, Martin KaFai Lau, Eduard Zingerman, Song Liu,
Yonghong Song, Stanislav Fomichev, Hao Luo, Jiri Olsa, Shuah Khan,
Saeed Mahameed, Itay Avraham, Dave Jiang, Jonathan Cameron, bpf,
linux-kernel, linux-kselftest, linux-rdma, Chiara Meiohas,
Maher Sanalla, linux-security-module
In-Reply-To: <20260426134224.GC3501894@ziepe.ca>
On Sun, Apr 26, 2026 at 10:42:24AM -0300, Jason Gunthorpe wrote:
> On Sun, Apr 26, 2026 at 01:39:57PM +0300, Leon Romanovsky wrote:
> > On Fri, Apr 24, 2026 at 11:19:21AM -0300, Jason Gunthorpe wrote:
> > > On Thu, Apr 23, 2026 at 05:09:50PM +0300, Leon Romanovsky wrote:
> > >
> > > > > > Leon mentioned that different firmware revisions would have different
> > > > > > parameters for a given opcode, and that one would need to inspect
> > > > > > those parameters to properly filter the command. Is that not true, or
> > > > > > am I misreading or misunderstanding Leon's comments?
> > > > >
> > > > > They are ABI stable, so there will be rules about future changes that
> > > > > old software can follow to ignore or reject future things it doesn't
> > > > > understand.
> > > >
> > > > It is wishful thinking and applicable only to mlx5 devices. No one
> > > > promises that other devices follow same ABI rules.
> > >
> > > Well, I will definately kick them out of fwctl if they don't.
> >
> > It is easy to say but harder to follow. The kernel includes many devices that
> > exist only in specific hyperscale environments, where the update cycle is
> > tightly controlled. They easily can break FW backward compatibility.
>
> Well Linus's rule applies here, if it doesn't bother anyone it didn't
> break..
Great, that means they can load any BPF program they want and access whatever
firmware fields they choose. Your earlier claim about 'not breaking FW
compatibility' is only partially correct.
Thanks
>
> Jason
>
^ permalink raw reply
* Re: [PATCH bpf-next 1/2] bpf: add bpf_init_inode_xattr kfunc for atomic inode labeling
From: Song Liu @ 2026-04-27 17:17 UTC (permalink / raw)
To: Kumar Kartikeya Dwivedi
Cc: Matt Bobrowski, David Windsor, Alexander Viro, Christian Brauner,
Alexei Starovoitov, Daniel Borkmann, Andrii Nakryiko,
Eduard Zingerman, KP Singh, Paul Moore, James Morris,
Serge E. Hallyn, Jan Kara, John Fastabend, Martin KaFai Lau,
Yonghong Song, Jiri Olsa, linux-fsdevel, linux-kernel, bpf,
linux-security-module
In-Reply-To: <CAP01T75SBw6NNvBKyPW11JSYY2oh449yoBsWi_GOBR5Kq1ykmw@mail.gmail.com>
On Mon, Apr 27, 2026 at 3:33 PM Kumar Kartikeya Dwivedi
<memxor@gmail.com> wrote:
>
> On Mon, 27 Apr 2026 at 16:21, Song Liu <song@kernel.org> wrote:
[...]
> > > Hm, perhaps this fixup approach might be the simplest in order to
> > > assure the needed safety?
> >
> > +1. I think this is the best approach I can think of.
>
> We're not going to add more and more special cases to the verifier.
> The whole approach is unscalable.
Agreed this is not scalable. One potential solution to this scalability
issue is to move the fixup logic to struct btf_kfunc_id_set, so that this
fixup logic is distributed.
> If the concern is that int xattr_count passed for xattrs can be
> unrelated int pointer obtained from elsewhere, can we pack the xattrs
> and xattr_count into a struct and pass it as an argument to the LSM?
> Then the pair struct can be passed in directly, ensuring both
> originate from the arguments passed to the LSM. That should eliminate
> concerns about either being out of sync if obtained from different
> sources.
I think a trusted pointer of the pair struct will also work. But this means
we need to refactor the LSM hook and other LSMs. The refactoring is
not difficult though.
> Even if we wanted to ensure argument provenance was stuff loaded from
> context, the right solution would be some kfunc flag that constraints
> the argument to be derived by following the ctx pointer, not whatever
> is done in this patch.
We need these two arguments to be the specific fields in the ctx. I am
not sure how to do this with kfunc flags.
Thanks,
Song
^ permalink raw reply
* Re: [RFC PATCH v1 01/11] security: add LSM blob and hooks for namespaces
From: Christian Brauner @ 2026-04-27 14:57 UTC (permalink / raw)
To: Paul Moore
Cc: Mickaël Salaün, Günther Noack, Serge E . Hallyn,
Justin Suess, Lennart Poettering, Mikhail Ivanov,
Nicolas Bouchinet, Shervin Oloumi, Tingmao Wang, kernel-team,
linux-fsdevel, linux-kernel, linux-security-module
In-Reply-To: <CAHC9VhRcokUR0ZKzCuZnZAyaFEMd6EH93BE3OTTKHY9Mo9pVkQ@mail.gmail.com>
On Fri, Apr 24, 2026 at 03:28:44PM -0400, Paul Moore wrote:
> On Fri, Apr 24, 2026 at 2:56 PM Mickaël Salaün <mic@digikod.net> wrote:
> > On Wed, Apr 22, 2026 at 08:19:59PM -0400, Paul Moore wrote:
> > > On Thu, Mar 12, 2026 at 6:05 AM Mickaël Salaün <mic@digikod.net> wrote:
> > > >
> > > > From: Christian Brauner <brauner@kernel.org>
> > > >
> > > > All namespace types now share the same ns_common infrastructure. Extend
> > > > this to include a security blob so LSMs can start managing namespaces
> > > > uniformly without having to add one-off hooks or security fields to
> > > > every individual namespace type.
> > > >
> > > > Add a ns_security pointer to ns_common and the corresponding lbs_ns
> > > > blob size to lsm_blob_sizes. Allocation and freeing hooks are called
> > > > from the common __ns_common_init() and __ns_common_free() paths so
> > > > every namespace type gets covered in one go. All information about the
> > > > namespace type and the appropriate casting helpers to get at the
> > > > containing namespace are available via ns_common making it
> > > > straightforward for LSMs to differentiate when they need to.
> > > >
> > > > A namespace_install hook is called from validate_ns() during setns(2)
> > > > giving LSMs a chance to enforce policy on namespace transitions.
> > > >
> > > > Individual namespace types can still have their own specialized security
> > > > hooks when needed. This is just the common baseline that makes it easy
> > > > to track and manage namespaces from the security side without requiring
> > > > every namespace type to reinvent the wheel.
> > > >
> > > > Cc: Günther Noack <gnoack@google.com>
> > > > Cc: Paul Moore <paul@paul-moore.com>
> > > > Cc: Serge E. Hallyn <serge@hallyn.com>
> > > > Signed-off-by: Christian Brauner <brauner@kernel.org>
> > > > Link: https://lore.kernel.org/r/20260216-work-security-namespace-v1-1-075c28758e1f@kernel.org
> > > > ---
> > > > include/linux/lsm_hook_defs.h | 3 ++
> > > > include/linux/lsm_hooks.h | 1 +
> > > > include/linux/ns/ns_common_types.h | 3 ++
> > > > include/linux/security.h | 20 ++++++++
> > > > kernel/nscommon.c | 12 +++++
> > > > kernel/nsproxy.c | 8 +++-
> > > > security/lsm_init.c | 2 +
> > > > security/security.c | 76 ++++++++++++++++++++++++++++++
> > > > 8 files changed, 124 insertions(+), 1 deletion(-)
>
> ...
>
> > > > diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c
> > > > index 259c4b4f1eeb..f0b30d1907e7 100644
> > > > --- a/kernel/nsproxy.c
> > > > +++ b/kernel/nsproxy.c
> > > > @@ -379,7 +379,13 @@ static int prepare_nsset(unsigned flags, struct nsset *nsset)
> > > >
> > > > static inline int validate_ns(struct nsset *nsset, struct ns_common *ns)
> > > > {
> > > > - return ns->ops->install(nsset, ns);
> > > > + int ret;
> > > > +
> > > > + ret = ns->ops->install(nsset, ns);
> > > > + if (ret)
> > > > + return ret;
> > > > +
> > > > + return security_namespace_install(nsset, ns);
> > > > }
> > >
> > > Do we also want a security_namespace_switch() called from within
> > > switch_task_namespaces()? Of course LSMs would not be able to fail or
> > > return an error at that point, but it seems reasonable that LSMs might
> > > want to update LSM state associated with the current task once the
> > > namespaces have been changed. This is similar to all the "_post_" LSM
> > > hooks we have for various operations in the VFS and network layers.
> >
> > What cannot be infered from security_namespace_install()?
>
> We don't actually know if the namespace is attached to a process until
> we get to switch_task_namespaces().
>
> Now that I'm looking at this again, why is the
> security_namespace_install() call placed after the ns->ops->install()
> call? From an access control perspective we want the LSM hook before
See https://lore.kernel.org/20260325-filmverleih-auffressen-e897fcf8d3f2@brauner
where I requested the order to be changed.
^ permalink raw reply
* Re: [PATCH bpf-next 1/2] bpf: add bpf_init_inode_xattr kfunc for atomic inode labeling
From: Kumar Kartikeya Dwivedi @ 2026-04-27 14:33 UTC (permalink / raw)
To: Song Liu
Cc: Matt Bobrowski, David Windsor, Alexander Viro, Christian Brauner,
Alexei Starovoitov, Daniel Borkmann, Andrii Nakryiko,
Eduard Zingerman, KP Singh, Paul Moore, James Morris,
Serge E. Hallyn, Jan Kara, John Fastabend, Martin KaFai Lau,
Yonghong Song, Jiri Olsa, linux-fsdevel, linux-kernel, bpf,
linux-security-module
In-Reply-To: <CAPhsuW60SRXZbFYRDm-QaTiB8tTtJD_N4jk1=680x5UsZmpj9w@mail.gmail.com>
On Mon, 27 Apr 2026 at 16:21, Song Liu <song@kernel.org> wrote:
>
> On Mon, Apr 27, 2026 at 11:11 AM Matt Bobrowski
> <mattbobrowski@google.com> wrote:
> >
> > On Mon, Apr 27, 2026 at 05:32:47AM +0200, Kumar Kartikeya Dwivedi wrote:
> > > On Mon, 27 Apr 2026 at 05:24, David Windsor <dwindsor@gmail.com> wrote:
> > > >
> > > > On Sun, Apr 26, 2026 at 10:57 PM Kumar Kartikeya Dwivedi
> > > > <memxor@gmail.com> wrote:
> > > > >
> > > > > On Mon, 27 Apr 2026 at 02:16, David Windsor <dwindsor@gmail.com> wrote:
> > > > > >
> > > > > > Add bpf_init_inode_xattr() kfunc for BPF LSM programs to atomically set
> > > > > > xattrs via inode_init_security hook using lsm_get_xattr_slot().
> > > > > >
> > > > > > lsm_get_xattr_slot() claims a slot by writing to xattr_count, which BPF
> > > > > > programs cannot do: hook arguments are not directly writable from BPF.
> > > > > > To hide this, the BPF-facing API is just bpf_init_inode_xattr(name,
> > > > > > value), and the verifier transparently rewrites each call into
> > > > > > bpf_init_inode_xattr_impl(xattrs, xattr_count, name, value). xattrs and
> > > > > > xattr_count are extracted from the hook context, which the verifier
> > > > > > spills to the stack at program entry since R1 is clobbered during normal
> > > > > > execution.
> > > > > >
> > > > > > A previous attempt [1] required a kmalloc string output protocol for
> > > > > > the xattr name. Since commit 6bcdfd2cac55 ("security: Allow all LSMs to
> > > > > > provide xattrs for inode_init_security hook") [2], the xattr name is no
> > > > > > longer allocated; it is a static constant. We take advantage of this by
> > > > > > passing the name directly. Because we rely on the hook-specific ctx
> > > > > > layout, the kfunc is restricted to lsm/inode_init_security.
> > > > > >
> > > > > > Link: https://kernsec.org/pipermail/linux-security-module-archive/2022-October/034878.html [1]
> > > > > > Link: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=6bcdfd2cac55 [2]
> > > > > > Suggested-by: Song Liu <song@kernel.org>
> > > > > > Signed-off-by: David Windsor <dwindsor@gmail.com>
> > > > > > ---
> > > > >
> > > > > The explanation and code make no sense to me. Why not pass xattrs and
> > > > > xattr_count directly as arguments, even if you choose to restrict the
> > > > > kfunc to a specific hook? Why does the verifier core need the hack to
> > > > > spill the context and extract the two arguments?
> > > > >
> > > >
> > > > xattr_count is an output parameter; we cannot currently write to it in
> > > > bpf as there is no verifier support for writing to int *. xattrs and
> > > > xattr_count can be fixed up by the verifier, so we only require the
> > > > user to pass the name and value.
> > >
> > > Sure, but the kfunc can. Did you try passing them in directly?
> > > If that doesn't work for some reason, we should fix it instead.
> >
> > Hm, perhaps this fixup approach might be the simplest in order to
> > assure the needed safety?
>
> +1. I think this is the best approach I can think of.
We're not going to add more and more special cases to the verifier.
The whole approach is unscalable.
If the concern is that the int xattr_count passed for xattrs can be an
unrelated int pointer obtained from elsewhere, can we pack the xattrs
and xattr_count into a struct and pass it as an argument to the LSM?
Then the pair struct can be passed in directly, ensuring both
originate from the arguments passed to the LSM. That should eliminate
concerns about either being out of sync if obtained from different
sources.
Even if we wanted to ensure argument provenance was stuff loaded from
context, the right solution would be some kfunc flag that constrains
the argument to be derived by following the ctx pointer, not whatever
is done in this patch.
>
> Thanks,
> Song
>
> [...]
^ permalink raw reply
* Re: [PATCH bpf-next 1/2] bpf: add bpf_init_inode_xattr kfunc for atomic inode labeling
From: Song Liu @ 2026-04-27 14:20 UTC (permalink / raw)
To: Matt Bobrowski
Cc: Kumar Kartikeya Dwivedi, David Windsor, Alexander Viro,
Christian Brauner, Alexei Starovoitov, Daniel Borkmann,
Andrii Nakryiko, Eduard Zingerman, KP Singh, Paul Moore,
James Morris, Serge E. Hallyn, Jan Kara, John Fastabend,
Martin KaFai Lau, Yonghong Song, Jiri Olsa, linux-fsdevel,
linux-kernel, bpf, linux-security-module
In-Reply-To: <ae82Vv0RWzcXqSaz@google.com>
On Mon, Apr 27, 2026 at 11:11 AM Matt Bobrowski
<mattbobrowski@google.com> wrote:
>
> On Mon, Apr 27, 2026 at 05:32:47AM +0200, Kumar Kartikeya Dwivedi wrote:
> > On Mon, 27 Apr 2026 at 05:24, David Windsor <dwindsor@gmail.com> wrote:
> > >
> > > On Sun, Apr 26, 2026 at 10:57 PM Kumar Kartikeya Dwivedi
> > > <memxor@gmail.com> wrote:
> > > >
> > > > On Mon, 27 Apr 2026 at 02:16, David Windsor <dwindsor@gmail.com> wrote:
> > > > >
> > > > > Add bpf_init_inode_xattr() kfunc for BPF LSM programs to atomically set
> > > > > xattrs via inode_init_security hook using lsm_get_xattr_slot().
> > > > >
> > > > > lsm_get_xattr_slot() claims a slot by writing to xattr_count, which BPF
> > > > > programs cannot do: hook arguments are not directly writable from BPF.
> > > > > To hide this, the BPF-facing API is just bpf_init_inode_xattr(name,
> > > > > value), and the verifier transparently rewrites each call into
> > > > > bpf_init_inode_xattr_impl(xattrs, xattr_count, name, value). xattrs and
> > > > > xattr_count are extracted from the hook context, which the verifier
> > > > > spills to the stack at program entry since R1 is clobbered during normal
> > > > > execution.
> > > > >
> > > > > A previous attempt [1] required a kmalloc string output protocol for
> > > > > the xattr name. Since commit 6bcdfd2cac55 ("security: Allow all LSMs to
> > > > > provide xattrs for inode_init_security hook") [2], the xattr name is no
> > > > > longer allocated; it is a static constant. We take advantage of this by
> > > > > passing the name directly. Because we rely on the hook-specific ctx
> > > > > layout, the kfunc is restricted to lsm/inode_init_security.
> > > > >
> > > > > Link: https://kernsec.org/pipermail/linux-security-module-archive/2022-October/034878.html [1]
> > > > > Link: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=6bcdfd2cac55 [2]
> > > > > Suggested-by: Song Liu <song@kernel.org>
> > > > > Signed-off-by: David Windsor <dwindsor@gmail.com>
> > > > > ---
> > > >
> > > > The explanation and code make no sense to me. Why not pass xattrs and
> > > > xattr_count directly as arguments, even if you choose to restrict the
> > > > kfunc to a specific hook? Why does the verifier core need the hack to
> > > > spill the context and extract the two arguments?
> > > >
> > >
> > > xattr_count is an output parameter; we cannot currently write to it in
> > > bpf as there is no verifier support for writing to int *. xattrs and
> > > xattr_count can be fixed up by the verifier, so we only require the
> > > user to pass the name and value.
> >
> > Sure, but the kfunc can. Did you try passing them in directly?
> > If that doesn't work for some reason, we should fix it instead.
>
> Hm, perhaps this fixup approach might be the simplest in order to
> assure the needed safety?
+1. I think this is the best approach I can think of.
Thanks,
Song
[...]
^ permalink raw reply
* [syzbot] [integrity?] [lsm?] WARNING: bad unlock balance in __filemap_add_folio
From: syzbot @ 2026-04-27 13:36 UTC (permalink / raw)
To: dmitry.kasatkin, eric.snowberg, jmorris, linux-integrity,
linux-kernel, linux-security-module, paul, roberto.sassu, serge,
syzkaller-bugs, zohar
Hello,
syzbot found the following issue on:
HEAD commit: 2e6803928193 Merge tag 'tracefs-v7.1-2' of git://git.kerne..
git tree: upstream
console output: https://syzkaller.appspot.com/x/log.txt?x=117dff16580000
kernel config: https://syzkaller.appspot.com/x/.config?x=80b28e8d6ef9384a
dashboard link: https://syzkaller.appspot.com/bug?extid=914bc925a90b7e137017
compiler: Debian clang version 21.1.8 (++20251221033036+2078da43e25a-1~exp1~20251221153213.50), Debian LLD 21.1.8
Unfortunately, I don't have any reproducer for this issue yet.
Downloadable assets:
disk image: https://storage.googleapis.com/syzbot-assets/690094a31275/disk-2e680392.raw.xz
vmlinux: https://storage.googleapis.com/syzbot-assets/7d17ea4e1f81/vmlinux-2e680392.xz
kernel image: https://storage.googleapis.com/syzbot-assets/c1478f49f523/bzImage-2e680392.xz
IMPORTANT: if you fix the issue, please add the following tag to the commit:
Reported-by: syzbot+914bc925a90b7e137017@syzkaller.appspotmail.com
cgroup: Unknown subsys name 'cpuset'
cgroup: Unknown subsys name 'rlimit'
=====================================
WARNING: bad unlock balance detected!
syzkaller #0 Not tainted
-------------------------------------
syz-executor/5795 is trying to release lock (rcu_read_lock) at:
[<ffffffff8b2f32cf>] rcu_lock_release include/linux/rcupdate.h:310 [inline]
[<ffffffff8b2f32cf>] rcu_read_unlock include/linux/rcupdate.h:869 [inline]
[<ffffffff8b2f32cf>] rt_spin_unlock+0x14f/0x200 kernel/locking/spinlock_rt.c:82
but there are no more locks to release!
other info that might help us debug this:
2 locks held by syz-executor/5795:
#0: ffff888035e50f58 (&ima_iint_mutex_key[depth]){+.+.}-{4:4}, at: process_measurement+0x7fd/0x1c90 security/integrity/ima/ima_main.c:319
#1: ffff8880434dc100 (mapping.invalidate_lock#2){++++}-{4:4}, at: filemap_invalidate_lock_shared include/linux/fs.h:1094 [inline]
#1: ffff8880434dc100 (mapping.invalidate_lock#2){++++}-{4:4}, at: do_page_cache_ra mm/readahead.c:333 [inline]
#1: ffff8880434dc100 (mapping.invalidate_lock#2){++++}-{4:4}, at: page_cache_ra_order+0x2a5/0x490 mm/readahead.c:538
stack backtrace:
CPU: 1 UID: 0 PID: 5795 Comm: syz-executor Not tainted syzkaller #0 PREEMPT_{RT,(full)}
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 04/18/2026
Call Trace:
<TASK>
dump_stack_lvl+0xe8/0x150 lib/dump_stack.c:120
print_unlock_imbalance_bug+0xdc/0xf0 kernel/locking/lockdep.c:5298
__lock_release kernel/locking/lockdep.c:5537 [inline]
lock_release+0x248/0x3c0 kernel/locking/lockdep.c:5889
rcu_lock_release include/linux/rcupdate.h:310 [inline]
rcu_read_unlock include/linux/rcupdate.h:869 [inline]
rt_spin_unlock+0x15b/0x200 kernel/locking/spinlock_rt.c:82
spin_unlock_irq include/linux/spinlock_rt.h:122 [inline]
__filemap_add_folio+0xc85/0x1200 mm/filemap.c:931
filemap_add_folio+0x2de/0x610 mm/filemap.c:967
page_cache_ra_unbounded+0x407/0x980 mm/readahead.c:282
do_page_cache_ra mm/readahead.c:334 [inline]
page_cache_ra_order+0x2b5/0x490 mm/readahead.c:538
filemap_readahead mm/filemap.c:2664 [inline]
filemap_get_pages+0x832/0x1e70 mm/filemap.c:2710
filemap_read+0x44a/0x1240 mm/filemap.c:2806
__kernel_read+0x50d/0x9c0 fs/read_write.c:532
integrity_kernel_read+0x89/0xd0 security/integrity/iint.c:28
ima_calc_file_hash_tfm security/integrity/ima/ima_crypto.c:222 [inline]
ima_calc_file_hash+0x452/0x870 security/integrity/ima/ima_crypto.c:280
ima_collect_measurement+0x523/0x9d0 security/integrity/ima/ima_api.c:300
process_measurement+0x12d9/0x1c90 security/integrity/ima/ima_main.c:425
ima_file_check+0xe1/0x130 security/integrity/ima/ima_main.c:685
security_file_post_open+0xb3/0x260 security/security.c:2755
do_open fs/namei.c:4701 [inline]
path_openat+0x2e88/0x38a0 fs/namei.c:4858
do_file_open+0x23e/0x4a0 fs/namei.c:4887
file_open_name+0x162/0x1c0 fs/open.c:1322
__do_sys_swapon mm/swapfile.c:3467 [inline]
__se_sys_swapon+0x856/0x2010 mm/swapfile.c:3432
do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline]
do_syscall_64+0x15f/0xf80 arch/x86/entry/syscall_64.c:94
entry_SYSCALL_64_after_hwframe+0x77/0x7f
RIP: 0033:0x7f884264c7d7
Code: 73 01 c3 48 c7 c1 e8 ff ff ff f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 b8 a7 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 e8 ff ff ff f7 d8 64 89 01 48
RSP: 002b:00007ffe6306a658 EFLAGS: 00000246 ORIG_RAX: 00000000000000a7
RAX: ffffffffffffffda RBX: 0000000000000008 RCX: 00007f884264c7d7
RDX: 0000000000000000 RSI: 0000000000008000 RDI: 00007f88426e2e5b
RBP: 00007f88426e2e5b R08: 0000000000000000 R09: 0000000000000000
R10: 0000000000000008 R11: 0000000000000246 R12: 00007f88428963e0
R13: 00007f88426fdd26 R14: 0000000000200000 R15: 00007f88428963a0
</TASK>
------------[ cut here ]------------
rrln < 0 || rrln > RCU_NEST_PMAX
WARNING: kernel/rcu/tree_plugin.h:443 at __rcu_read_unlock+0x79/0xe0 kernel/rcu/tree_plugin.h:443, CPU#1: syz-executor/5795
Modules linked in:
CPU: 1 UID: 0 PID: 5795 Comm: syz-executor Not tainted syzkaller #0 PREEMPT_{RT,(full)}
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 04/18/2026
RIP: 0010:__rcu_read_unlock+0x79/0xe0 kernel/rcu/tree_plugin.h:443
Code: 75 66 41 83 3e 00 75 27 43 0f b6 04 3c 84 c0 75 41 8b 03 3d 00 00 00 40 73 0f 5b 41 5c 41 5d 41 5e 41 5f c3 cc cc cc cc cc 90 <0f> 0b 90 eb eb e8 6d 00 00 00 eb d2 89 d9 80 e1 07 80 c1 03 38 c1
RSP: 0018:ffffc900046e6418 EFLAGS: 00010286
RAX: 00000000ffffffff RBX: ffff888039e82384 RCX: 0000000000000046
RDX: 0000000000000000 RSI: ffffffff8d8986dc RDI: ffff888039e81ec0
RBP: 0000000000000001 R08: 0000000000000000 R09: 0000000000000000
R10: dffffc0000000000 R11: fffffbfff1bcaacc R12: 1ffff110073d0470
R13: ffff888039e81ec0 R14: ffff8880b893c610 R15: dffffc0000000000
FS: 000055555b61b540(0000) GS:ffff8881261fb000(0000) knlGS:0000000000000000
CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 00007fec05db0e9c CR3: 0000000042cbe000 CR4: 00000000003526f0
Call Trace:
<TASK>
rcu_read_unlock include/linux/rcupdate.h:871 [inline]
rt_spin_unlock+0x160/0x200 kernel/locking/spinlock_rt.c:82
spin_unlock_irq include/linux/spinlock_rt.h:122 [inline]
__filemap_add_folio+0xc85/0x1200 mm/filemap.c:931
filemap_add_folio+0x2de/0x610 mm/filemap.c:967
page_cache_ra_unbounded+0x407/0x980 mm/readahead.c:282
do_page_cache_ra mm/readahead.c:334 [inline]
page_cache_ra_order+0x2b5/0x490 mm/readahead.c:538
filemap_readahead mm/filemap.c:2664 [inline]
filemap_get_pages+0x832/0x1e70 mm/filemap.c:2710
filemap_read+0x44a/0x1240 mm/filemap.c:2806
__kernel_read+0x50d/0x9c0 fs/read_write.c:532
integrity_kernel_read+0x89/0xd0 security/integrity/iint.c:28
ima_calc_file_hash_tfm security/integrity/ima/ima_crypto.c:222 [inline]
ima_calc_file_hash+0x452/0x870 security/integrity/ima/ima_crypto.c:280
ima_collect_measurement+0x523/0x9d0 security/integrity/ima/ima_api.c:300
process_measurement+0x12d9/0x1c90 security/integrity/ima/ima_main.c:425
ima_file_check+0xe1/0x130 security/integrity/ima/ima_main.c:685
security_file_post_open+0xb3/0x260 security/security.c:2755
do_open fs/namei.c:4701 [inline]
path_openat+0x2e88/0x38a0 fs/namei.c:4858
do_file_open+0x23e/0x4a0 fs/namei.c:4887
file_open_name+0x162/0x1c0 fs/open.c:1322
__do_sys_swapon mm/swapfile.c:3467 [inline]
__se_sys_swapon+0x856/0x2010 mm/swapfile.c:3432
do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline]
do_syscall_64+0x15f/0xf80 arch/x86/entry/syscall_64.c:94
entry_SYSCALL_64_after_hwframe+0x77/0x7f
RIP: 0033:0x7f884264c7d7
Code: 73 01 c3 48 c7 c1 e8 ff ff ff f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 b8 a7 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 e8 ff ff ff f7 d8 64 89 01 48
RSP: 002b:00007ffe6306a658 EFLAGS: 00000246 ORIG_RAX: 00000000000000a7
RAX: ffffffffffffffda RBX: 0000000000000008 RCX: 00007f884264c7d7
RDX: 0000000000000000 RSI: 0000000000008000 RDI: 00007f88426e2e5b
RBP: 00007f88426e2e5b R08: 0000000000000000 R09: 0000000000000000
R10: 0000000000000008 R11: 0000000000000246 R12: 00007f88428963e0
R13: 00007f88426fdd26 R14: 0000000000200000 R15: 00007f88428963a0
</TASK>
---
This report is generated by a bot. It may contain errors.
See https://goo.gl/tpsmEJ for more information about syzbot.
syzbot engineers can be reached at syzkaller@googlegroups.com.
syzbot will keep track of this issue. See:
https://goo.gl/tpsmEJ#status for how to communicate with syzbot.
If the report is already addressed, let syzbot know by replying with:
#syz fix: exact-commit-title
If you want to overwrite the report's subsystems, reply with:
#syz set subsystems: new-subsystem
(See the list of subsystem names on the web dashboard)
If the report is a duplicate of another one, reply with:
#syz dup: exact-subject-of-another-report
If you want to undo deduplication, reply with:
#syz undup
^ permalink raw reply
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox.