Linux Security Modules development
 help / color / mirror / Atom feed
* [PATCH v19 5/8] rust: rename `AlwaysRefCounted` to `RefCounted`.
From: Andreas Hindborg @ 2026-06-26 11:54 UTC (permalink / raw)
  To: Danilo Krummrich, Lorenzo Stoakes, Vlastimil Babka,
	Liam R. Howlett, Uladzislau Rezki, Miguel Ojeda, Boqun Feng,
	Gary Guo, Björn Roy Baron, Benno Lossin, Alice Ryhl,
	Trevor Gross, Daniel Almeida, Tamir Duberstein, Alexandre Courbot,
	Onur Özkan, Lyude Paul, Greg Kroah-Hartman,
	Arve Hjønnevåg, Todd Kjos, Christian Brauner,
	Carlos Llamas, Rafael J. Wysocki, Dave Ertman, Ira Weiny,
	Leon Romanovsky, Paul Moore, Serge Hallyn, David Airlie,
	Simona Vetter, Alexander Viro, Jan Kara, Igor Korotin,
	Viresh Kumar, Nishanth Menon, Stephen Boyd, Bjorn Helgaas,
	Krzysztof Wilczyński, Pavel Tikhomirov, Michal Wilczynski
  Cc: Andreas Hindborg, Philipp Stanner, rust-for-linux, linux-kernel,
	linux-mm, driver-core, linux-block, linux-security-module,
	dri-devel, linux-fsdevel, linux-pm, linux-pci, linux-pwm,
	Oliver Mangold, Viresh Kumar, Igor Korotin
In-Reply-To: <20260626-unique-ref-v19-0-2607ca88dfdf@kernel.org>

From: Oliver Mangold <oliver.mangold@pm.me>

There are types where it may both be reference counted in some cases and
owned in others. In such cases, obtaining `ARef<T>` from `&T` would be
unsound as it allows creation of `ARef<T>` copy from `&Owned<T>`.

Therefore, we split `AlwaysRefCounted` into `RefCounted` (which `ARef<T>`
would require) and a marker trait to indicate that the type is always
reference counted (and not `Ownable`) so the `&T` -> `ARef<T>` conversion
is possible.

- Rename `AlwaysRefCounted` to `RefCounted`.
- Add a new unsafe trait `AlwaysRefCounted`.
- Implement the new trait `AlwaysRefCounted` for the newly renamed
  `RefCounted` implementations. This leaves functionality of existing
  implementers of `AlwaysRefCounted` intact.

Suggested-by: Alice Ryhl <aliceryhl@google.com>
Reviewed-by: Daniel Almeida <daniel.almeida@collabora.com>
Signed-off-by: Oliver Mangold <oliver.mangold@pm.me>
[ Andreas: Updated commit message and rebase on rust-next (7.2) ]
Acked-by: Igor Korotin <igor.korotin.linux@gmail.com>
Acked-by: Danilo Krummrich <dakr@kernel.org>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Reviewed-by: Gary Guo <gary@garyguo.net>
Co-developed-by: Andreas Hindborg <a.hindborg@kernel.org>
Signed-off-by: Andreas Hindborg <a.hindborg@kernel.org>
---
 rust/kernel/auxiliary.rs        | 10 ++++++-
 rust/kernel/block/mq/request.rs | 19 ++++++++-----
 rust/kernel/cred.rs             | 16 +++++++++--
 rust/kernel/device.rs           | 12 +++++++--
 rust/kernel/device/property.rs  | 11 ++++++--
 rust/kernel/drm/device.rs       |  9 +++++--
 rust/kernel/drm/gem/mod.rs      | 16 ++++++++---
 rust/kernel/fs/file.rs          | 23 +++++++++++++---
 rust/kernel/i2c.rs              | 13 ++++++---
 rust/kernel/mm.rs               | 22 ++++++++++++---
 rust/kernel/mm/mmput_async.rs   | 12 +++++++--
 rust/kernel/opp.rs              | 16 ++++++++---
 rust/kernel/owned.rs            |  2 +-
 rust/kernel/pci.rs              | 10 ++++++-
 rust/kernel/pid_namespace.rs    | 15 +++++++++--
 rust/kernel/platform.rs         | 10 ++++++-
 rust/kernel/pwm.rs              | 12 +++++++--
 rust/kernel/sync/aref.rs        | 59 +++++++++++++++++++++++++----------------
 rust/kernel/task.rs             | 13 +++++++--
 rust/kernel/types.rs            | 12 ++++++---
 rust/kernel/usb.rs              | 17 +++++++++---
 rust/kernel/workqueue.rs        |  8 +++---
 22 files changed, 260 insertions(+), 77 deletions(-)

diff --git a/rust/kernel/auxiliary.rs b/rust/kernel/auxiliary.rs
index c42928d5a2393..854525289c8b4 100644
--- a/rust/kernel/auxiliary.rs
+++ b/rust/kernel/auxiliary.rs
@@ -19,6 +19,10 @@
         to_result, //
     },
     prelude::*,
+    sync::aref::{
+        AlwaysRefCounted,
+        RefCounted, //
+    },
     types::{
         ForLt,
         ForeignOwnable,
@@ -344,7 +348,7 @@ unsafe impl<Ctx: device::DeviceContext> device::AsBusDevice<Ctx> for Device<Ctx>
 kernel::impl_device_context_into_aref!(Device);
 
 // SAFETY: Instances of `Device` are always reference-counted.
-unsafe impl crate::sync::aref::AlwaysRefCounted for Device {
+unsafe impl RefCounted for Device {
     fn inc_ref(&self) {
         // SAFETY: The existence of a shared reference guarantees that the refcount is non-zero.
         unsafe { bindings::get_device(self.as_ref().as_raw()) };
@@ -363,6 +367,10 @@ unsafe fn dec_ref(obj: NonNull<Self>) {
     }
 }
 
+// SAFETY: We do not implement `Ownable`, thus it is okay to obtain an `ARef<Device>` from a
+// `&Device`.
+unsafe impl AlwaysRefCounted for Device {}
+
 impl<Ctx: device::DeviceContext> AsRef<device::Device<Ctx>> for Device<Ctx> {
     fn as_ref(&self) -> &device::Device<Ctx> {
         // SAFETY: By the type invariant of `Self`, `self.as_raw()` is a pointer to a valid
diff --git a/rust/kernel/block/mq/request.rs b/rust/kernel/block/mq/request.rs
index ce3e30c81cb5e..8dad15ae4cfb0 100644
--- a/rust/kernel/block/mq/request.rs
+++ b/rust/kernel/block/mq/request.rs
@@ -9,7 +9,11 @@
     block::mq::Operations,
     error::Result,
     sync::{
-        aref::{ARef, AlwaysRefCounted},
+        aref::{
+            ARef,
+            AlwaysRefCounted,
+            RefCounted, //
+        },
         atomic::Relaxed,
         Refcount,
     },
@@ -229,11 +233,10 @@ unsafe impl<T: Operations> Send for Request<T> {}
 // mutate `self` are internally synchronized`
 unsafe impl<T: Operations> Sync for Request<T> {}
 
-// SAFETY: All instances of `Request<T>` are reference counted. This
-// implementation of `AlwaysRefCounted` ensure that increments to the ref count
-// keeps the object alive in memory at least until a matching reference count
-// decrement is executed.
-unsafe impl<T: Operations> AlwaysRefCounted for Request<T> {
+// SAFETY: All instances of `Request<T>` are reference counted. This implementation of `RefCounted`
+// ensure that increments to the ref count keeps the object alive in memory at least until a
+// matching reference count decrement is executed.
+unsafe impl<T: Operations> RefCounted for Request<T> {
     fn inc_ref(&self) {
         self.wrapper_ref().refcount().inc();
     }
@@ -255,3 +258,7 @@ unsafe fn dec_ref(obj: core::ptr::NonNull<Self>) {
         }
     }
 }
+
+// SAFETY: We currently do not implement `Ownable`, thus it is okay to obtain an `ARef<Request>`
+// from a `&Request` (but this will change in the future).
+unsafe impl<T: Operations> AlwaysRefCounted for Request<T> {}
diff --git a/rust/kernel/cred.rs b/rust/kernel/cred.rs
index ffa156b9df377..b17736a9adcd5 100644
--- a/rust/kernel/cred.rs
+++ b/rust/kernel/cred.rs
@@ -8,7 +8,15 @@
 //!
 //! Reference: <https://www.kernel.org/doc/html/latest/security/credentials.html>
 
-use crate::{bindings, sync::aref::AlwaysRefCounted, task::Kuid, types::Opaque};
+use crate::{
+    bindings,
+    sync::aref::RefCounted,
+    task::Kuid,
+    types::{
+        AlwaysRefCounted,
+        Opaque, //
+    }, //
+};
 
 /// Wraps the kernel's `struct cred`.
 ///
@@ -76,7 +84,7 @@ pub fn euid(&self) -> Kuid {
 }
 
 // SAFETY: The type invariants guarantee that `Credential` is always ref-counted.
-unsafe impl AlwaysRefCounted for Credential {
+unsafe impl RefCounted for Credential {
     #[inline]
     fn inc_ref(&self) {
         // SAFETY: The existence of a shared reference means that the refcount is nonzero.
@@ -90,3 +98,7 @@ unsafe fn dec_ref(obj: core::ptr::NonNull<Credential>) {
         unsafe { bindings::put_cred(obj.cast().as_ptr()) };
     }
 }
+
+// SAFETY: We do not implement `Ownable`, thus it is okay to obtain an `ARef<Credential>` from a
+// `&Credential`.
+unsafe impl AlwaysRefCounted for Credential {}
diff --git a/rust/kernel/device.rs b/rust/kernel/device.rs
index 645afc49a27d6..2e90f6a06fd05 100644
--- a/rust/kernel/device.rs
+++ b/rust/kernel/device.rs
@@ -8,8 +8,12 @@
     bindings,
     fmt,
     prelude::*,
-    sync::aref::ARef,
+    sync::aref::{
+        ARef,
+        RefCounted, //
+    },
     types::{
+        AlwaysRefCounted,
         ForeignOwnable,
         Opaque, //
     }, //
@@ -448,7 +452,7 @@ pub fn name(&self) -> &CStr {
 kernel::impl_device_context_into_aref!(Device);
 
 // SAFETY: Instances of `Device` are always reference-counted.
-unsafe impl crate::sync::aref::AlwaysRefCounted for Device {
+unsafe impl RefCounted for Device {
     fn inc_ref(&self) {
         // SAFETY: The existence of a shared reference guarantees that the refcount is non-zero.
         unsafe { bindings::get_device(self.as_raw()) };
@@ -460,6 +464,10 @@ unsafe fn dec_ref(obj: ptr::NonNull<Self>) {
     }
 }
 
+// SAFETY: We do not implement `Ownable`, thus it is okay to obtain an `ARef<Device>` from a
+// `&Device`.
+unsafe impl AlwaysRefCounted for Device {}
+
 // SAFETY: As by the type invariant `Device` can be sent to any thread.
 unsafe impl Send for Device {}
 
diff --git a/rust/kernel/device/property.rs b/rust/kernel/device/property.rs
index 5aead835fbbc0..cee7e25013689 100644
--- a/rust/kernel/device/property.rs
+++ b/rust/kernel/device/property.rs
@@ -14,7 +14,10 @@
     fmt,
     prelude::*,
     str::{CStr, CString},
-    sync::aref::ARef,
+    sync::aref::{
+        ARef,
+        AlwaysRefCounted, //
+    },
     types::Opaque,
 };
 
@@ -360,7 +363,7 @@ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
 }
 
 // SAFETY: Instances of `FwNode` are always reference-counted.
-unsafe impl crate::sync::aref::AlwaysRefCounted for FwNode {
+unsafe impl crate::sync::aref::RefCounted for FwNode {
     fn inc_ref(&self) {
         // SAFETY: The existence of a shared reference guarantees that the
         // refcount is non-zero.
@@ -374,6 +377,10 @@ unsafe fn dec_ref(obj: ptr::NonNull<Self>) {
     }
 }
 
+// SAFETY: We do not implement `Ownable`, thus it is okay to obtain an `ARef<FwNode>` from a
+// `&FwNode`.
+unsafe impl AlwaysRefCounted for FwNode {}
+
 enum Node<'a> {
     Borrowed(&'a FwNode),
     Owned(ARef<FwNode>),
diff --git a/rust/kernel/drm/device.rs b/rust/kernel/drm/device.rs
index 403fc35353c74..368742a258376 100644
--- a/rust/kernel/drm/device.rs
+++ b/rust/kernel/drm/device.rs
@@ -15,7 +15,8 @@
     prelude::*,
     sync::aref::{
         ARef,
-        AlwaysRefCounted, //
+        AlwaysRefCounted,
+        RefCounted, //
     },
     types::Opaque,
     workqueue::{
@@ -227,7 +228,7 @@ fn deref(&self) -> &Self::Target {
 
 // SAFETY: DRM device objects are always reference counted and the get/put functions
 // satisfy the requirements.
-unsafe impl<T: drm::Driver> AlwaysRefCounted for Device<T> {
+unsafe impl<T: drm::Driver> RefCounted for Device<T> {
     fn inc_ref(&self) {
         // SAFETY: The existence of a shared reference guarantees that the refcount is non-zero.
         unsafe { bindings::drm_dev_get(self.as_raw()) };
@@ -242,6 +243,10 @@ unsafe fn dec_ref(obj: NonNull<Self>) {
     }
 }
 
+// SAFETY: We do not implement `Ownable`, thus it is okay to obtain an `ARef<Device>` from a
+// `&Device`.
+unsafe impl<T: drm::Driver> AlwaysRefCounted for Device<T> {}
+
 impl<T: drm::Driver> AsRef<device::Device> for Device<T> {
     fn as_ref(&self) -> &device::Device {
         // SAFETY: `bindings::drm_device::dev` is valid as long as the DRM device itself is valid,
diff --git a/rust/kernel/drm/gem/mod.rs b/rust/kernel/drm/gem/mod.rs
index 01b5bd47a3332..30d3718578fe8 100644
--- a/rust/kernel/drm/gem/mod.rs
+++ b/rust/kernel/drm/gem/mod.rs
@@ -17,7 +17,7 @@
     prelude::*,
     sync::aref::{
         ARef,
-        AlwaysRefCounted, //
+        RefCounted, //
     },
     types::Opaque,
 };
@@ -29,7 +29,7 @@
 #[cfg(CONFIG_RUST_DRM_GEM_SHMEM_HELPER)]
 pub mod shmem;
 
-/// A macro for implementing [`AlwaysRefCounted`] for any GEM object type.
+/// A macro for implementing [`RefCounted`] for any GEM object type.
 ///
 /// Since all GEM objects use the same refcounting scheme.
 #[macro_export]
@@ -42,7 +42,7 @@ impl $( <$( $tparam_id:ident ),+> )? for $type:ty
         )?
     ) => {
         // SAFETY: All GEM objects are refcounted.
-        unsafe impl $( <$( $tparam_id ),+> )? $crate::sync::aref::AlwaysRefCounted for $type
+        unsafe impl $( <$( $tparam_id ),+> )? $crate::sync::aref::RefCounted for $type
         where
             Self: IntoGEMObject,
             $( $( $bind_param : $bind_trait ),+ )?
@@ -61,6 +61,14 @@ unsafe fn dec_ref(obj: core::ptr::NonNull<Self>) {
                 unsafe { bindings::drm_gem_object_put(obj) };
             }
         }
+
+        // SAFETY: We do not implement `Ownable`, thus it is okay to obtain an `ARef<$type>` from a
+        // `&$type`.
+        unsafe impl $( <$( $tparam_id ),+> )? $crate::sync::aref::AlwaysRefCounted for $type
+        where
+            Self: IntoGEMObject,
+            $( $( $bind_param : $bind_trait ),+ )?
+        {}
     };
 }
 #[cfg_attr(not(CONFIG_RUST_DRM_GEM_SHMEM_HELPER), allow(unused))]
@@ -98,7 +106,7 @@ fn close(_obj: &<Self::Driver as drm::Driver>::Object, _file: &DriverFile<Self>)
 }
 
 /// Trait that represents a GEM object subtype
-pub trait IntoGEMObject: Sized + super::private::Sealed + AlwaysRefCounted {
+pub trait IntoGEMObject: Sized + super::private::Sealed + RefCounted {
     /// Returns a reference to the raw `drm_gem_object` structure, which must be valid as long as
     /// this owning object is valid.
     fn as_raw(&self) -> *mut bindings::drm_gem_object;
diff --git a/rust/kernel/fs/file.rs b/rust/kernel/fs/file.rs
index 23ee689bd2400..720e57418358d 100644
--- a/rust/kernel/fs/file.rs
+++ b/rust/kernel/fs/file.rs
@@ -12,8 +12,15 @@
     cred::Credential,
     error::{code::*, to_result, Error, Result},
     fmt,
-    sync::aref::{ARef, AlwaysRefCounted},
-    types::{NotThreadSafe, Opaque},
+    sync::aref::{
+        ARef,
+        RefCounted, //
+    },
+    types::{
+        AlwaysRefCounted,
+        NotThreadSafe,
+        Opaque, //
+    }, //
 };
 use core::ptr;
 
@@ -197,7 +204,7 @@ unsafe impl Sync for File {}
 
 // SAFETY: The type invariants guarantee that `File` is always ref-counted. This implementation
 // makes `ARef<File>` own a normal refcount.
-unsafe impl AlwaysRefCounted for File {
+unsafe impl RefCounted for File {
     #[inline]
     fn inc_ref(&self) {
         // SAFETY: The existence of a shared reference means that the refcount is nonzero.
@@ -212,6 +219,10 @@ unsafe fn dec_ref(obj: ptr::NonNull<File>) {
     }
 }
 
+// SAFETY: We do not implement `Ownable`, thus it is okay to obtain an `ARef<File>` from a
+// `&File`.
+unsafe impl AlwaysRefCounted for File {}
+
 /// Wraps the kernel's `struct file`. Not thread safe.
 ///
 /// This type represents a file that is not known to be safe to transfer across thread boundaries.
@@ -233,7 +244,7 @@ pub struct LocalFile {
 
 // SAFETY: The type invariants guarantee that `LocalFile` is always ref-counted. This implementation
 // makes `ARef<LocalFile>` own a normal refcount.
-unsafe impl AlwaysRefCounted for LocalFile {
+unsafe impl RefCounted for LocalFile {
     #[inline]
     fn inc_ref(&self) {
         // SAFETY: The existence of a shared reference means that the refcount is nonzero.
@@ -249,6 +260,10 @@ unsafe fn dec_ref(obj: ptr::NonNull<LocalFile>) {
     }
 }
 
+// SAFETY: We do not implement `Ownable`, thus it is okay to obtain an `ARef<LocalFile>` from a
+// `&LocalFile`.
+unsafe impl AlwaysRefCounted for LocalFile {}
+
 impl LocalFile {
     /// Constructs a new `struct file` wrapper from a file descriptor.
     ///
diff --git a/rust/kernel/i2c.rs b/rust/kernel/i2c.rs
index 624b971ca8b0b..02b2c9220eb11 100644
--- a/rust/kernel/i2c.rs
+++ b/rust/kernel/i2c.rs
@@ -18,7 +18,8 @@
     prelude::*,
     sync::aref::{
         ARef,
-        AlwaysRefCounted, //
+        AlwaysRefCounted,
+        RefCounted, //
     },
     types::Opaque, //
 };
@@ -424,7 +425,7 @@ pub fn get(index: i32) -> Result<ARef<Self>> {
 kernel::impl_device_context_into_aref!(I2cAdapter);
 
 // SAFETY: Instances of `I2cAdapter` are always reference-counted.
-unsafe impl AlwaysRefCounted for I2cAdapter {
+unsafe impl RefCounted for I2cAdapter {
     fn inc_ref(&self) {
         // SAFETY: The existence of a shared reference guarantees that the refcount is non-zero.
         unsafe { bindings::i2c_get_adapter(self.index()) };
@@ -435,6 +436,9 @@ unsafe fn dec_ref(obj: NonNull<Self>) {
         unsafe { bindings::i2c_put_adapter(obj.as_ref().as_raw()) }
     }
 }
+// SAFETY: We do not implement `Ownable`, thus it is okay to obtain an `ARef<Device>` from an
+// `&I2cAdapter`.
+unsafe impl AlwaysRefCounted for I2cAdapter {}
 
 /// The i2c board info representation
 ///
@@ -500,7 +504,7 @@ unsafe impl<Ctx: device::DeviceContext> device::AsBusDevice<Ctx> for I2cClient<C
 kernel::impl_device_context_into_aref!(I2cClient);
 
 // SAFETY: Instances of `I2cClient` are always reference-counted.
-unsafe impl AlwaysRefCounted for I2cClient {
+unsafe impl RefCounted for I2cClient {
     fn inc_ref(&self) {
         // SAFETY: The existence of a shared reference guarantees that the refcount is non-zero.
         unsafe { bindings::get_device(self.as_ref().as_raw()) };
@@ -511,6 +515,9 @@ unsafe fn dec_ref(obj: NonNull<Self>) {
         unsafe { bindings::put_device(&raw mut (*obj.as_ref().as_raw()).dev) }
     }
 }
+// SAFETY: We do not implement `Ownable`, thus it is okay to obtain an `ARef<Device>` from an
+// `&I2cClient`.
+unsafe impl AlwaysRefCounted for I2cClient {}
 
 impl<Ctx: device::DeviceContext> AsRef<device::Device<Ctx>> for I2cClient<Ctx> {
     fn as_ref(&self) -> &device::Device<Ctx> {
diff --git a/rust/kernel/mm.rs b/rust/kernel/mm.rs
index 4764d7b68f2a7..83ed94fca14ca 100644
--- a/rust/kernel/mm.rs
+++ b/rust/kernel/mm.rs
@@ -13,8 +13,15 @@
 
 use crate::{
     bindings,
-    sync::aref::{ARef, AlwaysRefCounted},
-    types::{NotThreadSafe, Opaque},
+    sync::aref::{
+        ARef,
+        RefCounted, //
+    },
+    types::{
+        AlwaysRefCounted,
+        NotThreadSafe,
+        Opaque, //
+    }, //
 };
 use core::{ops::Deref, ptr::NonNull};
 
@@ -55,7 +62,7 @@ unsafe impl Send for Mm {}
 unsafe impl Sync for Mm {}
 
 // SAFETY: By the type invariants, this type is always refcounted.
-unsafe impl AlwaysRefCounted for Mm {
+unsafe impl RefCounted for Mm {
     #[inline]
     fn inc_ref(&self) {
         // SAFETY: The pointer is valid since self is a reference.
@@ -69,6 +76,9 @@ unsafe fn dec_ref(obj: NonNull<Self>) {
     }
 }
 
+// SAFETY: We do not implement `Ownable`, thus it is okay to obtain an `ARef<Mm>` from a `&Mm`.
+unsafe impl AlwaysRefCounted for Mm {}
+
 /// A wrapper for the kernel's `struct mm_struct`.
 ///
 /// This type is like [`Mm`], but with non-zero `mm_users`. It can only be used when `mm_users` can
@@ -91,7 +101,7 @@ unsafe impl Send for MmWithUser {}
 unsafe impl Sync for MmWithUser {}
 
 // SAFETY: By the type invariants, this type is always refcounted.
-unsafe impl AlwaysRefCounted for MmWithUser {
+unsafe impl RefCounted for MmWithUser {
     #[inline]
     fn inc_ref(&self) {
         // SAFETY: The pointer is valid since self is a reference.
@@ -105,6 +115,10 @@ unsafe fn dec_ref(obj: NonNull<Self>) {
     }
 }
 
+// SAFETY: We do not implement `Ownable`, thus it is okay to obtain an `ARef<MmWithUser>` from a
+// `&MmWithUser`.
+unsafe impl AlwaysRefCounted for MmWithUser {}
+
 // Make all `Mm` methods available on `MmWithUser`.
 impl Deref for MmWithUser {
     type Target = Mm;
diff --git a/rust/kernel/mm/mmput_async.rs b/rust/kernel/mm/mmput_async.rs
index b8d2f051225c7..8fbc396e46028 100644
--- a/rust/kernel/mm/mmput_async.rs
+++ b/rust/kernel/mm/mmput_async.rs
@@ -10,7 +10,11 @@
 use crate::{
     bindings,
     mm::MmWithUser,
-    sync::aref::{ARef, AlwaysRefCounted},
+    sync::aref::{
+        ARef,
+        RefCounted, //
+    },
+    types::AlwaysRefCounted,
 };
 use core::{ops::Deref, ptr::NonNull};
 
@@ -34,7 +38,7 @@ unsafe impl Send for MmWithUserAsync {}
 unsafe impl Sync for MmWithUserAsync {}
 
 // SAFETY: By the type invariants, this type is always refcounted.
-unsafe impl AlwaysRefCounted for MmWithUserAsync {
+unsafe impl RefCounted for MmWithUserAsync {
     #[inline]
     fn inc_ref(&self) {
         // SAFETY: The pointer is valid since self is a reference.
@@ -48,6 +52,10 @@ unsafe fn dec_ref(obj: NonNull<Self>) {
     }
 }
 
+// SAFETY: We do not implement `Ownable`, thus it is okay to obtain an `ARef<MmWithUserAsync>`
+// from a `&MmWithUserAsync`.
+unsafe impl AlwaysRefCounted for MmWithUserAsync {}
+
 // Make all `MmWithUser` methods available on `MmWithUserAsync`.
 impl Deref for MmWithUserAsync {
     type Target = MmWithUser;
diff --git a/rust/kernel/opp.rs b/rust/kernel/opp.rs
index 62e44676125d1..b8db6bdefd077 100644
--- a/rust/kernel/opp.rs
+++ b/rust/kernel/opp.rs
@@ -16,8 +16,14 @@
     ffi::{c_char, c_ulong},
     prelude::*,
     str::CString,
-    sync::aref::{ARef, AlwaysRefCounted},
-    types::Opaque,
+    sync::aref::{
+        ARef,
+        RefCounted, //
+    },
+    types::{
+        AlwaysRefCounted,
+        Opaque, //
+    }, //
 };
 
 #[cfg(CONFIG_CPU_FREQ)]
@@ -1041,7 +1047,7 @@ unsafe impl Send for OPP {}
 unsafe impl Sync for OPP {}
 
 /// SAFETY: The type invariants guarantee that [`OPP`] is always refcounted.
-unsafe impl AlwaysRefCounted for OPP {
+unsafe impl RefCounted for OPP {
     #[inline]
     fn inc_ref(&self) {
         // SAFETY: The existence of a shared reference means that the refcount is nonzero.
@@ -1055,6 +1061,10 @@ unsafe fn dec_ref(obj: ptr::NonNull<Self>) {
     }
 }
 
+// SAFETY: We do not implement `Ownable`, thus it is okay to obtain an `ARef<OPP>` from an
+// `&OPP`.
+unsafe impl AlwaysRefCounted for OPP {}
+
 impl OPP {
     /// Creates an owned reference to a [`OPP`] from a valid pointer.
     ///
diff --git a/rust/kernel/owned.rs b/rust/kernel/owned.rs
index 93a5dfcc1e6f5..a156267bf8bb1 100644
--- a/rust/kernel/owned.rs
+++ b/rust/kernel/owned.rs
@@ -27,7 +27,7 @@
 ///
 /// Note: The underlying object is not required to provide internal reference counting, because it
 /// represents a unique, owned reference. If reference counting (on the Rust side) is required,
-/// [`AlwaysRefCounted`](crate::sync::aref::AlwaysRefCounted) should be implemented.
+/// [`RefCounted`](crate::types::RefCounted) should be implemented.
 ///
 /// # Examples
 ///
diff --git a/rust/kernel/pci.rs b/rust/kernel/pci.rs
index 5071cae6543fd..ea9ef99cecb07 100644
--- a/rust/kernel/pci.rs
+++ b/rust/kernel/pci.rs
@@ -19,6 +19,10 @@
     },
     prelude::*,
     str::CStr,
+    sync::aref::{
+        AlwaysRefCounted,
+        RefCounted, //
+    },
     types::Opaque,
     ThisModule, //
 };
@@ -481,7 +485,7 @@ unsafe impl<Ctx: device::DeviceContext> device::AsBusDevice<Ctx> for Device<Ctx>
 impl<'a> crate::dma::Device<'a> for Device<device::Core<'a>> {}
 
 // SAFETY: Instances of `Device` are always reference-counted.
-unsafe impl crate::sync::aref::AlwaysRefCounted for Device {
+unsafe impl RefCounted for Device {
     fn inc_ref(&self) {
         // SAFETY: The existence of a shared reference guarantees that the refcount is non-zero.
         unsafe { bindings::pci_dev_get(self.as_raw()) };
@@ -493,6 +497,10 @@ unsafe fn dec_ref(obj: NonNull<Self>) {
     }
 }
 
+// SAFETY: We do not implement `Ownable`, thus it is okay to obtain an `ARef<Device>` from a
+// `&Device`.
+unsafe impl AlwaysRefCounted for Device {}
+
 impl<Ctx: device::DeviceContext> AsRef<device::Device<Ctx>> for Device<Ctx> {
     fn as_ref(&self) -> &device::Device<Ctx> {
         // SAFETY: By the type invariant of `Self`, `self.as_raw()` is a pointer to a valid
diff --git a/rust/kernel/pid_namespace.rs b/rust/kernel/pid_namespace.rs
index 979a9718f153d..067f68b99e8c5 100644
--- a/rust/kernel/pid_namespace.rs
+++ b/rust/kernel/pid_namespace.rs
@@ -7,7 +7,14 @@
 //! C header: [`include/linux/pid_namespace.h`](srctree/include/linux/pid_namespace.h) and
 //! [`include/linux/pid.h`](srctree/include/linux/pid.h)
 
-use crate::{bindings, sync::aref::AlwaysRefCounted, types::Opaque};
+use crate::{
+    bindings,
+    sync::aref::RefCounted,
+    types::{
+        AlwaysRefCounted,
+        Opaque, //
+    }, //
+};
 use core::ptr;
 
 /// Wraps the kernel's `struct pid_namespace`. Thread safe.
@@ -41,7 +48,7 @@ pub unsafe fn from_ptr<'a>(ptr: *const bindings::pid_namespace) -> &'a Self {
 }
 
 // SAFETY: Instances of `PidNamespace` are always reference-counted.
-unsafe impl AlwaysRefCounted for PidNamespace {
+unsafe impl RefCounted for PidNamespace {
     #[inline]
     fn inc_ref(&self) {
         // SAFETY: The existence of a shared reference means that the refcount is nonzero.
@@ -55,6 +62,10 @@ unsafe fn dec_ref(obj: ptr::NonNull<PidNamespace>) {
     }
 }
 
+// SAFETY: We do not implement `Ownable`, thus it is okay to obtain an `ARef<PidNamespace>` from
+// a `&PidNamespace`.
+unsafe impl AlwaysRefCounted for PidNamespace {}
+
 // SAFETY:
 // - `PidNamespace::dec_ref` can be called from any thread.
 // - It is okay to send ownership of `PidNamespace` across thread boundaries.
diff --git a/rust/kernel/platform.rs b/rust/kernel/platform.rs
index 9b362e0495d32..0ba676445b06d 100644
--- a/rust/kernel/platform.rs
+++ b/rust/kernel/platform.rs
@@ -27,6 +27,10 @@
     },
     of,
     prelude::*,
+    sync::aref::{
+        AlwaysRefCounted,
+        RefCounted, //
+    },
     types::Opaque,
     ThisModule, //
 };
@@ -518,7 +522,7 @@ pub fn optional_irq_by_name(&self, name: &CStr) -> Result<IrqRequest<'_>> {
 impl<'a> crate::dma::Device<'a> for Device<device::Core<'a>> {}
 
 // SAFETY: Instances of `Device` are always reference-counted.
-unsafe impl crate::sync::aref::AlwaysRefCounted for Device {
+unsafe impl RefCounted for Device {
     fn inc_ref(&self) {
         // SAFETY: The existence of a shared reference guarantees that the refcount is non-zero.
         unsafe { bindings::get_device(self.as_ref().as_raw()) };
@@ -530,6 +534,10 @@ unsafe fn dec_ref(obj: NonNull<Self>) {
     }
 }
 
+// SAFETY: We do not implement `Ownable`, thus it is okay to obtain an `ARef<Device>` from a
+// `&Device`.
+unsafe impl AlwaysRefCounted for Device {}
+
 impl<Ctx: device::DeviceContext> AsRef<device::Device<Ctx>> for Device<Ctx> {
     fn as_ref(&self) -> &device::Device<Ctx> {
         // SAFETY: By the type invariant of `Self`, `self.as_raw()` is a pointer to a valid
diff --git a/rust/kernel/pwm.rs b/rust/kernel/pwm.rs
index 6c9d667009ef7..2d1cd74dd98e1 100644
--- a/rust/kernel/pwm.rs
+++ b/rust/kernel/pwm.rs
@@ -13,7 +13,11 @@
     devres,
     error::{self, to_result},
     prelude::*,
-    sync::aref::{ARef, AlwaysRefCounted},
+    sync::aref::{
+        ARef,
+        AlwaysRefCounted,
+        RefCounted, //
+    },
     types::Opaque, //
 };
 use core::{
@@ -629,7 +633,7 @@ pub fn new<'a>(
 }
 
 // SAFETY: Implements refcounting for `Chip` using the embedded `struct device`.
-unsafe impl<T: PwmOps> AlwaysRefCounted for Chip<T> {
+unsafe impl<T: PwmOps> RefCounted for Chip<T> {
     #[inline]
     fn inc_ref(&self) {
         // SAFETY: `self.0.get()` points to a valid `pwm_chip` because `self` exists.
@@ -647,6 +651,10 @@ unsafe fn dec_ref(obj: NonNull<Chip<T>>) {
     }
 }
 
+// SAFETY: We do not implement `Ownable`, thus it is okay to obtain an `ARef<Chip<T>>` from a
+// `&Chip<T>`.
+unsafe impl<T: PwmOps> AlwaysRefCounted for Chip<T> {}
+
 // SAFETY: `Chip` is a wrapper around `*mut bindings::pwm_chip`. The underlying C
 // structure's state is managed and synchronized by the kernel's device model
 // and PWM core locking mechanisms. Therefore, it is safe to move the `Chip`
diff --git a/rust/kernel/sync/aref.rs b/rust/kernel/sync/aref.rs
index 3bd5eb8a1a526..ea5a16b8163a6 100644
--- a/rust/kernel/sync/aref.rs
+++ b/rust/kernel/sync/aref.rs
@@ -11,7 +11,7 @@
 //! underlying object, but this refcount is internal to the object. It essentially is a Rust
 //! implementation of the `get_` and `put_` pattern used in C for reference counting.
 //!
-//! To make use of [`ARef<MyType>`], `MyType` needs to implement [`AlwaysRefCounted`]. It is a trait
+//! To make use of [`ARef<MyType>`], `MyType` needs to implement [`RefCounted`]. It is a trait
 //! for accessing the internal reference count of an object of the `MyType` type.
 //!
 //! [`Arc`]: crate::sync::Arc
@@ -24,11 +24,9 @@
     ptr::NonNull, //
 };
 
-/// Types that are _always_ reference counted.
+/// Types that are internally reference counted.
 ///
 /// It allows such types to define their own custom ref increment and decrement functions.
-/// Additionally, it allows users to convert from a shared reference `&T` to an owned reference
-/// [`ARef<T>`].
 ///
 /// This is usually implemented by wrappers to existing structures on the C side of the code. For
 /// Rust code, the recommendation is to use [`Arc`](crate::sync::Arc) to create reference-counted
@@ -45,9 +43,8 @@
 /// at least until matching decrements are performed.
 ///
 /// Implementers must also ensure that all instances are reference-counted. (Otherwise they
-/// won't be able to honour the requirement that [`AlwaysRefCounted::inc_ref`] keep the object
-/// alive.)
-pub unsafe trait AlwaysRefCounted {
+/// won't be able to honour the requirement that [`RefCounted::inc_ref`] keep the object alive.)
+pub unsafe trait RefCounted {
     /// Increments the reference count on the object.
     fn inc_ref(&self);
 
@@ -60,11 +57,27 @@ pub unsafe trait AlwaysRefCounted {
     /// Callers must ensure that there was a previous matching increment to the reference count,
     /// and that the object is no longer used after its reference count is decremented (as it may
     /// result in the object being freed), unless the caller owns another increment on the refcount
-    /// (e.g., it calls [`AlwaysRefCounted::inc_ref`] twice, then calls
-    /// [`AlwaysRefCounted::dec_ref`] once).
+    /// (e.g., it calls [`RefCounted::inc_ref`] twice, then calls [`RefCounted::dec_ref`] once).
     unsafe fn dec_ref(obj: NonNull<Self>);
 }
 
+/// Always reference-counted type.
+///
+/// It allows deriving a counted reference [`ARef<T>`] from a `&T`.
+///
+/// This provides some convenience, but it allows "escaping" borrow checks on `&T`. As it
+/// complicates attempts to ensure that a reference to T is unique, it is optional to provide for
+/// [`RefCounted`] types. See *Safety* below.
+///
+/// # Safety
+///
+/// Implementers must ensure that no safety invariants are violated by upgrading an `&T` to an
+/// [`ARef<T>`]. In particular that implies [`AlwaysRefCounted`] and [`crate::types::Ownable`]
+/// cannot be implemented for the same type, as this would allow violating the uniqueness guarantee
+/// of [`crate::types::Owned<T>`] by dereferencing it into an `&T` and obtaining an [`ARef`] from
+/// that.
+pub unsafe trait AlwaysRefCounted: RefCounted {}
+
 /// An owned reference to an always-reference-counted object.
 ///
 /// The object's reference count is automatically decremented when an instance of [`ARef`] is
@@ -75,7 +88,7 @@ pub unsafe trait AlwaysRefCounted {
 ///
 /// The pointer stored in `ptr` is non-null and valid for the lifetime of the [`ARef`] instance. In
 /// particular, the [`ARef`] instance owns an increment on the underlying object's reference count.
-pub struct ARef<T: AlwaysRefCounted> {
+pub struct ARef<T: RefCounted> {
     ptr: NonNull<T>,
     _p: PhantomData<T>,
 }
@@ -84,19 +97,19 @@ pub struct ARef<T: AlwaysRefCounted> {
 // it effectively means sharing `&T` (which is safe because `T` is `Sync`); additionally, it needs
 // `T` to be `Send` because any thread that has an `ARef<T>` may ultimately access `T` using a
 // mutable reference, for example, when the reference count reaches zero and `T` is dropped.
-unsafe impl<T: AlwaysRefCounted + Sync + Send> Send for ARef<T> {}
+unsafe impl<T: RefCounted + Sync + Send> Send for ARef<T> {}
 
 // SAFETY: It is safe to send `&ARef<T>` to another thread when the underlying `T` is `Sync`
 // because it effectively means sharing `&T` (which is safe because `T` is `Sync`); additionally,
 // it needs `T` to be `Send` because any thread that has a `&ARef<T>` may clone it and get an
 // `ARef<T>` on that thread, so the thread may ultimately access `T` using a mutable reference, for
 // example, when the reference count reaches zero and `T` is dropped.
-unsafe impl<T: AlwaysRefCounted + Sync + Send> Sync for ARef<T> {}
+unsafe impl<T: RefCounted + Sync + Send> Sync for ARef<T> {}
 
 // Even if `T` is pinned, pointers to `T` can still move.
-impl<T: AlwaysRefCounted> Unpin for ARef<T> {}
+impl<T: RefCounted> Unpin for ARef<T> {}
 
-impl<T: AlwaysRefCounted> ARef<T> {
+impl<T: RefCounted> ARef<T> {
     /// Creates a new instance of [`ARef`].
     ///
     /// It takes over an increment of the reference count on the underlying object.
@@ -125,12 +138,12 @@ pub unsafe fn from_raw(ptr: NonNull<T>) -> Self {
     ///
     /// ```
     /// use core::ptr::NonNull;
-    /// use kernel::sync::aref::{ARef, AlwaysRefCounted};
+    /// use kernel::sync::aref::{ARef, RefCounted};
     ///
     /// struct Empty {}
     ///
     /// # // SAFETY: TODO.
-    /// unsafe impl AlwaysRefCounted for Empty {
+    /// unsafe impl RefCounted for Empty {
     ///     fn inc_ref(&self) {}
     ///     unsafe fn dec_ref(_obj: NonNull<Self>) {}
     /// }
@@ -148,7 +161,7 @@ pub fn into_raw(me: Self) -> NonNull<T> {
     }
 }
 
-impl<T: AlwaysRefCounted> Clone for ARef<T> {
+impl<T: RefCounted> Clone for ARef<T> {
     fn clone(&self) -> Self {
         self.inc_ref();
         // SAFETY: We just incremented the refcount above.
@@ -156,7 +169,7 @@ fn clone(&self) -> Self {
     }
 }
 
-impl<T: AlwaysRefCounted> Deref for ARef<T> {
+impl<T: RefCounted> Deref for ARef<T> {
     type Target = T;
 
     fn deref(&self) -> &Self::Target {
@@ -173,7 +186,7 @@ fn from(b: &T) -> Self {
     }
 }
 
-impl<T: AlwaysRefCounted> Drop for ARef<T> {
+impl<T: RefCounted> Drop for ARef<T> {
     fn drop(&mut self) {
         // SAFETY: The type invariants guarantee that the `ARef` owns the reference we're about to
         // decrement.
@@ -183,19 +196,19 @@ fn drop(&mut self) {
 
 impl<T, U> PartialEq<ARef<U>> for ARef<T>
 where
-    T: AlwaysRefCounted + PartialEq<U>,
-    U: AlwaysRefCounted,
+    T: RefCounted + PartialEq<U>,
+    U: RefCounted,
 {
     #[inline]
     fn eq(&self, other: &ARef<U>) -> bool {
         T::eq(&**self, &**other)
     }
 }
-impl<T: AlwaysRefCounted + Eq> Eq for ARef<T> {}
+impl<T: RefCounted + Eq> Eq for ARef<T> {}
 
 impl<T, U> PartialEq<&'_ U> for ARef<T>
 where
-    T: AlwaysRefCounted + PartialEq<U>,
+    T: RefCounted + PartialEq<U>,
 {
     #[inline]
     fn eq(&self, other: &&U) -> bool {
diff --git a/rust/kernel/task.rs b/rust/kernel/task.rs
index 38273f4eedb51..6259430b0ca31 100644
--- a/rust/kernel/task.rs
+++ b/rust/kernel/task.rs
@@ -10,7 +10,12 @@
     pid_namespace::PidNamespace,
     prelude::*,
     sync::aref::ARef,
-    types::{NotThreadSafe, Opaque},
+    types::{
+        AlwaysRefCounted,
+        NotThreadSafe,
+        Opaque,
+        RefCounted, //
+    },
 };
 use core::{
     ops::Deref,
@@ -347,7 +352,7 @@ pub fn group_leader(&self) -> &Task {
 }
 
 // SAFETY: The type invariants guarantee that `Task` is always refcounted.
-unsafe impl crate::sync::aref::AlwaysRefCounted for Task {
+unsafe impl RefCounted for Task {
     #[inline]
     fn inc_ref(&self) {
         // SAFETY: The existence of a shared reference means that the refcount is nonzero.
@@ -361,6 +366,10 @@ unsafe fn dec_ref(obj: ptr::NonNull<Self>) {
     }
 }
 
+// SAFETY: We do not implement `Ownable`, thus it is okay to obtain an `ARef<Task>` from a
+// `&Task`.
+unsafe impl AlwaysRefCounted for Task {}
+
 impl PartialEq for Task {
     #[inline]
     fn eq(&self, other: &Self) -> bool {
diff --git a/rust/kernel/types.rs b/rust/kernel/types.rs
index c41eab0ec983c..5ef763717e59a 100644
--- a/rust/kernel/types.rs
+++ b/rust/kernel/types.rs
@@ -15,9 +15,15 @@
 pub mod for_lt;
 pub use for_lt::ForLt;
 
-pub use crate::owned::{
-    Ownable,
-    Owned, //
+pub use crate::{
+    owned::{
+        Ownable,
+        Owned, //
+    },
+    sync::aref::{
+        AlwaysRefCounted,
+        RefCounted, //
+    }, //
 };
 
 /// Used to transfer ownership to and from foreign (non-Rust) languages.
diff --git a/rust/kernel/usb.rs b/rust/kernel/usb.rs
index 7aff0c82d0afc..59350c6b0df2a 100644
--- a/rust/kernel/usb.rs
+++ b/rust/kernel/usb.rs
@@ -18,7 +18,10 @@
         to_result, //
     },
     prelude::*,
-    sync::aref::AlwaysRefCounted,
+    sync::aref::{
+        AlwaysRefCounted,
+        RefCounted, //
+    },
     types::Opaque,
     ThisModule, //
 };
@@ -392,7 +395,7 @@ fn as_ref(&self) -> &Device {
 }
 
 // SAFETY: Instances of `Interface` are always reference-counted.
-unsafe impl AlwaysRefCounted for Interface {
+unsafe impl RefCounted for Interface {
     fn inc_ref(&self) {
         // SAFETY: The invariants of `Interface` guarantee that `self.as_raw()`
         // returns a valid `struct usb_interface` pointer, for which we will
@@ -406,6 +409,10 @@ unsafe fn dec_ref(obj: NonNull<Self>) {
     }
 }
 
+// SAFETY: We do not implement `Ownable`, thus it is okay to obtain an `ARef<Interface>` from a
+// `&Interface`.
+unsafe impl AlwaysRefCounted for Interface {}
+
 // SAFETY: A `Interface` is always reference-counted and can be released from any thread.
 unsafe impl Send for Interface {}
 
@@ -443,7 +450,7 @@ fn as_raw(&self) -> *mut bindings::usb_device {
 kernel::impl_device_context_into_aref!(Device);
 
 // SAFETY: Instances of `Device` are always reference-counted.
-unsafe impl AlwaysRefCounted for Device {
+unsafe impl RefCounted for Device {
     fn inc_ref(&self) {
         // SAFETY: The invariants of `Device` guarantee that `self.as_raw()`
         // returns a valid `struct usb_device` pointer, for which we will
@@ -457,6 +464,10 @@ unsafe fn dec_ref(obj: NonNull<Self>) {
     }
 }
 
+// SAFETY: We do not implement `Ownable`, thus it is okay to obtain an `ARef<Device>` from a
+// `&Device`.
+unsafe impl AlwaysRefCounted for Device {}
+
 impl<Ctx: device::DeviceContext> AsRef<device::Device<Ctx>> for Device<Ctx> {
     fn as_ref(&self) -> &device::Device<Ctx> {
         // SAFETY: By the type invariant of `Self`, `self.as_raw()` is a pointer to a valid
diff --git a/rust/kernel/workqueue.rs b/rust/kernel/workqueue.rs
index 7e253b6f299ce..77673b8ea45fb 100644
--- a/rust/kernel/workqueue.rs
+++ b/rust/kernel/workqueue.rs
@@ -192,7 +192,7 @@
     sync::{
         aref::{
             ARef,
-            AlwaysRefCounted, //
+            RefCounted, //
         },
         Arc,
         LockClassKey, //
@@ -954,7 +954,7 @@ unsafe impl<T, const ID: u64> RawDelayedWorkItem<ID> for Pin<KBox<T>>
 //     implementation of `WorkItemPointer` for `ARef<T>`.
 unsafe impl<T, const ID: u64> WorkItemPointer<ID> for ARef<T>
 where
-    T: AlwaysRefCounted,
+    T: RefCounted,
     T: WorkItem<ID, Pointer = Self>,
     T: HasWork<T, ID>,
 {
@@ -987,7 +987,7 @@ unsafe impl<T, const ID: u64> WorkItemPointer<ID> for ARef<T>
 // requirements of `WorkItemPointer`.
 unsafe impl<T, const ID: u64> RawWorkItem<ID> for ARef<T>
 where
-    T: AlwaysRefCounted,
+    T: RefCounted,
     T: WorkItem<ID, Pointer = Self>,
     T: HasWork<T, ID>,
 {
@@ -1020,7 +1020,7 @@ unsafe impl<T, const ID: u64> RawDelayedWorkItem<ID> for ARef<T>
 where
     T: WorkItem<ID, Pointer = Self>,
     T: HasDelayedWork<T, ID>,
-    T: AlwaysRefCounted,
+    T: RefCounted,
 {
 }
 

-- 
2.51.2



^ permalink raw reply related

* [PATCH v19 6/8] rust: Add missing SAFETY documentation for `ARef` example
From: Andreas Hindborg @ 2026-06-26 11:54 UTC (permalink / raw)
  To: Danilo Krummrich, Lorenzo Stoakes, Vlastimil Babka,
	Liam R. Howlett, Uladzislau Rezki, Miguel Ojeda, Boqun Feng,
	Gary Guo, Björn Roy Baron, Benno Lossin, Alice Ryhl,
	Trevor Gross, Daniel Almeida, Tamir Duberstein, Alexandre Courbot,
	Onur Özkan, Lyude Paul, Greg Kroah-Hartman,
	Arve Hjønnevåg, Todd Kjos, Christian Brauner,
	Carlos Llamas, Rafael J. Wysocki, Dave Ertman, Ira Weiny,
	Leon Romanovsky, Paul Moore, Serge Hallyn, David Airlie,
	Simona Vetter, Alexander Viro, Jan Kara, Igor Korotin,
	Viresh Kumar, Nishanth Menon, Stephen Boyd, Bjorn Helgaas,
	Krzysztof Wilczyński, Pavel Tikhomirov, Michal Wilczynski
  Cc: Andreas Hindborg, Philipp Stanner, rust-for-linux, linux-kernel,
	linux-mm, driver-core, linux-block, linux-security-module,
	dri-devel, linux-fsdevel, linux-pm, linux-pci, linux-pwm,
	Oliver Mangold
In-Reply-To: <20260626-unique-ref-v19-0-2607ca88dfdf@kernel.org>

From: Oliver Mangold <oliver.mangold@pm.me>

SAFETY comment in rustdoc example was just 'TODO'. Fixed.

Signed-off-by: Oliver Mangold <oliver.mangold@pm.me>
Reviewed-by: Daniel Almeida <daniel.almeida@collabora.com>
Co-developed-by: Andreas Hindborg <a.hindborg@kernel.org>
Signed-off-by: Andreas Hindborg <a.hindborg@kernel.org>
---
 rust/kernel/sync/aref.rs | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/rust/kernel/sync/aref.rs b/rust/kernel/sync/aref.rs
index ea5a16b8163a6..f26ca39b84d0d 100644
--- a/rust/kernel/sync/aref.rs
+++ b/rust/kernel/sync/aref.rs
@@ -142,7 +142,9 @@ pub unsafe fn from_raw(ptr: NonNull<T>) -> Self {
     ///
     /// struct Empty {}
     ///
-    /// # // SAFETY: TODO.
+    /// // SAFETY: The `RefCounted` implementation for `Empty` does not count references and never
+    /// // frees the underlying object. Thus we can act as owning an increment on the refcount for
+    /// // the object that we pass to the newly created `ARef`.
     /// unsafe impl RefCounted for Empty {
     ///     fn inc_ref(&self) {}
     ///     unsafe fn dec_ref(_obj: NonNull<Self>) {}
@@ -150,7 +152,7 @@ pub unsafe fn from_raw(ptr: NonNull<T>) -> Self {
     ///
     /// let mut data = Empty {};
     /// let ptr = NonNull::<Empty>::new(&mut data).unwrap();
-    /// # // SAFETY: TODO.
+    /// // SAFETY: We keep `data` around longer than the `ARef`.
     /// let data_ref: ARef<Empty> = unsafe { ARef::from_raw(ptr) };
     /// let raw_ptr: NonNull<Empty> = ARef::into_raw(data_ref);
     ///

-- 
2.51.2



^ permalink raw reply related

* [PATCH v19 7/8] rust: Add `OwnableRefCounted`
From: Andreas Hindborg @ 2026-06-26 11:54 UTC (permalink / raw)
  To: Danilo Krummrich, Lorenzo Stoakes, Vlastimil Babka,
	Liam R. Howlett, Uladzislau Rezki, Miguel Ojeda, Boqun Feng,
	Gary Guo, Björn Roy Baron, Benno Lossin, Alice Ryhl,
	Trevor Gross, Daniel Almeida, Tamir Duberstein, Alexandre Courbot,
	Onur Özkan, Lyude Paul, Greg Kroah-Hartman,
	Arve Hjønnevåg, Todd Kjos, Christian Brauner,
	Carlos Llamas, Rafael J. Wysocki, Dave Ertman, Ira Weiny,
	Leon Romanovsky, Paul Moore, Serge Hallyn, David Airlie,
	Simona Vetter, Alexander Viro, Jan Kara, Igor Korotin,
	Viresh Kumar, Nishanth Menon, Stephen Boyd, Bjorn Helgaas,
	Krzysztof Wilczyński, Pavel Tikhomirov, Michal Wilczynski
  Cc: Andreas Hindborg, Philipp Stanner, rust-for-linux, linux-kernel,
	linux-mm, driver-core, linux-block, linux-security-module,
	dri-devel, linux-fsdevel, linux-pm, linux-pci, linux-pwm,
	Oliver Mangold
In-Reply-To: <20260626-unique-ref-v19-0-2607ca88dfdf@kernel.org>

From: Oliver Mangold <oliver.mangold@pm.me>

Types implementing one of these traits can safely convert between an
`ARef<T>` and an `Owned<T>`.

This is useful for types which generally are accessed through an `ARef`
but have methods which can only safely be called when the reference is
unique, like e.g. `block::mq::Request::end_ok()`.

Signed-off-by: Oliver Mangold <oliver.mangold@pm.me>
[ Andreas: Fix formatting, update documentation, fix error handling in
  examples. ]
Co-developed-by: Andreas Hindborg <a.hindborg@kernel.org>
Signed-off-by: Andreas Hindborg <a.hindborg@kernel.org>
---
 rust/kernel/owned.rs     | 145 +++++++++++++++++++++++++++++++++++++++++++++--
 rust/kernel/sync/aref.rs |  16 +++++-
 rust/kernel/types.rs     |   1 +
 3 files changed, 156 insertions(+), 6 deletions(-)

diff --git a/rust/kernel/owned.rs b/rust/kernel/owned.rs
index a156267bf8bb1..acb611f084ff3 100644
--- a/rust/kernel/owned.rs
+++ b/rust/kernel/owned.rs
@@ -14,20 +14,26 @@
     pin::Pin,
     ptr::NonNull, //
 };
+use kernel::{
+    sync::aref::ARef,
+    types::RefCounted, //
+};
 
 use kernel::types::ForeignOwnable;
 
 /// Types that specify their own way of performing allocation and destruction. Typically, this trait
 /// is implemented on types from the C side.
 ///
-/// Implementing this trait allows types to be referenced via the [`Owned<Self>`] pointer type. This
-/// is useful when it is desirable to tie the lifetime of the reference to an owned object, rather
-/// than pass around a bare reference. [`Ownable`] types can define custom drop logic that is
-/// executed when the owned reference [`Owned<Self>`] pointing to the object is dropped.
+/// Implementing this trait allows types to be referenced via the [`Owned<Self>`] pointer type.
+///  - This is useful when it is desirable to tie the lifetime of an object reference to an owned
+///    object, rather than pass around a bare reference.
+///  - [`Ownable`] types can define custom drop logic that is executed when the owned reference
+///    of type [`Owned<_>`] pointing to the object is dropped.
 ///
 /// Note: The underlying object is not required to provide internal reference counting, because it
 /// represents a unique, owned reference. If reference counting (on the Rust side) is required,
-/// [`RefCounted`](crate::types::RefCounted) should be implemented.
+/// [`RefCounted`] should be implemented. [`OwnableRefCounted`] should be implemented if conversion
+/// between unique and shared (reference counted) ownership is needed.
 ///
 /// # Examples
 ///
@@ -99,6 +105,8 @@ pub trait Ownable {
     /// Callers must ensure that they have exclusive ownership of the `Self` pointed to by `this`,
     /// and that this ownership is transferred to the `release` method. `this` must not be used
     /// after calling this method, as the underlying object may have been freed.
+    ///
+    /// `this` is pinned and implementers of this method must observe this constraint.
     unsafe fn release(this: NonNull<Self>);
 }
 
@@ -136,6 +144,8 @@ pub unsafe fn from_raw(ptr: NonNull<T>) -> Self {
     ///
     /// This function does not drop the underlying `T`. When this function returns, ownership of the
     /// underlying `T` is with the caller.
+    ///
+    /// Note that the returned pointer is pinned.
     #[inline]
     pub fn into_raw(me: Self) -> NonNull<T> {
         ManuallyDrop::new(me).ptr
@@ -236,3 +246,128 @@ unsafe fn borrow_mut<'a>(ptr: *mut kernel::ffi::c_void) -> Self::BorrowedMut<'a>
         unsafe { Pin::new_unchecked(inner) }
     }
 }
+
+/// A trait for objects that can be wrapped in either one of the reference types [`Owned`] and
+/// [`ARef`].
+///
+/// # Examples
+///
+/// A minimal example implementation of [`OwnableRefCounted`], [`Ownable`] and its usage with
+/// [`ARef`] and [`Owned`] looks like this:
+///
+/// ```
+/// # #![expect(clippy::disallowed_names)]
+/// # use core::cell::Cell;
+/// # use core::ptr::NonNull;
+/// # use kernel::alloc::{flags, kbox::KBox, AllocError};
+/// # use kernel::sync::aref::{ARef, RefCounted};
+/// # use kernel::types::{Owned, Ownable, OwnableRefCounted};
+///
+/// // An internally refcounted struct for demonstration purposes.
+/// //
+/// // # Invariants
+/// //
+/// // - `refcount` is always non-zero for a valid object.
+/// // - `refcount` is >1 if there is more than one Rust reference to it.
+/// //
+/// struct Foo {
+///     refcount: Cell<usize>,
+/// }
+///
+/// impl Foo {
+///     fn new() -> Result<Owned<Self>> {
+///         // We are just using a `KBox` here to handle the actual allocation, as our `Foo` is
+///         // not actually a C-allocated object.
+///         // INVARIANT: We initialize `refcount` to 1, satisfying the invariants.
+///         let result = KBox::new(
+///             Foo {
+///                 refcount: Cell::new(1),
+///             },
+///             flags::GFP_KERNEL,
+///         )?;
+///         let result = KBox::into_non_null(result);
+///         // SAFETY:
+///         //  - We just allocated the `Self`, thus it is valid and we own it.
+///         //  - We can transfer this ownership to the `from_raw` method.
+///         Ok(unsafe { Owned::from_raw(result) })
+///     }
+/// }
+///
+/// // SAFETY: We increment and decrement each time the respective function is called and only free
+/// // the `Foo` when the refcount reaches zero.
+/// unsafe impl RefCounted for Foo {
+///     fn inc_ref(&self) {
+///         self.refcount.replace(self.refcount.get() + 1);
+///     }
+///
+///     unsafe fn dec_ref(this: NonNull<Self>) {
+///         // SAFETY: By requirement on calling this function, the refcount is non-zero,
+///         // implying the underlying object is valid.
+///         let refcount = unsafe { &this.as_ref().refcount };
+///         let new_refcount = refcount.get() - 1;
+///         if new_refcount == 0 {
+///             // The `Foo` will be dropped when `KBox` goes out of scope.
+///             // SAFETY: The [`KBox<Foo>`] is still alive as the old refcount is 1. We can pass
+///             // ownership to the [`KBox`] as by requirement on calling this function,
+///             // the `Self` will no longer be used by the caller.
+///             unsafe { KBox::from_raw(this.as_ptr()) };
+///         } else {
+///             refcount.replace(new_refcount);
+///         }
+///     }
+/// }
+///
+/// impl OwnableRefCounted for Foo {
+///     fn try_from_shared(this: ARef<Self>) -> Result<Owned<Self>, ARef<Self>> {
+///         if this.refcount.get() == 1 {
+///             // SAFETY: The `Foo` is still alive and has no other Rust references as the refcount
+///             // is 1.
+///             Ok(unsafe { Owned::from_raw(ARef::into_raw(this)) })
+///         } else {
+///             Err(this)
+///         }
+///     }
+///
+///     fn into_shared(this: Owned<Self>) -> ARef<Self> {
+///         // SAFETY: An `Owned<Foo>` holds the unique reference (refcount 1), which we transfer to
+///         // the new `ARef`.
+///         unsafe { ARef::from_raw(Owned::into_raw(this)) }
+///     }
+/// }
+///
+/// impl Ownable for Foo {
+///     unsafe fn release(this: NonNull<Self>) {
+///         // SAFETY: Using `dec_ref()` from [`RefCounted`] to release is okay, as the refcount is
+///         // always 1 for an [`Owned<Foo>`].
+///         unsafe { Foo::dec_ref(this) };
+///     }
+/// }
+///
+/// let foo = Foo::new()?;
+/// let foo = ARef::from(foo);
+/// {
+///     let bar = foo.clone();
+///     assert!(Owned::try_from(bar).is_err());
+/// }
+/// assert!(Owned::try_from(foo).is_ok());
+/// # Ok::<(), Error>(())
+/// ```
+pub trait OwnableRefCounted: RefCounted + Ownable + Sized {
+    /// Checks if the [`ARef`] is unique and converts it to an [`Owned`] if that is the case.
+    /// Otherwise it returns again an [`ARef`] to the same underlying object.
+    fn try_from_shared(this: ARef<Self>) -> Result<Owned<Self>, ARef<Self>>;
+
+    /// Converts the [`Owned`] into an [`ARef`].
+    fn into_shared(this: Owned<Self>) -> ARef<Self>;
+}
+
+impl<T: OwnableRefCounted> TryFrom<ARef<T>> for Owned<T> {
+    type Error = ARef<T>;
+    /// Tries to convert the [`ARef`] to an [`Owned`] by calling
+    /// [`try_from_shared()`](OwnableRefCounted::try_from_shared). In case the [`ARef`] is not
+    /// unique, it returns again an [`ARef`] to the same underlying object.
+    #[inline]
+    fn try_from(b: ARef<T>) -> Result<Owned<T>, Self::Error> {
+        T::try_from_shared(b)
+    }
+}
diff --git a/rust/kernel/sync/aref.rs b/rust/kernel/sync/aref.rs
index f26ca39b84d0d..e6ffe7c650c39 100644
--- a/rust/kernel/sync/aref.rs
+++ b/rust/kernel/sync/aref.rs
@@ -23,6 +23,10 @@
     ops::Deref,
     ptr::NonNull, //
 };
+use kernel::types::{
+    OwnableRefCounted,
+    Owned, //
+};
 
 /// Types that are internally reference counted.
 ///
@@ -35,7 +39,10 @@
 /// Note: Implementing this trait allows types to be wrapped in an [`ARef<Self>`]. It requires an
 /// internal reference count and provides only shared references. If unique references are required
 /// [`Ownable`](crate::types::Ownable) should be implemented which allows types to be wrapped in an
-/// [`Owned<Self>`](crate::types::Owned).
+/// [`Owned<Self>`](crate::types::Owned). Implementing the trait
+/// [`OwnableRefCounted`] allows to convert between unique and
+/// shared references (i.e. [`Owned<Self>`](crate::types::Owned) and
+/// [`ARef<Self>`]).
 ///
 /// # Safety
 ///
@@ -188,6 +195,13 @@ fn from(b: &T) -> Self {
     }
 }
 
+impl<T: OwnableRefCounted> From<Owned<T>> for ARef<T> {
+    #[inline]
+    fn from(b: Owned<T>) -> Self {
+        T::into_shared(b)
+    }
+}
+
 impl<T: RefCounted> Drop for ARef<T> {
     fn drop(&mut self) {
         // SAFETY: The type invariants guarantee that the `ARef` owns the reference we're about to
diff --git a/rust/kernel/types.rs b/rust/kernel/types.rs
index 5ef763717e59a..6aa760952cb63 100644
--- a/rust/kernel/types.rs
+++ b/rust/kernel/types.rs
@@ -18,6 +18,7 @@
 pub use crate::{
     owned::{
         Ownable,
+        OwnableRefCounted,
         Owned, //
     },
     sync::aref::{

-- 
2.51.2



^ permalink raw reply related

* [PATCH v19 4/8] rust: page: convert to `Ownable`
From: Andreas Hindborg @ 2026-06-26 11:54 UTC (permalink / raw)
  To: Danilo Krummrich, Lorenzo Stoakes, Vlastimil Babka,
	Liam R. Howlett, Uladzislau Rezki, Miguel Ojeda, Boqun Feng,
	Gary Guo, Björn Roy Baron, Benno Lossin, Alice Ryhl,
	Trevor Gross, Daniel Almeida, Tamir Duberstein, Alexandre Courbot,
	Onur Özkan, Lyude Paul, Greg Kroah-Hartman,
	Arve Hjønnevåg, Todd Kjos, Christian Brauner,
	Carlos Llamas, Rafael J. Wysocki, Dave Ertman, Ira Weiny,
	Leon Romanovsky, Paul Moore, Serge Hallyn, David Airlie,
	Simona Vetter, Alexander Viro, Jan Kara, Igor Korotin,
	Viresh Kumar, Nishanth Menon, Stephen Boyd, Bjorn Helgaas,
	Krzysztof Wilczyński, Pavel Tikhomirov, Michal Wilczynski
  Cc: Andreas Hindborg, Philipp Stanner, rust-for-linux, linux-kernel,
	linux-mm, driver-core, linux-block, linux-security-module,
	dri-devel, linux-fsdevel, linux-pm, linux-pci, linux-pwm,
	Asahi Lina
In-Reply-To: <20260626-unique-ref-v19-0-2607ca88dfdf@kernel.org>

From: Asahi Lina <lina@asahilina.net>

This allows Page references to be returned as borrowed references,
without necessarily owning the struct page.

Remove `BorrowedPage` and update users to use `Owned<Page>`.

Signed-off-by: Asahi Lina <lina@asahilina.net>
[ Andreas: Fix formatting and add a safety comment, update users. ]
Signed-off-by: Andreas Hindborg <a.hindborg@kernel.org>
Reviewed-by: Gary Guo <gary@garyguo.net>
---
 drivers/android/binder/page_range.rs |  10 +--
 rust/kernel/alloc/allocator.rs       |  19 +++---
 rust/kernel/alloc/allocator/iter.rs  |   6 +-
 rust/kernel/page.rs                  | 122 +++++++++--------------------------
 4 files changed, 46 insertions(+), 111 deletions(-)

diff --git a/drivers/android/binder/page_range.rs b/drivers/android/binder/page_range.rs
index e54a90e62402a..7941eb85b4ef4 100644
--- a/drivers/android/binder/page_range.rs
+++ b/drivers/android/binder/page_range.rs
@@ -33,7 +33,7 @@
     sync::{aref::ARef, Mutex, SpinLock},
     task::Pid,
     transmute::FromBytes,
-    types::Opaque,
+    types::{Opaque, Owned},
     uaccess::UserSliceReader,
 };
 
@@ -198,7 +198,7 @@ unsafe impl Send for Inner {}
 #[repr(C)]
 struct PageInfo {
     lru: bindings::list_head,
-    page: Option<Page>,
+    page: Option<Owned<Page>>,
     range: *const ShrinkablePageRange,
 }
 
@@ -206,7 +206,7 @@ impl PageInfo {
     /// # Safety
     ///
     /// The caller ensures that writing to `me.page` is ok, and that the page is not currently set.
-    unsafe fn set_page(me: *mut PageInfo, page: Page) {
+    unsafe fn set_page(me: *mut PageInfo, page: Owned<Page>) {
         // SAFETY: This pointer offset is in bounds.
         let ptr = unsafe { &raw mut (*me).page };
 
@@ -229,13 +229,13 @@ unsafe fn get_page<'a>(me: *const PageInfo) -> Option<&'a Page> {
         let ptr = unsafe { &raw const (*me).page };
 
         // SAFETY: The pointer is valid for reading.
-        unsafe { (*ptr).as_ref() }
+        unsafe { (*ptr).as_deref() }
     }
 
     /// # Safety
     ///
     /// The caller ensures that writing to `me.page` is ok for the duration of 'a.
-    unsafe fn take_page(me: *mut PageInfo) -> Option<Page> {
+    unsafe fn take_page(me: *mut PageInfo) -> Option<Owned<Page>> {
         // SAFETY: This pointer offset is in bounds.
         let ptr = unsafe { &raw mut (*me).page };
 
diff --git a/rust/kernel/alloc/allocator.rs b/rust/kernel/alloc/allocator.rs
index cd4203f27aed0..c7b9b069cf75d 100644
--- a/rust/kernel/alloc/allocator.rs
+++ b/rust/kernel/alloc/allocator.rs
@@ -169,7 +169,7 @@ unsafe fn realloc(
 }
 
 impl Vmalloc {
-    /// Convert a pointer to a [`Vmalloc`] allocation to a [`page::BorrowedPage`].
+    /// Convert a pointer to a [`Vmalloc`] allocation to a [`Page`](page::Page) reference.
     ///
     /// # Examples
     ///
@@ -202,20 +202,17 @@ impl Vmalloc {
     ///
     /// - `ptr` must be a valid pointer to a [`Vmalloc`] allocation.
     /// - `ptr` must remain valid for the entire duration of `'a`.
-    pub unsafe fn to_page<'a>(ptr: NonNull<u8>) -> page::BorrowedPage<'a> {
+    pub unsafe fn to_page<'a>(ptr: NonNull<u8>) -> &'a page::Page {
         // SAFETY: `ptr` is a valid pointer to `Vmalloc` memory.
         let page = unsafe { bindings::vmalloc_to_page(ptr.as_ptr().cast()) };
 
-        // SAFETY: `vmalloc_to_page` returns a valid pointer to a `struct page` for a valid pointer
-        // to `Vmalloc` memory.
-        let page = unsafe { NonNull::new_unchecked(page) };
-
         // SAFETY:
-        // - `page` is a valid pointer to a `struct page`, given that by the safety requirements of
-        //   this function `ptr` is a valid pointer to a `Vmalloc` allocation.
-        // - By the safety requirements of this function `ptr` is valid for the entire lifetime of
-        //   `'a`.
-        unsafe { page::BorrowedPage::from_raw(page) }
+        // - `vmalloc_to_page` returns a valid, non-null pointer to a `struct page` for a valid
+        //   pointer to `Vmalloc` memory, given that by the safety requirements of this function
+        //   `ptr` is a valid pointer to a `Vmalloc` allocation.
+        // - By the safety requirements of this function `ptr`, and hence the `struct page`, is
+        //   valid for the entire lifetime of `'a`.
+        unsafe { &*page.cast() }
     }
 }
 
diff --git a/rust/kernel/alloc/allocator/iter.rs b/rust/kernel/alloc/allocator/iter.rs
index 02fda3ea5cae6..8dcc16ed89893 100644
--- a/rust/kernel/alloc/allocator/iter.rs
+++ b/rust/kernel/alloc/allocator/iter.rs
@@ -9,7 +9,7 @@
     ptr::NonNull, //
 };
 
-/// An [`Iterator`] of [`page::BorrowedPage`] items owned by a [`Vmalloc`] allocation.
+/// An [`Iterator`] of [`Page`](page::Page) references owned by a [`Vmalloc`] allocation.
 ///
 /// # Guarantees
 ///
@@ -28,11 +28,11 @@ pub struct VmallocPageIter<'a> {
     size: usize,
     /// The current page index of the [`Iterator`].
     index: usize,
-    _p: PhantomData<page::BorrowedPage<'a>>,
+    _p: PhantomData<&'a page::Page>,
 }
 
 impl<'a> Iterator for VmallocPageIter<'a> {
-    type Item = page::BorrowedPage<'a>;
+    type Item = &'a page::Page;
 
     fn next(&mut self) -> Option<Self::Item> {
         let offset = self.index.checked_mul(page::PAGE_SIZE)?;
diff --git a/rust/kernel/page.rs b/rust/kernel/page.rs
index 8affd8262891b..6dc1c2395acaf 100644
--- a/rust/kernel/page.rs
+++ b/rust/kernel/page.rs
@@ -12,16 +12,16 @@
         code::*,
         Result, //
     },
+    types::{
+        Opaque,
+        Ownable,
+        Owned, //
+    },
     uaccess::UserSliceReader, //
 };
-use core::{
-    marker::PhantomData,
-    mem::ManuallyDrop,
-    ops::Deref,
-    ptr::{
-        self,
-        NonNull, //
-    }, //
+use core::ptr::{
+    self,
+    NonNull, //
 };
 
 /// A bitwise shift for the page size.
@@ -65,93 +65,29 @@ pub const fn page_align(addr: usize) -> Option<usize> {
     Some(sum & PAGE_MASK)
 }
 
-/// Representation of a non-owning reference to a [`Page`].
-///
-/// This type provides a borrowed version of a [`Page`] that is owned by some other entity, e.g. a
-/// [`Vmalloc`] allocation such as [`VBox`].
-///
-/// # Example
-///
-/// ```
-/// # use kernel::{bindings, prelude::*};
-/// use kernel::page::{BorrowedPage, Page, PAGE_SIZE};
-/// # use core::{mem::MaybeUninit, ptr, ptr::NonNull };
-///
-/// fn borrow_page<'a>(vbox: &'a mut VBox<MaybeUninit<[u8; PAGE_SIZE]>>) -> BorrowedPage<'a> {
-///     let ptr = ptr::from_ref(&**vbox);
-///
-///     // SAFETY: `ptr` is a valid pointer to `Vmalloc` memory.
-///     let page = unsafe { bindings::vmalloc_to_page(ptr.cast()) };
-///
-///     // SAFETY: `vmalloc_to_page` returns a valid pointer to a `struct page` for a valid
-///     // pointer to `Vmalloc` memory.
-///     let page = unsafe { NonNull::new_unchecked(page) };
-///
-///     // SAFETY:
-///     // - `self.0` is a valid pointer to a `struct page`.
-///     // - `self.0` is valid for the entire lifetime of `self`.
-///     unsafe { BorrowedPage::from_raw(page) }
-/// }
-///
-/// let mut vbox = VBox::<[u8; PAGE_SIZE]>::new_uninit(GFP_KERNEL)?;
-/// let page = borrow_page(&mut vbox);
-///
-/// // SAFETY: There is no concurrent read or write to this page.
-/// unsafe { page.fill_zero_raw(0, PAGE_SIZE)? };
-/// # Ok::<(), Error>(())
-/// ```
-///
-/// # Invariants
-///
-/// The borrowed underlying pointer to a `struct page` is valid for the entire lifetime `'a`.
-///
-/// [`VBox`]: kernel::alloc::VBox
-/// [`Vmalloc`]: kernel::alloc::allocator::Vmalloc
-pub struct BorrowedPage<'a>(ManuallyDrop<Page>, PhantomData<&'a Page>);
-
-impl<'a> BorrowedPage<'a> {
-    /// Constructs a [`BorrowedPage`] from a raw pointer to a `struct page`.
-    ///
-    /// # Safety
-    ///
-    /// - `ptr` must point to a valid `bindings::page`.
-    /// - `ptr` must remain valid for the entire lifetime `'a`.
-    pub unsafe fn from_raw(ptr: NonNull<bindings::page>) -> Self {
-        let page = Page { page: ptr };
-
-        // INVARIANT: The safety requirements guarantee that `ptr` is valid for the entire lifetime
-        // `'a`.
-        Self(ManuallyDrop::new(page), PhantomData)
-    }
-}
-
-impl<'a> Deref for BorrowedPage<'a> {
-    type Target = Page;
-
-    fn deref(&self) -> &Self::Target {
-        &self.0
-    }
-}
-
-/// Trait to be implemented by types which provide an [`Iterator`] implementation of
-/// [`BorrowedPage`] items, such as [`VmallocPageIter`](kernel::alloc::allocator::VmallocPageIter).
+/// Trait to be implemented by types which provide an [`Iterator`] of [`Page`] references, such as
+/// [`VmallocPageIter`](kernel::alloc::allocator::VmallocPageIter).
 pub trait AsPageIter {
     /// The [`Iterator`] type, e.g. [`VmallocPageIter`](kernel::alloc::allocator::VmallocPageIter).
-    type Iter<'a>: Iterator<Item = BorrowedPage<'a>>
+    type Iter<'a>: Iterator<Item = &'a Page>
     where
         Self: 'a;
 
-    /// Returns an [`Iterator`] of [`BorrowedPage`] items over all pages owned by `self`.
+    /// Returns an [`Iterator`] of [`Page`] references over all pages owned by `self`.
     fn page_iter(&mut self) -> Self::Iter<'_>;
 }
 
-/// A pointer to a page that owns the page allocation.
+/// A `struct page`.
+///
+/// A `Page` is accessed through a shared reference or through an owning [`Owned<Page>`]; the latter
+/// frees the page allocation when it is dropped.
 ///
 /// # Invariants
 ///
-/// The pointer is valid, and has ownership over the page.
+/// The `Page` is backed by a valid `struct page`.
+#[repr(transparent)]
 pub struct Page {
-    page: NonNull<bindings::page>,
+    page: Opaque<bindings::page>,
 }
 
 // SAFETY: Pages have no logic that relies on them staying on a given thread, so moving them across
@@ -185,19 +121,20 @@ impl Page {
     /// # Ok::<(), kernel::alloc::AllocError>(())
     /// ```
     #[inline]
-    pub fn alloc_page(flags: Flags) -> Result<Self, AllocError> {
+    pub fn alloc_page(flags: Flags) -> Result<Owned<Self>, AllocError> {
         // SAFETY: Depending on the value of `gfp_flags`, this call may sleep. Other than that, it
         // is always safe to call this method.
         let page = unsafe { bindings::alloc_pages(flags.as_raw(), 0) };
         let page = NonNull::new(page).ok_or(AllocError)?;
-        // INVARIANT: We just successfully allocated a page, so we now have ownership of the newly
-        // allocated page. We transfer that ownership to the new `Page` object.
-        Ok(Self { page })
+        // SAFETY: We just successfully allocated a page, so we now have ownership of the newly
+        // allocated page. We transfer that ownership to the new `Owned<Page>` object.
+        // Since `Page` is transparent, we can cast the pointer directly.
+        Ok(unsafe { Owned::from_raw(page.cast()) })
     }
 
     /// Returns a raw pointer to the page.
     pub fn as_ptr(&self) -> *mut bindings::page {
-        self.page.as_ptr()
+        Opaque::cast_into(&self.page)
     }
 
     /// Get the node id containing this page.
@@ -372,10 +309,11 @@ pub unsafe fn copy_from_user_slice_raw(
     }
 }
 
-impl Drop for Page {
+impl Ownable for Page {
     #[inline]
-    fn drop(&mut self) {
-        // SAFETY: By the type invariants, we have ownership of the page and can free it.
-        unsafe { bindings::__free_pages(self.page.as_ptr(), 0) };
+    unsafe fn release(this: NonNull<Self>) {
+        // SAFETY: By the function safety requirements, we have ownership of the page and can free
+        // it. Since Page is transparent, we can cast the raw pointer directly.
+        unsafe { bindings::__free_pages(this.as_ptr().cast(), 0) };
     }
 }

-- 
2.51.2



^ permalink raw reply related

* [PATCH v19 1/8] rust: alloc: add `KBox::into_non_null`
From: Andreas Hindborg @ 2026-06-26 11:53 UTC (permalink / raw)
  To: Danilo Krummrich, Lorenzo Stoakes, Vlastimil Babka,
	Liam R. Howlett, Uladzislau Rezki, Miguel Ojeda, Boqun Feng,
	Gary Guo, Björn Roy Baron, Benno Lossin, Alice Ryhl,
	Trevor Gross, Daniel Almeida, Tamir Duberstein, Alexandre Courbot,
	Onur Özkan, Lyude Paul, Greg Kroah-Hartman,
	Arve Hjønnevåg, Todd Kjos, Christian Brauner,
	Carlos Llamas, Rafael J. Wysocki, Dave Ertman, Ira Weiny,
	Leon Romanovsky, Paul Moore, Serge Hallyn, David Airlie,
	Simona Vetter, Alexander Viro, Jan Kara, Igor Korotin,
	Viresh Kumar, Nishanth Menon, Stephen Boyd, Bjorn Helgaas,
	Krzysztof Wilczyński, Pavel Tikhomirov, Michal Wilczynski
  Cc: Andreas Hindborg, Philipp Stanner, rust-for-linux, linux-kernel,
	linux-mm, driver-core, linux-block, linux-security-module,
	dri-devel, linux-fsdevel, linux-pm, linux-pci, linux-pwm
In-Reply-To: <20260626-unique-ref-v19-0-2607ca88dfdf@kernel.org>

Add a method to consume a `Box<T, A>` and return a `NonNull<T>`. This
is a convenience wrapper around `Self::into_raw` for callers that need
a `NonNull` pointer rather than a raw pointer.

Signed-off-by: Andreas Hindborg <a.hindborg@kernel.org>
Reviewed-by: Alice Ryhl <aliceryhl@google.com>
Reviewed-by: Gary Guo <gary@garyguo.net>
---
 rust/kernel/alloc/kbox.rs | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/rust/kernel/alloc/kbox.rs b/rust/kernel/alloc/kbox.rs
index 35d1e015848dd..d534e8adcf7b3 100644
--- a/rust/kernel/alloc/kbox.rs
+++ b/rust/kernel/alloc/kbox.rs
@@ -211,6 +211,15 @@ pub fn leak<'a>(b: Self) -> &'a mut T {
         // which points to an initialized instance of `T`.
         unsafe { &mut *Box::into_raw(b) }
     }
+
+    /// Consumes the `Box<T,A>` and returns a `NonNull<T>`.
+    ///
+    /// Like [`Self::into_raw`], but returns a `NonNull`.
+    #[inline]
+    pub fn into_non_null(b: Self) -> NonNull<T> {
+        // SAFETY: `KBox::into_raw` returns a valid pointer.
+        unsafe { NonNull::new_unchecked(Self::into_raw(b)) }
+    }
 }
 
 impl<T, A> Box<MaybeUninit<T>, A>

-- 
2.51.2



^ permalink raw reply related

* Re: [RFC PATCH 1/4] capabily: Add new capable_noaudit
From: Darrick J. Wong @ 2026-06-26 15:16 UTC (permalink / raw)
  To: cem
  Cc: linux-fsdevel, jack, hch, serge, linux-security-module,
	linux-kernel, linux-xfs
In-Reply-To: <20260626114533.102138-2-cem@kernel.org>

s/capabily/capability/ in the subject even if the typo actually makes it
easier to find the thread.

On Fri, Jun 26, 2026 at 01:45:20PM +0200, cem@kernel.org wrote:
> From: Carlos Maiolino <cem@kernel.org>
> 
> In some situations (quota enforcement bypass in this case) we'd like to
> check for a specific capability without triggering spurious audit
> messages from security modules like selinux.
> 
> Add a new helper so we don't need to use ns_capable_noaudit() directly.
> 
> Signed-off-by: Carlos Maiolino <cmaiolino@redhat.com>
> ---
>  include/linux/capability.h |  5 +++++
>  kernel/capability.c        | 17 +++++++++++++++++
>  2 files changed, 22 insertions(+)
> 
> diff --git a/include/linux/capability.h b/include/linux/capability.h
> index 37db92b3d6f8..873416ba884c 100644
> --- a/include/linux/capability.h
> +++ b/include/linux/capability.h
> @@ -145,6 +145,7 @@ extern bool has_capability_noaudit(struct task_struct *t, int cap);
>  extern bool has_ns_capability_noaudit(struct task_struct *t,
>  				      struct user_namespace *ns, int cap);
>  extern bool capable(int cap);
> +extern bool capable_noaudit(int cap);
>  extern bool ns_capable(struct user_namespace *ns, int cap);
>  extern bool ns_capable_noaudit(struct user_namespace *ns, int cap);
>  extern bool ns_capable_setid(struct user_namespace *ns, int cap);
> @@ -167,6 +168,10 @@ static inline bool capable(int cap)
>  {
>  	return true;
>  }
> +static inline bool capable_noaudit(int cap)
> +{
> +	return true;
> +}
>  static inline bool ns_capable(struct user_namespace *ns, int cap)
>  {
>  	return true;
> diff --git a/kernel/capability.c b/kernel/capability.c
> index 829f49ae07b9..2c2d1e8300bd 100644
> --- a/kernel/capability.c
> +++ b/kernel/capability.c
> @@ -416,6 +416,23 @@ bool capable(int cap)
>  	return ns_capable(&init_user_ns, cap);
>  }
>  EXPORT_SYMBOL(capable);
> +
> +/**
> + * capable_noaudit - Determine if the current task has a superior
> + * capability in effect (unaudited).
> + * @cap: The capability to be tested for
> + *
> + * This is the same as capable(), except it uses CAP_OPT_NOAUDIT as to prevent
> + * issuing spurious audit messages.
> + *
> + * This sets PF_SUPERPRIV on the task if the capability is available on the
> + * assumption that it's about to be used.

Can you mention that this checks the current process' effective
capabilities (as opposed to the real ones)?  So that nobody else has to
suffer the confusion I pointed out in [1] which was the source of the
security bugs in the first place?

(I do like the wrapper though)

--D

[1] https://lore.kernel.org/linux-xfs/20260625160317.GY6078@frogsfrogsfrogs/

> + */
> +bool capable_noaudit(int cap)
> +{
> +	return ns_capable_noaudit(&init_user_ns, cap);
> +}
> +EXPORT_SYMBOL(capable_noaudit);
>  #endif /* CONFIG_MULTIUSER */
>  
>  /**
> -- 
> 2.54.0
> 
> 

^ permalink raw reply

* Re: [RFC PATCH 2/4] quota: Don't issue audit messages on quota enforcing
From: Darrick J. Wong @ 2026-06-26 15:18 UTC (permalink / raw)
  To: cem
  Cc: linux-fsdevel, jack, hch, serge, linux-security-module,
	linux-kernel, linux-xfs
In-Reply-To: <20260626114533.102138-3-cem@kernel.org>

On Fri, Jun 26, 2026 at 01:45:21PM +0200, cem@kernel.org wrote:
> From: Carlos Maiolino <cem@kernel.org>
> 
> Calling capable() to determine if we can bypass quota enforcement or not
> can trigger spurious audit messages. We don't really require it here so
> just use the capable_noaudit() version.
> 
> Signed-off-by: Carlos Maiolino <cmaiolino@redhat.com>
> ---
>  fs/quota/dquot.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
> index 64cf42721496..1122a29215f7 100644
> --- a/fs/quota/dquot.c
> +++ b/fs/quota/dquot.c
> @@ -1308,7 +1308,7 @@ static int ignore_hardlimit(struct dquot *dquot)
>  {
>  	struct mem_dqinfo *info = &sb_dqopt(dquot->dq_sb)->info[dquot->dq_id.type];
>  
> -	return capable(CAP_SYS_RESOURCE) &&
> +	return capable_noaudit(CAP_SYS_RESOURCE) &&

Yeah, we're just checking if we're going to enforce hardlimits, not
actually denying something based on lack of capability.  For all we know
the user is well under their disk quota limit.

Reviewed-by: "Darrick J. Wong" <djwong@kernel.org>

--D

>  	       (info->dqi_format->qf_fmt_id != QFMT_VFS_OLD ||
>  		!(info->dqi_flags & DQF_ROOT_SQUASH));
>  }
> -- 
> 2.54.0
> 
> 

^ permalink raw reply

* Re: [RFC PATCH 3/4] xfs: replace ns_capable_noaudit()
From: Darrick J. Wong @ 2026-06-26 15:19 UTC (permalink / raw)
  To: cem
  Cc: linux-fsdevel, jack, hch, serge, linux-security-module,
	linux-kernel, linux-xfs
In-Reply-To: <20260626114533.102138-4-cem@kernel.org>

On Fri, Jun 26, 2026 at 01:45:22PM +0200, cem@kernel.org wrote:
> From: Carlos Maiolino <cem@kernel.org>
> 
> We don't need to use ns_capable_noaudit() as all we care is the initial
> user namespace, use capable_noaudit() instead.

Might as well do the one in xfs_fsmap.c too, since it was originally a
capable() call.

--D

> Signed-off-by: Carlos Maiolino <cmaiolino@redhat.com>
> ---
>  fs/xfs/xfs_trans_dquot.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/fs/xfs/xfs_trans_dquot.c b/fs/xfs/xfs_trans_dquot.c
> index 50e5b323f7f1..30c2f6ec0aac 100644
> --- a/fs/xfs/xfs_trans_dquot.c
> +++ b/fs/xfs/xfs_trans_dquot.c
> @@ -835,7 +835,7 @@ xfs_trans_dqresv(
>  	if ((flags & XFS_QMOPT_FORCE_RES) == 0 &&
>  	    dqp->q_id &&
>  	    xfs_dquot_is_enforced(dqp) &&
> -	    !ns_capable_noaudit(&init_user_ns, CAP_SYS_RESOURCE)) {
> +	    !capable_noaudit(CAP_SYS_RESOURCE)) {
>  		int		quota_nl;
>  		bool		fatal;
>  
> -- 
> 2.54.0
> 
> 

^ permalink raw reply

* Re: [RFC PATCH 1/4] capabily: Add new capable_noaudit
From: Paul Moore @ 2026-06-26 15:31 UTC (permalink / raw)
  To: cem
  Cc: linux-fsdevel, jack, djwong, hch, serge, linux-security-module,
	linux-kernel, linux-xfs
In-Reply-To: <20260626114533.102138-2-cem@kernel.org>

On Fri, Jun 26, 2026 at 7:49 AM <cem@kernel.org> wrote:
>
> From: Carlos Maiolino <cem@kernel.org>
>
> In some situations (quota enforcement bypass in this case) we'd like to
> check for a specific capability without triggering spurious audit
> messages from security modules like selinux.
>
> Add a new helper so we don't need to use ns_capable_noaudit() directly.
>
> Signed-off-by: Carlos Maiolino <cmaiolino@redhat.com>
> ---
>  include/linux/capability.h |  5 +++++
>  kernel/capability.c        | 17 +++++++++++++++++
>  2 files changed, 22 insertions(+)

This is Serge's call, not mine, but FWIW, I somewhat prefer to see
code use the ns_capable_XXX() variants directly as I like to think it
means some thought went into ensuring the capability check is being
done in the right namespace.  Yes, we all know that capable() just
uses the init namespace, but I like to think that having to type that
out in the parameter list might be a good double check ;)

-- 
paul-moore.com

^ permalink raw reply

* Re: [RFC PATCH 1/4] capabily: Add new capable_noaudit
From: Serge E. Hallyn @ 2026-06-26 17:46 UTC (permalink / raw)
  To: Paul Moore
  Cc: cem, linux-fsdevel, jack, djwong, hch, serge,
	linux-security-module, linux-kernel, linux-xfs
In-Reply-To: <CAHC9VhQNURc=d4AOVDF-z29fjLasCiLf120Y-N3txEBccpkfSA@mail.gmail.com>

On Fri, Jun 26, 2026 at 11:31:06AM -0400, Paul Moore wrote:
> On Fri, Jun 26, 2026 at 7:49 AM <cem@kernel.org> wrote:
> >
> > From: Carlos Maiolino <cem@kernel.org>
> >
> > In some situations (quota enforcement bypass in this case) we'd like to
> > check for a specific capability without triggering spurious audit
> > messages from security modules like selinux.
> >
> > Add a new helper so we don't need to use ns_capable_noaudit() directly.
> >
> > Signed-off-by: Carlos Maiolino <cmaiolino@redhat.com>
> > ---
> >  include/linux/capability.h |  5 +++++
> >  kernel/capability.c        | 17 +++++++++++++++++
> >  2 files changed, 22 insertions(+)
> 
> This is Serge's call, not mine, but FWIW, I somewhat prefer to see
> code use the ns_capable_XXX() variants directly as I like to think it
> means some thought went into ensuring the capability check is being
> done in the right namespace.  Yes, we all know that capable() just
> uses the init namespace, but I like to think that having to type that
> out in the parameter list might be a good double check ;)

Hm, yeah, on he one hand it seems like a nice shortcut, but I still
see people confusing what 'capable' really does, so standardizing on
ns_capable_noaudit(&init_user_ns, x) might be worthwhile.

(and then patch 3 can go)

^ permalink raw reply

* Re: [PATCH v2 stable/linux-6.18.y 0/2] Backport Fix incorrect overlayfs mmap() and mprotect() LSM access controls
From: Sasha Levin @ 2026-06-26 17:54 UTC (permalink / raw)
  To: viro, brauner, jack, miklos, amir73il, paul, jmorris, serge,
	stephen.smalley.work, omosnace, gregkh, bboscaccy, caixinchen1
  Cc: Sasha Levin, linux-fsdevel, linux-kernel, linux-unionfs,
	linux-security-module, selinux, bpf, stable, lujialin4
In-Reply-To: <20260626075035.143419-1-caixinchen1@huawei.com>

> [PATCH v2 stable/linux-6.18.y 0/2] Backport Fix incorrect overlayfs
> mmap() and mprotect() LSM access controls

Both patches queued for 6.18, thanks.

-- 
Thanks,
Sasha

^ permalink raw reply

* Re: [RFC PATCH 2/2] selftests/landlock: Add test for TCP fast open
From: Mickaël Salaün @ 2026-06-26 21:19 UTC (permalink / raw)
  To: Matthieu Buffet
  Cc: Bryam Vargas, Günther Noack, linux-security-module,
	Mikhail Ivanov, Paul Moore, Eric Dumazet, Neal Cardwell,
	linux-kernel, netdev
In-Reply-To: <20260617180526.15627-3-matthieu@buffet.re>

On Wed, Jun 17, 2026 at 08:05:24PM +0200, Matthieu Buffet wrote:
> Enforce that TCP Fast Open is controlled by
> LANDLOCK_ACCESS_NET_CONNECT_TCP. Semantics of connect() and
> sendmsg(MSG_FASTOPEN) should be identical from Landlock's perspective.
> Also enforce error code consistency, since UDP sockets ignore
> the MSG_FASTOPEN flag while Unix sockets reject it.
> 
> Signed-off-by: Matthieu Buffet <matthieu@buffet.re>
> ---
>  tools/testing/selftests/landlock/net_test.c | 155 ++++++++++++++++++++
>  1 file changed, 155 insertions(+)
> 
> diff --git a/tools/testing/selftests/landlock/net_test.c b/tools/testing/selftests/landlock/net_test.c
> index 0c256e7c8675..177ed28e70f6 100644
> --- a/tools/testing/selftests/landlock/net_test.c
> +++ b/tools/testing/selftests/landlock/net_test.c
> @@ -258,6 +258,64 @@ static int connect_variant(const int sock_fd,
>  	return connect_variant_addrlen(sock_fd, srv, get_addrlen(srv, false));
>  }
>  
> +static int sendto_variant_addrlen(const int sock_fd,
> +				  const struct service_fixture *const srv,
> +				  const socklen_t addrlen, void *buf,
> +				  size_t len, size_t flags)
> +{
> +	const struct sockaddr *dst = NULL;
> +	ssize_t ret;
> +
> +	/*
> +        * We never want our processes to be killed by SIGPIPE: we check return
> +        * codes and errno, so that we have actual error messages.
> +        */

There are some extra spaces above.

> +	flags |= MSG_NOSIGNAL;
> +
> +	if (srv != NULL) {

Just `if (srv) {`

> +		switch (srv->protocol.domain) {
> +		case AF_UNSPEC:
> +		case AF_INET:
> +			dst = (const struct sockaddr *)&srv->ipv4_addr;
> +			break;
> +
> +		case AF_INET6:
> +			dst = (const struct sockaddr *)&srv->ipv6_addr;
> +			break;
> +
> +		case AF_UNIX:
> +			dst = (const struct sockaddr *)&srv->unix_addr;
> +			break;
> +
> +		default:
> +			errno = EAFNOSUPPORT;
> +			return -errno;
> +		}
> +	}
> +
> +	ret = sendto(sock_fd, buf, len, flags, dst, addrlen);
> +	if (ret < 0)
> +		return -errno;
> +
> +	/* errno is not set in cases of partial writes. */
> +	if (ret != len)
> +		return -EINTR;
> +
> +	return 0;
> +}
> +
> +static int sendto_variant(const int sock_fd,
> +			  const struct service_fixture *const srv, void *buf,
> +			  size_t len, size_t flags)
> +{
> +	socklen_t addrlen = 0;
> +
> +	if (srv != NULL)

ditto

> +		addrlen = get_addrlen(srv, false);
> +
> +	return sendto_variant_addrlen(sock_fd, srv, addrlen, buf, len, flags);
> +}
> +
>  FIXTURE(protocol)
>  {
>  	struct service_fixture srv0, srv1, srv2, unspec_any0, unspec_srv0;
> @@ -950,6 +1008,103 @@ TEST_F(protocol, connect_unspec)
>  	EXPECT_EQ(0, close(bind_fd));
>  }
>  
> +TEST_F(protocol, tcp_fastopen)
> +{
> +	const bool restricted = variant->sandbox == TCP_SANDBOX &&
> +				variant->prot.type == SOCK_STREAM &&
> +				(variant->prot.protocol == IPPROTO_TCP ||
> +				 variant->prot.protocol == IPPROTO_IP) &&
> +				(variant->prot.domain == AF_INET ||
> +				 variant->prot.domain == AF_INET6);
> +	const struct landlock_ruleset_attr ruleset_attr = {
> +		.handled_access_net = LANDLOCK_ACCESS_NET_CONNECT_TCP,
> +	};
> +	int bind_fd, client_fd, status;
> +	char buf;
> +	pid_t child;
> +
> +	bind_fd = socket_variant(&self->srv0);
> +	ASSERT_LE(0, bind_fd);
> +	EXPECT_EQ(0, bind_variant(bind_fd, &self->srv0));
> +	if (self->srv0.protocol.type == SOCK_STREAM)
> +		EXPECT_EQ(0, listen(bind_fd, backlog));
> +
> +	child = fork();
> +	ASSERT_LE(0, child);
> +	if (child == 0) {
> +		int connect_fd, ret;
> +
> +		/* Closes listening socket for the child. */
> +		EXPECT_EQ(0, close(bind_fd));
> +
> +		connect_fd = socket_variant(&self->srv0);
> +		ASSERT_LE(0, connect_fd);
> +
> +		if (variant->sandbox == TCP_SANDBOX) {
> +			const int ruleset_fd = landlock_create_ruleset(
> +				&ruleset_attr, sizeof(ruleset_attr), 0);
> +			ASSERT_LE(0, ruleset_fd);
> +
> +			enforce_ruleset(_metadata, ruleset_fd);
> +			EXPECT_EQ(0, close(ruleset_fd));
> +		}
> +
> +		/* Fast Open with no address. */
> +		ret = sendto_variant(connect_fd, NULL, NULL, 0, MSG_FASTOPEN);


> +		if (self->srv0.protocol.domain == AF_UNIX) {
> +			ASSERT_EQ(-ENOTCONN, ret);
> +		} else if (self->srv0.protocol.type == SOCK_DGRAM) {
> +			ASSERT_EQ(-EDESTADDRREQ, ret);
> +		} else {
> +			ASSERT_EQ(-EINVAL, ret);
> +		}
> +
> +		/* Fast Open to a denied address. */
> +		ret = sendto_variant(connect_fd, &self->srv0, "A", 1,
> +				     MSG_FASTOPEN);
> +		if (restricted) {
> +			ASSERT_EQ(-EACCES, ret);
> +		} else if (self->srv0.protocol.domain == AF_UNIX &&
> +			   self->srv0.protocol.type == SOCK_STREAM) {
> +			ASSERT_EQ(-EOPNOTSUPP, ret);
> +		} else {
> +			ASSERT_EQ(0, ret);
> +		}

All these ret checks should be done with EXPECT_EQ because they don't
block the test itself and we can get more info by checking more after
that.

> +
> +		EXPECT_EQ(0, close(connect_fd));
> +		_exit(_metadata->exit_code);
> +		return;
> +	}
> +
> +	client_fd = bind_fd;
> +	if (!restricted && self->srv0.protocol.type == SOCK_STREAM &&
> +	    self->srv0.protocol.domain != AF_UNIX) {
> +		client_fd = accept(bind_fd, NULL, 0);
> +		ASSERT_LE(0, client_fd);
> +	}
> +
> +	if (restricted) {
> +		EXPECT_EQ(-1, read(client_fd, &buf, 1));
> +		EXPECT_EQ(ENOTCONN, errno);
> +	} else if (self->srv0.protocol.domain == AF_UNIX &&
> +		   self->srv0.protocol.type == SOCK_STREAM) {
> +		EXPECT_EQ(-1, read(client_fd, &buf, 1));
> +		EXPECT_EQ(EINVAL, errno);
> +	} else {
> +		EXPECT_EQ(1, read(client_fd, &buf, 1));
> +		EXPECT_EQ('A', buf);
> +	}
> +
> +	EXPECT_EQ(child, waitpid(child, &status, 0));
> +	EXPECT_EQ(1, WIFEXITED(status));
> +	EXPECT_EQ(EXIT_SUCCESS, WEXITSTATUS(status));
> +
> +	if (client_fd != bind_fd)
> +		EXPECT_LE(0, close(client_fd));
> +
> +	EXPECT_EQ(0, close(bind_fd));
> +}
> +
>  FIXTURE(ipv4)
>  {
>  	struct service_fixture srv0, srv1;
> -- 
> 2.47.3
> 
> 

^ permalink raw reply

* Re: [RFC PATCH 1/2] landlock: fix TCP Fast Open connection bypass
From: Mickaël Salaün @ 2026-06-26 20:40 UTC (permalink / raw)
  To: Matthieu Buffet
  Cc: Bryam Vargas, Günther Noack, linux-security-module,
	Mikhail Ivanov, Paul Moore, Eric Dumazet, Neal Cardwell,
	linux-kernel, netdev
In-Reply-To: <20260617180526.15627-2-matthieu@buffet.re>

Thanks Matthieu, could you please rebase this serise on the master
branch (especially on top of your UDP changes)?

This patch will be useful for backports though.

On Wed, Jun 17, 2026 at 08:05:23PM +0200, Matthieu Buffet wrote:
> The documentation of the socket_connect() LSM hook states that it
> controls connecting a socket to a remote address. It has not been the
> case since the addition of TCP Fast Open (RFC 7413) support, which allows
> opening a TCP connection (thus, setting a socket's destination address)
> via the MSG_FASTOPEN flag passed to sendto()/sendmsg()/sendmmsg(). The
> problem then got duplicated into MPTCP.
> 
> Landlock did not take it into account when its TCP support was added,
> leaving a bypass of TCP connect policy.
> 
> Ideally a call to the LSM hook would be added in the fastopen code path,
> in order to fix this generically. But connect() hooks are designed to run
> with the socket locked, unlike sendmsg() hooks.
> 
> Closes: https://github.com/landlock-lsm/linux/issues/41
> Fixes: fff69fb03dde ("landlock: Support network rules with TCP bind and connect")
> Signed-off-by: Matthieu Buffet <matthieu@buffet.re>
> ---
>  security/landlock/net.c | 17 +++++++++++++++++
>  1 file changed, 17 insertions(+)
> 
> diff --git a/security/landlock/net.c b/security/landlock/net.c
> index 4ee4002a8f56..a2375762c18b 100644
> --- a/security/landlock/net.c
> +++ b/security/landlock/net.c
> @@ -246,9 +246,26 @@ static int hook_socket_connect(struct socket *const sock,
>  					   access_request);
>  }
>  
> +static int hook_socket_sendmsg(struct socket *const sock,
> +			       struct msghdr *const msg, const int size)
> +{
> +	struct sockaddr *const address = msg->msg_name;
> +	const int addrlen = msg->msg_namelen;
> +
> +	if (sk_is_tcp(sock->sk) && address != NULL &&
> +	    (msg->msg_flags & MSG_FASTOPEN) != 0) {

This might be a bit better:

  if ((msg->msg_flags & MSG_FASTOPEN) && address && sk_is_tcp(sock->sk))

> +		return current_check_access_socket(
> +			sock, address, addrlen,
> +			LANDLOCK_ACCESS_NET_CONNECT_TCP);
> +	}
> +
> +	return 0;
> +}
> +
>  static struct security_hook_list landlock_hooks[] __ro_after_init = {
>  	LSM_HOOK_INIT(socket_bind, hook_socket_bind),
>  	LSM_HOOK_INIT(socket_connect, hook_socket_connect),
> +	LSM_HOOK_INIT(socket_sendmsg, hook_socket_sendmsg),
>  };
>  
>  __init void landlock_add_net_hooks(void)
> -- 
> 2.47.3
> 
> 

^ permalink raw reply

* Re: [PATCH bpf-next v2 1/5] bpf: Verify signed loader metadata at load time
From: KP Singh @ 2026-06-26 22:01 UTC (permalink / raw)
  To: Alexei Starovoitov
  Cc: Paul Moore, Daniel Borkmann, ast, James.Bottomley, bboscaccy,
	memxor, torvalds, bpf, linux-security-module
In-Reply-To: <DJIL18C2F40B.39U9WHD43SDBR@gmail.com>

On Fri, Jun 26, 2026 at 3:16 AM Alexei Starovoitov
<alexei.starovoitov@gmail.com> wrote:
>
> On Thu Jun 25, 2026 at 5:59 PM PDT, Paul Moore wrote:
> >
> > For all the reasons I gave previously, I can't support moving the
> > existing security_bpf_prog_load() hook at this point in time.
>
> Paul,
> it's not up to you to approve or deny where security_bpf_prog_load()
> is called within bpf subsystem as long as it doesn't affect behavior.
> Daniel's patch doesn't change observable state from LSMs pov.
> It merely moves the call from syscall.c to verifier.c.
> So we're going to proceed.
>
> > I'm guessing you still haven't looked at Blaise's patchset from last
> > September.
>
> Blaise approach was Nacked because you guys ignored TOCTOU issue.
> I pointed it a year ago before AI was a thing. Then sashiko
> pointed it again and the bot explained it in detail. It was again
> ignored.
>

Agreed, with Alexei: I like Daniel's solution because it avoids the
complexity of machine independent BTF for loader programs and still
addresses the signing goal, without the TOCTOU. I am okay with the
current implementation as well. I think having kfuncs in loader
programs would be useful, but we don't need to rush it or tie it to
the signing effort.

- KP

> Daniel's v1 sadly had the same issue and sashiko spotted it too.
> Hence v2 is moving the location of security_bpf_prog_load().
>
> > on-list.  As you can see from the lore archives, he has vehemently
> > opposed the approach you are proposing for quite a while.
>
> Exactly, because you kept ignoring TOCTOU issue.
> Claiming support for signed bpf that can be easily defeated
> is a shameless security scam.
>

^ permalink raw reply

* Re: [PATCH bpf-next v2 1/5] bpf: Verify signed loader metadata at load time
From: Paul Moore @ 2026-06-27  1:32 UTC (permalink / raw)
  To: KP Singh
  Cc: Alexei Starovoitov, Daniel Borkmann, ast, James.Bottomley,
	bboscaccy, memxor, torvalds, bpf, linux-security-module
In-Reply-To: <CACYkzJ64SxZj1Qvf1JRDfpoOWkg+bk_hpXcfGfPvecSfSu2jdA@mail.gmail.com>

On Fri, Jun 26, 2026 at 6:01 PM KP Singh <kpsingh@kernel.org> wrote:
> On Fri, Jun 26, 2026 at 3:16 AM Alexei Starovoitov
> <alexei.starovoitov@gmail.com> wrote:
> > On Thu Jun 25, 2026 at 5:59 PM PDT, Paul Moore wrote:
> > >
> > > For all the reasons I gave previously, I can't support moving the
> > > existing security_bpf_prog_load() hook at this point in time.
> >
> > Paul,
> > it's not up to you to approve or deny where security_bpf_prog_load()
> > is called within bpf subsystem as long as it doesn't affect behavior.
> > Daniel's patch doesn't change observable state from LSMs pov.
> > It merely moves the call from syscall.c to verifier.c.
> > So we're going to proceed.
> >
> > > I'm guessing you still haven't looked at Blaise's patchset from last
> > > September.
> >
> > Blaise approach was Nacked because you guys ignored TOCTOU issue.
> > I pointed it a year ago before AI was a thing. Then sashiko
> > pointed it again and the bot explained it in detail. It was again
> > ignored.
>
> Agreed, with Alexei: I like Daniel's solution because it avoids the
> complexity of machine independent BTF for loader programs and still
> addresses the signing goal, without the TOCTOU. I am okay with the
> current implementation as well. I think having kfuncs in loader
> programs would be useful, but we don't need to rush it or tie it to
> the signing effort.

As I've said a couple of times now in Daniel's approach, I like the
basic idea in Daniel's approach; it aligns with the same scheme we
have been advocating for all along: run the PKCS7 signature over both
the lskel loader and the maps.  I know you keep mentioning a TOCTOU
bug in that September patchset, but the main objection to all the
different variants of Blaise's patches was always the same basic idea:
you and Alexei wanted to verify the maps in the lskel loader BPF code,
while we wanted to directly verify the maps as part of the PKCS7
signature.  Any TOCTOU issues were implementation details; the
difference in the signature scheme was the sticking point in many of
the arguments.  For whatever it is worth, you and Alexei were not
immune to TOCTOU issues either.  The frozen map problem was mentioned
on-list and ignored.  We had to submit a PoC/report to security@kernel
to get it taken seriously and patched.

Regardless, of how we got here and whether or not you recognize
Daniel's signature scheme as a retread of Blaise's scheme, the point
is that you've come around to a concept we have advocated for over a
year.  If you plan to merge it, good (although I do wonder what Linus
will think of the userspace breakage caused by abandoning the
signature scheme that shipped in v6.18, maybe it's not a big deal, who
knows).  However, and this is important, we can't move the
security_bpf_prog_load() function to where Daniel proposed in his v2
patchset.  The issue is that non-trivial work happens, e.g. the
potential allocation of a buffer to hold up to one million eBPF
instructions, and we want to ensure we have an LSM access control
point before that happens.  If you, Daniel, and Alexei want to move
the bpf_prog_verify_signature() call into the verifier that's fine.
However, the security_bpf_prog_load() call needs to stay where it is.
It served an important purpose well before signed BPF programs were
supported, and adding BPF signatures doesn't invalidate that original
purpose.  If you want an LSM hook *after* the
bpf_prog_verify_signature() call and you oppose adding a new hook for
this new functionality, use the security_bpf_prog() hook.  It isn't
quite as conceptually clean of a solution as adding a new hook, but it
should work to allow you to control access to a BPF program based on
the signature verification result.

I'll support Daniel's patchset as long as you don't tamper with the
security_bpf_prog_load() hook.  If you move the hook to where it is
currently proposed in Daniel's v2 patchset I will have to object.  Do
we really want to keep going around on this stupid, petty carousel of
bickering and constant NACKs?

-- 
paul-moore.com

^ permalink raw reply

* [PATCH v2 stable/linux-6.12.y 0/2] Backport Fix incorrect overlayfs mmap() and mprotect() LSM access controls
From: Cai Xinchen @ 2026-06-27  4:28 UTC (permalink / raw)
  To: viro, brauner, jack, miklos, amir73il, paul, jmorris, serge,
	stephen.smalley.work, omosnace, gregkh, bboscaccy, caixinchen1
  Cc: linux-fsdevel, linux-kernel, linux-unionfs, linux-security-module,
	selinux, bpf, stable, lujialin4

v2: Add static to struct kmem_cache *lsm_backing_file_cache; and define
lbs_backing_file as int for keeping the same type as 6.12.

Backport the patch series
"Fix incorrect overlayfs mmap() and mprotect() LSM access controls" [1]
to 6.12 lts

I test selinux-testsuite[2] overlay test, it pass 135 tests.

[1] https://lore.kernel.org/all/20260403030848.731867-5-paul@paul-moore.com/
[2] https://github.com/SELinuxProject/selinux-testsuite

Paul Moore (2):
  lsm: add backing_file LSM hooks
  selinux: fix overlayfs mmap() and mprotect() access checks

 fs/backing-file.c                 |  18 ++-
 fs/file_table.c                   |  27 +++-
 fs/fuse/passthrough.c             |   2 +-
 fs/internal.h                     |   3 +-
 fs/overlayfs/dir.c                |   2 +-
 fs/overlayfs/file.c               |   3 +-
 include/linux/backing-file.h      |   4 +-
 include/linux/fs.h                |  13 ++
 include/linux/lsm_audit.h         |   2 +-
 include/linux/lsm_hook_defs.h     |   5 +
 include/linux/lsm_hooks.h         |   1 +
 include/linux/security.h          |  22 +++
 security/security.c               | 109 ++++++++++++++
 security/selinux/hooks.c          | 242 ++++++++++++++++++++++--------
 security/selinux/include/objsec.h |  11 ++
 15 files changed, 384 insertions(+), 80 deletions(-)

-- 
2.18.0.huawei.25


^ permalink raw reply

* [PATCH v2 stable/linux-6.12.y 1/2] lsm: add backing_file LSM hooks
From: Cai Xinchen @ 2026-06-27  4:28 UTC (permalink / raw)
  To: viro, brauner, jack, miklos, amir73il, paul, jmorris, serge,
	stephen.smalley.work, omosnace, gregkh, bboscaccy, caixinchen1
  Cc: linux-fsdevel, linux-kernel, linux-unionfs, linux-security-module,
	selinux, bpf, stable, lujialin4
In-Reply-To: <20260627042835.1492435-1-caixinchen1@huawei.com>

From: Paul Moore <paul@paul-moore.com>

[ Upstream commit 6af36aeb147a06dea47c49859cd6ca5659aeb987 ]

Stacked filesystems such as overlayfs do not currently provide the
necessary mechanisms for LSMs to properly enforce access controls on the
mmap() and mprotect() operations.  In order to resolve this gap, a LSM
security blob is being added to the backing_file struct and the following
new LSM hooks are being created:

 security_backing_file_alloc()
 security_backing_file_free()
 security_mmap_backing_file()

The first two hooks are to manage the lifecycle of the LSM security blob
in the backing_file struct, while the third provides a new mmap() access
control point for the underlying backing file.  It is also expected that
LSMs will likely want to update their security_file_mprotect() callback
to address issues with their mprotect() controls, but that does not
require a change to the security_file_mprotect() LSM hook.

There are a three other small changes to support these new LSM hooks:
* Pass the user file associated with a backing file down to
alloc_empty_backing_file() so it can be included in the
security_backing_file_alloc() hook.
* Add getter and setter functions for the backing_file struct LSM blob
as the backing_file struct remains private to fs/file_table.c.
* Constify the file struct field in the LSM common_audit_data struct to
better support LSMs that need to pass a const file struct pointer into
the common LSM audit code.

Thanks to Arnd Bergmann for identifying the missing EXPORT_SYMBOL_GPL()
and supplying a fixup.

Cc: stable@vger.kernel.org
Cc: linux-fsdevel@vger.kernel.org
Cc: linux-unionfs@vger.kernel.org
Cc: linux-erofs@lists.ozlabs.org
Reviewed-by: Amir Goldstein <amir73il@gmail.com>
Reviewed-by: Serge Hallyn <serge@hallyn.com>
Reviewed-by: Christian Brauner <brauner@kernel.org>
Signed-off-by: Paul Moore <paul@paul-moore.com>
[ Mainline declares lsm_backing_file_cache in security/lsm.h.  Linux 6.12.y
does not have security/lsm_init.c or security/lsm.h; the cache variable
is defined locally as static struct kmem_cache *lsm_backing_file_cache in
security/security.c. ]
Signed-off-by: Cai Xinchen <caixinchen1@huawei.com>
---
 fs/backing-file.c             |  18 ++++--
 fs/file_table.c               |  27 +++++++--
 fs/fuse/passthrough.c         |   2 +-
 fs/internal.h                 |   3 +-
 fs/overlayfs/dir.c            |   2 +-
 fs/overlayfs/file.c           |   3 +-
 include/linux/backing-file.h  |   4 +-
 include/linux/fs.h            |  13 ++++
 include/linux/lsm_audit.h     |   2 +-
 include/linux/lsm_hook_defs.h |   5 ++
 include/linux/lsm_hooks.h     |   1 +
 include/linux/security.h      |  22 +++++++
 security/security.c           | 109 ++++++++++++++++++++++++++++++++++
 13 files changed, 195 insertions(+), 16 deletions(-)

diff --git a/fs/backing-file.c b/fs/backing-file.c
index 09a9be945d45..389cfe3ed509 100644
--- a/fs/backing-file.c
+++ b/fs/backing-file.c
@@ -12,6 +12,7 @@
 #include <linux/backing-file.h>
 #include <linux/splice.h>
 #include <linux/mm.h>
+#include <linux/security.h>
 
 #include "internal.h"
 
@@ -29,14 +30,15 @@
  * returned file into a container structure that also stores the stacked
  * file's path, which can be retrieved using backing_file_user_path().
  */
-struct file *backing_file_open(const struct path *user_path, int flags,
+struct file *backing_file_open(const struct file *user_file, int flags,
 			       const struct path *real_path,
 			       const struct cred *cred)
 {
+	const struct path *user_path = &user_file->f_path;
 	struct file *f;
 	int error;
 
-	f = alloc_empty_backing_file(flags, cred);
+	f = alloc_empty_backing_file(flags, cred, user_file);
 	if (IS_ERR(f))
 		return f;
 
@@ -52,15 +54,16 @@ struct file *backing_file_open(const struct path *user_path, int flags,
 }
 EXPORT_SYMBOL_GPL(backing_file_open);
 
-struct file *backing_tmpfile_open(const struct path *user_path, int flags,
+struct file *backing_tmpfile_open(const struct file *user_file, int flags,
 				  const struct path *real_parentpath,
 				  umode_t mode, const struct cred *cred)
 {
 	struct mnt_idmap *real_idmap = mnt_idmap(real_parentpath->mnt);
+	const struct path *user_path = &user_file->f_path;
 	struct file *f;
 	int error;
 
-	f = alloc_empty_backing_file(flags, cred);
+	f = alloc_empty_backing_file(flags, cred, user_file);
 	if (IS_ERR(f))
 		return f;
 
@@ -326,6 +329,7 @@ EXPORT_SYMBOL_GPL(backing_file_splice_write);
 int backing_file_mmap(struct file *file, struct vm_area_struct *vma,
 		      struct backing_file_ctx *ctx)
 {
+	struct file *user_file = vma->vm_file;
 	const struct cred *old_cred;
 	int ret;
 
@@ -339,6 +343,12 @@ int backing_file_mmap(struct file *file, struct vm_area_struct *vma,
 	vma_set_file(vma, file);
 
 	old_cred = override_creds(ctx->cred);
+	ret = security_mmap_backing_file(vma, file, user_file);
+	if (ret) {
+		revert_creds(old_cred);
+		return ret;
+	}
+
 	ret = call_mmap(vma->vm_file, vma);
 	revert_creds(old_cred);
 
diff --git a/fs/file_table.c b/fs/file_table.c
index 2a08bc93b0b9..a771be30eb43 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -46,6 +46,9 @@ static struct percpu_counter nr_files __cacheline_aligned_in_smp;
 struct backing_file {
 	struct file file;
 	struct path user_path;
+#ifdef CONFIG_SECURITY
+	void *security;
+#endif
 };
 
 static inline struct backing_file *backing_file(struct file *f)
@@ -59,8 +62,21 @@ struct path *backing_file_user_path(struct file *f)
 }
 EXPORT_SYMBOL_GPL(backing_file_user_path);
 
+#ifdef CONFIG_SECURITY
+void *backing_file_security(const struct file *f)
+{
+	return backing_file(f)->security;
+}
+
+void backing_file_set_security(struct file *f, void *security)
+{
+	backing_file(f)->security = security;
+}
+#endif /* CONFIG_SECURITY */
+
 static inline void backing_file_free(struct backing_file *ff)
 {
+	security_backing_file_free(&ff->file);
 	path_put(&ff->user_path);
 	kfree(ff);
 }
@@ -259,10 +275,12 @@ struct file *alloc_empty_file_noaccount(int flags, const struct cred *cred)
 	return f;
 }
 
-static int init_backing_file(struct backing_file *ff)
+static int init_backing_file(struct backing_file *ff,
+			     const struct file *user_file)
 {
 	memset(&ff->user_path, 0, sizeof(ff->user_path));
-	return 0;
+	backing_file_set_security(&ff->file, NULL);
+	return security_backing_file_alloc(&ff->file, user_file);
 }
 
 /*
@@ -272,7 +290,8 @@ static int init_backing_file(struct backing_file *ff)
  * This is only for kernel internal use, and the allocate file must not be
  * installed into file tables or such.
  */
-struct file *alloc_empty_backing_file(int flags, const struct cred *cred)
+struct file *alloc_empty_backing_file(int flags, const struct cred *cred,
+				      const struct file *user_file)
 {
 	struct backing_file *ff;
 	int error;
@@ -289,7 +308,7 @@ struct file *alloc_empty_backing_file(int flags, const struct cred *cred)
 
 	/* The f_mode flags must be set before fput(). */
 	ff->file.f_mode |= FMODE_BACKING | FMODE_NOACCOUNT;
-	error = init_backing_file(ff);
+	error = init_backing_file(ff, user_file);
 	if (unlikely(error)) {
 		fput(&ff->file);
 		return ERR_PTR(error);
diff --git a/fs/fuse/passthrough.c b/fs/fuse/passthrough.c
index 6bfd09dda9e3..140e150be0de 100644
--- a/fs/fuse/passthrough.c
+++ b/fs/fuse/passthrough.c
@@ -326,7 +326,7 @@ struct fuse_backing *fuse_passthrough_open(struct file *file,
 		goto out;
 
 	/* Allocate backing file per fuse file to store fuse path */
-	backing_file = backing_file_open(&file->f_path, file->f_flags,
+	backing_file = backing_file_open(file, file->f_flags,
 					 &fb->file->f_path, fb->cred);
 	err = PTR_ERR(backing_file);
 	if (IS_ERR(backing_file)) {
diff --git a/fs/internal.h b/fs/internal.h
index 8c1b7acbbe8f..89a15e0f2f1b 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -99,7 +99,8 @@ extern void chroot_fs_refs(const struct path *, const struct path *);
  */
 struct file *alloc_empty_file(int flags, const struct cred *cred);
 struct file *alloc_empty_file_noaccount(int flags, const struct cred *cred);
-struct file *alloc_empty_backing_file(int flags, const struct cred *cred);
+struct file *alloc_empty_backing_file(int flags, const struct cred *cred,
+				      const struct file *user_file);
 
 static inline void file_put_write_access(struct file *file)
 {
diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c
index ab65e98a1def..1c8009bf194b 100644
--- a/fs/overlayfs/dir.c
+++ b/fs/overlayfs/dir.c
@@ -1320,7 +1320,7 @@ static int ovl_create_tmpfile(struct file *file, struct dentry *dentry,
 		goto out_revert_creds;
 
 	ovl_path_upper(dentry->d_parent, &realparentpath);
-	realfile = backing_tmpfile_open(&file->f_path, flags, &realparentpath,
+	realfile = backing_tmpfile_open(file, flags, &realparentpath,
 					mode, current_cred());
 	err = PTR_ERR_OR_ZERO(realfile);
 	pr_debug("tmpfile/open(%pd2, 0%o) = %i\n", realparentpath.dentry, mode, err);
diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
index 94095058da34..3765e1defa19 100644
--- a/fs/overlayfs/file.c
+++ b/fs/overlayfs/file.c
@@ -47,8 +47,7 @@ static struct file *ovl_open_realfile(const struct file *file,
 	} else {
 		if (!inode_owner_or_capable(real_idmap, realinode))
 			flags &= ~O_NOATIME;
-
-		realfile = backing_file_open(file_user_path((struct file *) file),
+		realfile = backing_file_open(file,
 					     flags, realpath, current_cred());
 	}
 	revert_creds(old_cred);
diff --git a/include/linux/backing-file.h b/include/linux/backing-file.h
index 2eed0ffb5e8f..cd18acd7ac5b 100644
--- a/include/linux/backing-file.h
+++ b/include/linux/backing-file.h
@@ -19,10 +19,10 @@ struct backing_file_ctx {
 	void (*end_write)(struct file *, loff_t, ssize_t);
 };
 
-struct file *backing_file_open(const struct path *user_path, int flags,
+struct file *backing_file_open(const struct file *user_file, int flags,
 			       const struct path *real_path,
 			       const struct cred *cred);
-struct file *backing_tmpfile_open(const struct path *user_path, int flags,
+struct file *backing_tmpfile_open(const struct file *user_file, int flags,
 				  const struct path *real_parentpath,
 				  umode_t mode, const struct cred *cred);
 ssize_t backing_file_read_iter(struct file *file, struct iov_iter *iter,
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 87720e1b5419..d06c2d829877 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2740,6 +2740,19 @@ struct file *dentry_create(const struct path *path, int flags, umode_t mode,
 			   const struct cred *cred);
 struct path *backing_file_user_path(struct file *f);
 
+#ifdef CONFIG_SECURITY
+void *backing_file_security(const struct file *f);
+void backing_file_set_security(struct file *f, void *security);
+#else
+static inline void *backing_file_security(const struct file *f)
+{
+	return NULL;
+}
+static inline void backing_file_set_security(struct file *f, void *security)
+{
+}
+#endif /* CONFIG_SECURITY */
+
 /*
  * When mmapping a file on a stackable filesystem (e.g., overlayfs), the file
  * stored in ->vm_file is a backing file whose f_inode is on the underlying
diff --git a/include/linux/lsm_audit.h b/include/linux/lsm_audit.h
index 97a8b21eb033..c0a2839253fa 100644
--- a/include/linux/lsm_audit.h
+++ b/include/linux/lsm_audit.h
@@ -93,7 +93,7 @@ struct common_audit_data {
 #endif
 		char *kmod_name;
 		struct lsm_ioctlop_audit *op;
-		struct file *file;
+		const struct file *file;
 		struct lsm_ibpkey_audit *ibpkey;
 		struct lsm_ibendport_audit *ibendport;
 		int reason;
diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h
index 9eca013aa5e1..addb34abffa1 100644
--- a/include/linux/lsm_hook_defs.h
+++ b/include/linux/lsm_hook_defs.h
@@ -188,6 +188,9 @@ LSM_HOOK(int, 0, file_permission, struct file *file, int mask)
 LSM_HOOK(int, 0, file_alloc_security, struct file *file)
 LSM_HOOK(void, LSM_RET_VOID, file_release, struct file *file)
 LSM_HOOK(void, LSM_RET_VOID, file_free_security, struct file *file)
+LSM_HOOK(int, 0, backing_file_alloc, struct file *backing_file,
+	 const struct file *user_file)
+LSM_HOOK(void, LSM_RET_VOID, backing_file_free, struct file *backing_file)
 LSM_HOOK(int, 0, file_ioctl, struct file *file, unsigned int cmd,
 	 unsigned long arg)
 LSM_HOOK(int, 0, file_ioctl_compat, struct file *file, unsigned int cmd,
@@ -195,6 +198,8 @@ LSM_HOOK(int, 0, file_ioctl_compat, struct file *file, unsigned int cmd,
 LSM_HOOK(int, 0, mmap_addr, unsigned long addr)
 LSM_HOOK(int, 0, mmap_file, struct file *file, unsigned long reqprot,
 	 unsigned long prot, unsigned long flags)
+LSM_HOOK(int, 0, mmap_backing_file, struct vm_area_struct *vma,
+	 struct file *backing_file, struct file *user_file)
 LSM_HOOK(int, 0, file_mprotect, struct vm_area_struct *vma,
 	 unsigned long reqprot, unsigned long prot)
 LSM_HOOK(int, 0, file_lock, struct file *file, unsigned int cmd)
diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h
index 090d1d3e19fe..0876cf11e200 100644
--- a/include/linux/lsm_hooks.h
+++ b/include/linux/lsm_hooks.h
@@ -104,6 +104,7 @@ struct security_hook_list {
 struct lsm_blob_sizes {
 	int lbs_cred;
 	int lbs_file;
+	int lbs_backing_file;
 	int lbs_ib;
 	int lbs_inode;
 	int lbs_sock;
diff --git a/include/linux/security.h b/include/linux/security.h
index 2c6db949ad1a..e4300f2ff11b 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -421,11 +421,17 @@ int security_file_permission(struct file *file, int mask);
 int security_file_alloc(struct file *file);
 void security_file_release(struct file *file);
 void security_file_free(struct file *file);
+int security_backing_file_alloc(struct file *backing_file,
+				const struct file *user_file);
+void security_backing_file_free(struct file *backing_file);
 int security_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
 int security_file_ioctl_compat(struct file *file, unsigned int cmd,
 			       unsigned long arg);
 int security_mmap_file(struct file *file, unsigned long prot,
 			unsigned long flags);
+int security_mmap_backing_file(struct vm_area_struct *vma,
+			       struct file *backing_file,
+			       struct file *user_file);
 int security_mmap_addr(unsigned long addr);
 int security_file_mprotect(struct vm_area_struct *vma, unsigned long reqprot,
 			   unsigned long prot);
@@ -1065,6 +1071,15 @@ static inline void security_file_release(struct file *file)
 static inline void security_file_free(struct file *file)
 { }
 
+static inline int security_backing_file_alloc(struct file *backing_file,
+					      const struct file *user_file)
+{
+	return 0;
+}
+
+static inline void security_backing_file_free(struct file *backing_file)
+{ }
+
 static inline int security_file_ioctl(struct file *file, unsigned int cmd,
 				      unsigned long arg)
 {
@@ -1084,6 +1099,13 @@ static inline int security_mmap_file(struct file *file, unsigned long prot,
 	return 0;
 }
 
+static inline int security_mmap_backing_file(struct vm_area_struct *vma,
+					     struct file *backing_file,
+					     struct file *user_file)
+{
+	return 0;
+}
+
 static inline int security_mmap_addr(unsigned long addr)
 {
 	return cap_mmap_addr(addr);
diff --git a/security/security.c b/security/security.c
index 6e4deac6ec07..dd6b922c12de 100644
--- a/security/security.c
+++ b/security/security.c
@@ -95,6 +95,7 @@ const char *const lockdown_reasons[LOCKDOWN_CONFIDENTIALITY_MAX + 1] = {
 static BLOCKING_NOTIFIER_HEAD(blocking_lsm_notifier_chain);
 
 static struct kmem_cache *lsm_file_cache;
+static struct kmem_cache *lsm_backing_file_cache;
 static struct kmem_cache *lsm_inode_cache;
 
 char *lsm_names;
@@ -266,6 +267,7 @@ static void __init lsm_set_blob_sizes(struct lsm_blob_sizes *needed)
 
 	lsm_set_blob_size(&needed->lbs_cred, &blob_sizes.lbs_cred);
 	lsm_set_blob_size(&needed->lbs_file, &blob_sizes.lbs_file);
+	lsm_set_blob_size(&needed->lbs_backing_file, &blob_sizes.lbs_backing_file);
 	lsm_set_blob_size(&needed->lbs_ib, &blob_sizes.lbs_ib);
 	/*
 	 * The inode blob gets an rcu_head in addition to
@@ -468,6 +470,7 @@ static void __init ordered_lsm_init(void)
 
 	init_debug("cred blob size       = %d\n", blob_sizes.lbs_cred);
 	init_debug("file blob size       = %d\n", blob_sizes.lbs_file);
+	init_debug("lsm_backing_file_cache	 = %d\n", blob_sizes.lbs_backing_file);
 	init_debug("ib blob size         = %d\n", blob_sizes.lbs_ib);
 	init_debug("inode blob size      = %d\n", blob_sizes.lbs_inode);
 	init_debug("ipc blob size        = %d\n", blob_sizes.lbs_ipc);
@@ -490,6 +493,11 @@ static void __init ordered_lsm_init(void)
 		lsm_file_cache = kmem_cache_create("lsm_file_cache",
 						   blob_sizes.lbs_file, 0,
 						   SLAB_PANIC, NULL);
+	if (blob_sizes.lbs_backing_file)
+		lsm_backing_file_cache = kmem_cache_create(
+						   "lsm_backing_file_cache",
+						   blob_sizes.lbs_backing_file,
+						   0, SLAB_PANIC, NULL);
 	if (blob_sizes.lbs_inode)
 		lsm_inode_cache = kmem_cache_create("lsm_inode_cache",
 						    blob_sizes.lbs_inode, 0,
@@ -666,6 +674,30 @@ int unregister_blocking_lsm_notifier(struct notifier_block *nb)
 }
 EXPORT_SYMBOL(unregister_blocking_lsm_notifier);
 
+/**
+ * lsm_backing_file_alloc - allocate a composite backing file blob
+ * @backing_file: the backing file
+ *
+ * Allocate the backing file blob for all the modules.
+ *
+ * Returns 0, or -ENOMEM if memory can't be allocated.
+ */
+static int lsm_backing_file_alloc(struct file *backing_file)
+{
+	void *blob;
+
+	if (!lsm_backing_file_cache) {
+		backing_file_set_security(backing_file, NULL);
+		return 0;
+	}
+
+	blob = kmem_cache_zalloc(lsm_backing_file_cache, GFP_KERNEL);
+	backing_file_set_security(backing_file, blob);
+	if (!blob)
+		return -ENOMEM;
+	return 0;
+}
+
 /**
  * lsm_blob_alloc - allocate a composite blob
  * @dest: the destination for the blob
@@ -2893,6 +2925,57 @@ void security_file_free(struct file *file)
 	}
 }
 
+/**
+ * security_backing_file_alloc() - Allocate and setup a backing file blob
+ * @backing_file: the backing file
+ * @user_file: the associated user visible file
+ *
+ * Allocate a backing file LSM blob and perform any necessary initialization of
+ * the LSM blob.  There will be some operations where the LSM will not have
+ * access to @user_file after this point, so any important state associated
+ * with @user_file that is important to the LSM should be captured in the
+ * backing file's LSM blob.
+ *
+ * LSM's should avoid taking a reference to @user_file in this hook as it will
+ * result in problems later when the system attempts to drop/put the file
+ * references due to a circular dependency.
+ *
+ * Return: Return 0 if the hook is successful, negative values otherwise.
+ */
+int security_backing_file_alloc(struct file *backing_file,
+				const struct file *user_file)
+{
+	int rc;
+
+	rc = lsm_backing_file_alloc(backing_file);
+	if (rc)
+		return rc;
+	rc = call_int_hook(backing_file_alloc, backing_file, user_file);
+	if (unlikely(rc))
+		security_backing_file_free(backing_file);
+
+	return rc;
+}
+
+/**
+ * security_backing_file_free() - Free a backing file blob
+ * @backing_file: the backing file
+ *
+ * Free any LSM state associate with a backing file's LSM blob, including the
+ * blob itself.
+ */
+void security_backing_file_free(struct file *backing_file)
+{
+	void *blob = backing_file_security(backing_file);
+
+	call_void_hook(backing_file_free, backing_file);
+
+	if (blob) {
+		backing_file_set_security(backing_file, NULL);
+		kmem_cache_free(lsm_backing_file_cache, blob);
+	}
+}
+
 /**
  * security_file_ioctl() - Check if an ioctl is allowed
  * @file: associated file
@@ -2981,6 +3064,32 @@ int security_mmap_file(struct file *file, unsigned long prot,
 			     flags);
 }
 
+/**
+ * security_mmap_backing_file - Check if mmap'ing a backing file is allowed
+ * @vma: the vm_area_struct for the mmap'd region
+ * @backing_file: the backing file being mmap'd
+ * @user_file: the user file being mmap'd
+ *
+ * Check permissions for a mmap operation on a stacked filesystem.  This hook
+ * is called after the security_mmap_file() and is responsible for authorizing
+ * the mmap on @backing_file.  It is important to note that the mmap operation
+ * on @user_file has already been authorized and the @vma->vm_file has been
+ * set to @backing_file.
+ *
+ * Return: Returns 0 if permission is granted.
+ */
+int security_mmap_backing_file(struct vm_area_struct *vma,
+			       struct file *backing_file,
+			       struct file *user_file)
+{
+	/* recommended by the stackable filesystem devs */
+	if (WARN_ON_ONCE(!(backing_file->f_mode & FMODE_BACKING)))
+		return -EIO;
+
+	return call_int_hook(mmap_backing_file, vma, backing_file, user_file);
+}
+EXPORT_SYMBOL_GPL(security_mmap_backing_file);
+
 /**
  * security_mmap_addr() - Check if mmap'ing an address is allowed
  * @addr: address
-- 
2.18.0.huawei.25


^ permalink raw reply related

* [PATCH v2 stable/linux-6.12.y 2/2] selinux: fix overlayfs mmap() and mprotect() access checks
From: Cai Xinchen @ 2026-06-27  4:28 UTC (permalink / raw)
  To: viro, brauner, jack, miklos, amir73il, paul, jmorris, serge,
	stephen.smalley.work, omosnace, gregkh, bboscaccy, caixinchen1
  Cc: linux-fsdevel, linux-kernel, linux-unionfs, linux-security-module,
	selinux, bpf, stable, lujialin4
In-Reply-To: <20260627042835.1492435-1-caixinchen1@huawei.com>

From: Paul Moore <paul@paul-moore.com>

[ Upstream commit 82544d36b1729153c8aeb179e84750f0c085d3b1 ]

The existing SELinux security model for overlayfs is to allow access if
the current task is able to access the top level file (the "user" file)
and the mounter's credentials are sufficient to access the lower
level file (the "backing" file).  Unfortunately, the current code does
not properly enforce these access controls for both mmap() and mprotect()
operations on overlayfs filesystems.

This patch makes use of the newly created security_mmap_backing_file()
LSM hook to provide the missing backing file enforcement for mmap()
operations, and leverages the backing file API and new LSM blob to
provide the necessary information to properly enforce the mprotect()
access controls.

Cc: stable@vger.kernel.org
Acked-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Paul Moore <paul@paul-moore.com>
Signed-off-by: Cai Xinchen <caixinchen1@huawei.com>
---
 security/selinux/hooks.c          | 242 ++++++++++++++++++++++--------
 security/selinux/include/objsec.h |  11 ++
 2 files changed, 189 insertions(+), 64 deletions(-)

diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 8e31d3b60fc6..1b89c8d5fa2f 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -1724,49 +1724,72 @@ static inline int file_path_has_perm(const struct cred *cred,
 static int bpf_fd_pass(const struct file *file, u32 sid);
 #endif
 
-/* Check whether a task can use an open file descriptor to
-   access an inode in a given way.  Check access to the
-   descriptor itself, and then use dentry_has_perm to
-   check a particular permission to the file.
-   Access to the descriptor is implicitly granted if it
-   has the same SID as the process.  If av is zero, then
-   access to the file is not checked, e.g. for cases
-   where only the descriptor is affected like seek. */
-static int file_has_perm(const struct cred *cred,
-			 struct file *file,
-			 u32 av)
+static int __file_has_perm(const struct cred *cred, const struct file *file,
+			   u32 av, bool bf_user_file)
+
 {
-	struct file_security_struct *fsec = selinux_file(file);
-	struct inode *inode = file_inode(file);
 	struct common_audit_data ad;
-	u32 sid = cred_sid(cred);
+	struct inode *inode;
+	u32 ssid = cred_sid(cred);
+	u32 tsid_fd;
 	int rc;
 
-	ad.type = LSM_AUDIT_DATA_FILE;
-	ad.u.file = file;
+	if (bf_user_file) {
+		struct backing_file_security_struct *bfsec;
+		const struct path *path;
 
-	if (sid != fsec->sid) {
-		rc = avc_has_perm(sid, fsec->sid,
-				  SECCLASS_FD,
-				  FD__USE,
-				  &ad);
+		if (WARN_ON(!(file->f_mode & FMODE_BACKING)))
+			return -EIO;
+
+		bfsec = selinux_backing_file(file);
+		path = backing_file_user_path(file);
+		tsid_fd = bfsec->uf_sid;
+		inode = d_inode(path->dentry);
+
+		ad.type = LSM_AUDIT_DATA_PATH;
+		ad.u.path = *path;
+	} else {
+		struct file_security_struct *fsec = selinux_file(file);
+
+		tsid_fd = fsec->sid;
+		inode = file_inode(file);
+
+		ad.type = LSM_AUDIT_DATA_FILE;
+		ad.u.file = file;
+	}
+
+	if (ssid != tsid_fd) {
+		rc = avc_has_perm(ssid, tsid_fd, SECCLASS_FD, FD__USE, &ad);
 		if (rc)
-			goto out;
+			return rc;
 	}
 
 #ifdef CONFIG_BPF_SYSCALL
-	rc = bpf_fd_pass(file, cred_sid(cred));
+	/* regardless of backing vs user file, use the underlying file here */
+	rc = bpf_fd_pass(file, ssid);
 	if (rc)
 		return rc;
 #endif
 
 	/* av is zero if only checking access to the descriptor. */
-	rc = 0;
 	if (av)
-		rc = inode_has_perm(cred, inode, av, &ad);
+		return inode_has_perm(cred, inode, av, &ad);
 
-out:
-	return rc;
+	return 0;
+}
+
+/* Check whether a task can use an open file descriptor to
+   access an inode in a given way.  Check access to the
+   descriptor itself, and then use dentry_has_perm to
+   check a particular permission to the file.
+   Access to the descriptor is implicitly granted if it
+   has the same SID as the process.  If av is zero, then
+   access to the file is not checked, e.g. for cases
+   where only the descriptor is affected like seek. */
+static inline int file_has_perm(const struct cred *cred,
+				const struct file *file, u32 av)
+{
+	return __file_has_perm(cred, file, av, false);
 }
 
 /*
@@ -3653,6 +3676,17 @@ static int selinux_file_alloc_security(struct file *file)
 	return 0;
 }
 
+static int selinux_backing_file_alloc(struct file *backing_file,
+				      const struct file *user_file)
+{
+	struct backing_file_security_struct *bfsec;
+
+	bfsec = selinux_backing_file(backing_file);
+	bfsec->uf_sid = selinux_file(user_file)->sid;
+
+	return 0;
+}
+
 /*
  * Check whether a task has the ioctl permission and cmd
  * operation to an inode.
@@ -3770,42 +3804,55 @@ static int selinux_file_ioctl_compat(struct file *file, unsigned int cmd,
 
 static int default_noexec __ro_after_init;
 
-static int file_map_prot_check(struct file *file, unsigned long prot, int shared)
+static int __file_map_prot_check(const struct cred *cred,
+				 const struct file *file, unsigned long prot,
+				 bool shared, bool bf_user_file)
 {
-	const struct cred *cred = current_cred();
-	u32 sid = cred_sid(cred);
-	int rc = 0;
+	struct inode *inode = NULL;
+	bool prot_exec = prot & PROT_EXEC;
+	bool prot_write = prot & PROT_WRITE;
+
+	if (file) {
+		if (bf_user_file)
+			inode = d_inode(backing_file_user_path(file)->dentry);
+		else
+			inode = file_inode(file);
+	}
+
+	if (default_noexec && prot_exec &&
+	    (!file || IS_PRIVATE(inode) || (!shared && prot_write))) {
+		int rc;
+		u32 sid = cred_sid(cred);
 
-	if (default_noexec &&
-	    (prot & PROT_EXEC) && (!file || IS_PRIVATE(file_inode(file)) ||
-				   (!shared && (prot & PROT_WRITE)))) {
 		/*
-		 * We are making executable an anonymous mapping or a
-		 * private file mapping that will also be writable.
-		 * This has an additional check.
+		 * We are making executable an anonymous mapping or a private
+		 * file mapping that will also be writable.
 		 */
-		rc = avc_has_perm(sid, sid, SECCLASS_PROCESS,
-				  PROCESS__EXECMEM, NULL);
+		rc = avc_has_perm(sid, sid, SECCLASS_PROCESS, PROCESS__EXECMEM,
+				  NULL);
 		if (rc)
-			goto error;
+			return rc;
 	}
 
 	if (file) {
-		/* read access is always possible with a mapping */
+		/* "read" always possible, "write" only if shared */
 		u32 av = FILE__READ;
-
-		/* write access only matters if the mapping is shared */
-		if (shared && (prot & PROT_WRITE))
+		if (shared && prot_write)
 			av |= FILE__WRITE;
-
-		if (prot & PROT_EXEC)
+		if (prot_exec)
 			av |= FILE__EXECUTE;
 
-		return file_has_perm(cred, file, av);
+		return __file_has_perm(cred, file, av, bf_user_file);
 	}
 
-error:
-	return rc;
+	return 0;
+}
+
+static inline int file_map_prot_check(const struct cred *cred,
+				      const struct file *file,
+				      unsigned long prot, bool shared)
+{
+	return __file_map_prot_check(cred, file, prot, shared, false);
 }
 
 static int selinux_mmap_addr(unsigned long addr)
@@ -3821,36 +3868,80 @@ static int selinux_mmap_addr(unsigned long addr)
 	return rc;
 }
 
-static int selinux_mmap_file(struct file *file,
-			     unsigned long reqprot __always_unused,
-			     unsigned long prot, unsigned long flags)
+static int selinux_mmap_file_common(const struct cred *cred, struct file *file,
+				    unsigned long prot, bool shared)
 {
-	struct common_audit_data ad;
-	int rc;
-
 	if (file) {
+		int rc;
+		struct common_audit_data ad;
+
 		ad.type = LSM_AUDIT_DATA_FILE;
 		ad.u.file = file;
-		rc = inode_has_perm(current_cred(), file_inode(file),
-				    FILE__MAP, &ad);
+		rc = inode_has_perm(cred, file_inode(file), FILE__MAP, &ad);
 		if (rc)
 			return rc;
 	}
 
-	return file_map_prot_check(file, prot,
-				   (flags & MAP_TYPE) == MAP_SHARED);
+	return file_map_prot_check(cred, file, prot, shared);
+}
+
+static int selinux_mmap_file(struct file *file,
+			     unsigned long reqprot __always_unused,
+			     unsigned long prot, unsigned long flags)
+{
+	return selinux_mmap_file_common(current_cred(), file, prot,
+					(flags & MAP_TYPE) == MAP_SHARED);
+}
+
+/**
+ * selinux_mmap_backing_file - Check mmap permissions on a backing file
+ * @vma: memory region
+ * @backing_file: stacked filesystem backing file
+ * @user_file: user visible file
+ *
+ * This is called after selinux_mmap_file() on stacked filesystems, and it
+ * is this function's responsibility to verify access to @backing_file and
+ * setup the SELinux state for possible later use in the mprotect() code path.
+ *
+ * By the time this function is called, mmap() access to @user_file has already
+ * been authorized and @vma->vm_file has been set to point to @backing_file.
+ *
+ * Return zero on success, negative values otherwise.
+ */
+static int selinux_mmap_backing_file(struct vm_area_struct *vma,
+				     struct file *backing_file,
+				     struct file *user_file __always_unused)
+{
+	unsigned long prot = 0;
+
+	/* translate vma->vm_flags perms into PROT perms */
+	if (vma->vm_flags & VM_READ)
+		prot |= PROT_READ;
+	if (vma->vm_flags & VM_WRITE)
+		prot |= PROT_WRITE;
+	if (vma->vm_flags & VM_EXEC)
+		prot |= PROT_EXEC;
+
+	return selinux_mmap_file_common(backing_file->f_cred, backing_file,
+					prot, vma->vm_flags & VM_SHARED);
 }
 
 static int selinux_file_mprotect(struct vm_area_struct *vma,
 				 unsigned long reqprot __always_unused,
 				 unsigned long prot)
 {
+	int rc;
 	const struct cred *cred = current_cred();
 	u32 sid = cred_sid(cred);
+	const struct file *file = vma->vm_file;
+	bool backing_file;
+	bool shared = vma->vm_flags & VM_SHARED;
+
+	/* check if we need to trigger the "backing files are awful" mode */
+	backing_file = file && (file->f_mode & FMODE_BACKING);
 
 	if (default_noexec &&
 	    (prot & PROT_EXEC) && !(vma->vm_flags & VM_EXEC)) {
-		int rc = 0;
 		/*
 		 * We don't use the vma_is_initial_heap() helper as it has
 		 * a history of problems and is currently broken on systems
@@ -3864,11 +3955,15 @@ static int selinux_file_mprotect(struct vm_area_struct *vma,
 		    vma->vm_end <= vma->vm_mm->brk) {
 			rc = avc_has_perm(sid, sid, SECCLASS_PROCESS,
 					  PROCESS__EXECHEAP, NULL);
-		} else if (!vma->vm_file && (vma_is_initial_stack(vma) ||
+			if (rc)
+				return rc;
+		} else if (!file && (vma_is_initial_stack(vma) ||
 			    vma_is_stack_for_current(vma))) {
 			rc = avc_has_perm(sid, sid, SECCLASS_PROCESS,
 					  PROCESS__EXECSTACK, NULL);
-		} else if (vma->vm_file && vma->anon_vma) {
+			if (rc)
+				return rc;
+		} else if (file && vma->anon_vma) {
 			/*
 			 * We are making executable a file mapping that has
 			 * had some COW done. Since pages might have been
@@ -3876,13 +3971,29 @@ static int selinux_file_mprotect(struct vm_area_struct *vma,
 			 * modified content.  This typically should only
 			 * occur for text relocations.
 			 */
-			rc = file_has_perm(cred, vma->vm_file, FILE__EXECMOD);
+			rc = __file_has_perm(cred, file, FILE__EXECMOD,
+					     backing_file);
+			if (rc)
+				return rc;
+			if (backing_file) {
+				rc = file_has_perm(file->f_cred, file,
+						   FILE__EXECMOD);
+				if (rc)
+					return rc;
+			}
 		}
+	}
+
+	rc = __file_map_prot_check(cred, file, prot, shared, backing_file);
+	if (rc)
+		return rc;
+	if (backing_file) {
+		rc = file_map_prot_check(file->f_cred, file, prot, shared);
 		if (rc)
 			return rc;
 	}
 
-	return file_map_prot_check(vma->vm_file, prot, vma->vm_flags&VM_SHARED);
+	return 0;
 }
 
 static int selinux_file_lock(struct file *file, unsigned int cmd)
@@ -6960,6 +7071,7 @@ static void selinux_bpf_token_free(struct bpf_token *token)
 struct lsm_blob_sizes selinux_blob_sizes __ro_after_init = {
 	.lbs_cred = sizeof(struct task_security_struct),
 	.lbs_file = sizeof(struct file_security_struct),
+	.lbs_backing_file = sizeof(struct backing_file_security_struct),
 	.lbs_inode = sizeof(struct inode_security_struct),
 	.lbs_ipc = sizeof(struct ipc_security_struct),
 	.lbs_key = sizeof(struct key_security_struct),
@@ -7165,9 +7277,11 @@ static struct security_hook_list selinux_hooks[] __ro_after_init = {
 
 	LSM_HOOK_INIT(file_permission, selinux_file_permission),
 	LSM_HOOK_INIT(file_alloc_security, selinux_file_alloc_security),
+	LSM_HOOK_INIT(backing_file_alloc, selinux_backing_file_alloc),
 	LSM_HOOK_INIT(file_ioctl, selinux_file_ioctl),
 	LSM_HOOK_INIT(file_ioctl_compat, selinux_file_ioctl_compat),
 	LSM_HOOK_INIT(mmap_file, selinux_mmap_file),
+	LSM_HOOK_INIT(mmap_backing_file, selinux_mmap_backing_file),
 	LSM_HOOK_INIT(mmap_addr, selinux_mmap_addr),
 	LSM_HOOK_INIT(file_mprotect, selinux_file_mprotect),
 	LSM_HOOK_INIT(file_lock, selinux_file_lock),
diff --git a/security/selinux/include/objsec.h b/security/selinux/include/objsec.h
index c88cae81ee4c..dc42282a2c05 100644
--- a/security/selinux/include/objsec.h
+++ b/security/selinux/include/objsec.h
@@ -61,6 +61,10 @@ struct file_security_struct {
 	u32 pseqno; /* Policy seqno at the time of file open */
 };
 
+struct backing_file_security_struct {
+	u32 uf_sid; /* associated user file fsec->sid */
+};
+
 struct superblock_security_struct {
 	u32 sid; /* SID of file system superblock */
 	u32 def_sid; /* default SID for labeling */
@@ -159,6 +163,13 @@ static inline struct file_security_struct *selinux_file(const struct file *file)
 	return file->f_security + selinux_blob_sizes.lbs_file;
 }
 
+static inline struct backing_file_security_struct *
+selinux_backing_file(const struct file *backing_file)
+{
+	void *blob = backing_file_security(backing_file);
+	return blob + selinux_blob_sizes.lbs_backing_file;
+}
+
 static inline struct inode_security_struct *
 selinux_inode(const struct inode *inode)
 {
-- 
2.18.0.huawei.25


^ permalink raw reply related

* Re: [PATCH] tomoyo: Enforce connect policy in TCP Fast Open
From: Tetsuo Handa @ 2026-06-27  5:28 UTC (permalink / raw)
  To: Matthieu Buffet
  Cc: Bryam Vargas, Mickaël Salaün, Günther Noack,
	linux-security-module, Mikhail Ivanov, Paul Moore, Yuchung Cheng,
	Eric Dumazet, netdev, Kentaro Takeda
In-Reply-To: <20260619002207.61104-1-matthieu@buffet.re>

On 2026/06/19 9:22, Matthieu Buffet wrote:
> Tomoyo restricted TCP connections in 2011 in commit
> 059d84dbb389 ("TOMOYO: Add socket operation restriction support.")
> using the socket_connect() LSM hook.
> 
> However, the MSG_FASTOPEN sendmsg() flag was added in 2012 to allow
> combining connect() and the first sendmsg(). Tomoyo was not updated to
> take this into account in its send hook.
> 
> This resulted in a TCP connect policy bypass similar to that reported in
> Landlock in 2024 (see Link below), with the difference that Tomoyo was
> fine when originally merged, and the problem got introduced when adding
> fastopen support, possibly due to lack of synchronization between lsm
> and netdev worlds.
> 
> Add MSG_FASTOPEN handling in Tomoyo's existing send hook.
> 
> Link: https://github.com/landlock-lsm/linux/issues/41
> Link: https://lore.kernel.org/all/20260616201615.275032-1-hexlabsecurity@proton.me/
> Fixes: cf60af03ca4e ("net-tcp: Fast Open client - sendmsg(MSG_FASTOPEN)")
> Cc: stable@kernel.org
> Signed-off-by: Matthieu Buffet <matthieu@buffet.re>
> ---
>  security/tomoyo/network.c | 16 +++++++++++++++-
>  1 file changed, 15 insertions(+), 1 deletion(-)
> 

Thank you for finding this problem and making a patch.
I updated your patch like below in order to exclude kernel threads from this check.
If we are OK to go with modifying individual LSM, I'll apply this change.

 security/tomoyo/network.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/security/tomoyo/network.c b/security/tomoyo/network.c
index cfc2a019de1e..50d27c773b10 100644
--- a/security/tomoyo/network.c
+++ b/security/tomoyo/network.c
@@ -765,6 +765,15 @@ int tomoyo_socket_sendmsg_permission(struct socket *sock, struct msghdr *msg,
 	const u8 family = tomoyo_sock_family(sock->sk);
 	const unsigned int type = sock->type;
 
+	if ((msg->msg_flags & MSG_FASTOPEN) && msg->msg_name && type == SOCK_STREAM &&
+	    (family == PF_INET || family == PF_INET6) &&
+	    (sock->sk->sk_protocol == IPPROTO_TCP || sock->sk->sk_protocol == IPPROTO_MPTCP)) {
+		address.protocol = SOCK_STREAM;
+		address.operation = TOMOYO_NETWORK_CONNECT;
+		return tomoyo_check_inet_address((struct sockaddr *)msg->msg_name,
+						 msg->msg_namelen, 0, &address);
+	}
+
 	if (!msg->msg_name || !family ||
 	    (type != SOCK_DGRAM && type != SOCK_RAW))
 		return 0;
-- 
2.54.0



^ permalink raw reply related

* Re: [syzbot] [tomoyo?] KMSAN: uninit-value in tomoyo_path_chown (3)
From: Tetsuo Handa @ 2026-06-27  5:43 UTC (permalink / raw)
  To: syzbot, syzkaller-bugs
  Cc: jmorris, linux-kernel, linux-security-module, paul, serge,
	takedakn, linux-fsdevel
In-Reply-To: <6a325dbd.dc986f81.2c135.0008.GAE@google.com>

On 2026/06/17 17:41, syzbot wrote:
> Hello,
> 
> syzbot found the following issue on:
> 
> HEAD commit:    2b414a95b8f7 Merge tag 's390-7.1-5' of git://git.kernel.or..
> git tree:       upstream
> console output: https://syzkaller.appspot.com/x/log.txt?x=12cff156580000
> kernel config:  https://syzkaller.appspot.com/x/.config?x=a0ca3b8cb3875012
> dashboard link: https://syzkaller.appspot.com/bug?extid=eaae8fa60ce81f1e4eeb
> compiler:       Debian clang version 21.1.8 (++20251221033036+2078da43e25a-1~exp1~20251221153213.50), Debian LLD 21.1.8
> 
> Unfortunately, I don't have any reproducer for this issue yet.
> 

The AI's analysis

  In this case, `chown_common()` skips setting `newattrs.ia_vfsuid` and `newattrs.ia_vfsgid`.

at https://syzkaller.appspot.com/ai_job?id=abea5dd9-2a6e-4669-a927-d87ef7833666 is wrong.
We always set newattrs.ia_vfsuid and newattrs.ia_vfsgid before checking for -1.

  retry_deleg:
  	newattrs.ia_vfsuid = INVALID_VFSUID;
  	newattrs.ia_vfsgid = INVALID_VFSGID;
  	newattrs.ia_valid =  ATTR_CTIME;
  	if ((user != (uid_t)-1) && !setattr_vfsuid(&newattrs, uid))
  		return -EINVAL;
  	if ((group != (gid_t)-1) && !setattr_vfsgid(&newattrs, gid))
  		return -EINVAL;

Therefore, unless VFS people find something that can cause this problem,
this would be a random memory corruption which confused KMSAN.

#syz set subsystems: unclassified

#syz set no-reminders


^ permalink raw reply

* Re: [syzbot] [tomoyo?] KMSAN: uninit-value in tomoyo_path_chown (3)
From: syzbot @ 2026-06-27  5:44 UTC (permalink / raw)
  To: penguin-kernel
  Cc: jmorris, linux-fsdevel, linux-kernel, linux-security-module, paul,
	penguin-kernel, serge, syzkaller-bugs, takedakn
In-Reply-To: <790d4d56-4c24-4468-b588-722e6aa86ba1@I-love.SAKURA.ne.jp>

> On 2026/06/17 17:41, syzbot wrote:
>> Hello,
>> 
>> syzbot found the following issue on:
>> 
>> HEAD commit:    2b414a95b8f7 Merge tag 's390-7.1-5' of git://git.kernel.or..
>> git tree:       upstream
>> console output: https://syzkaller.appspot.com/x/log.txt?x=12cff156580000
>> kernel config:  https://syzkaller.appspot.com/x/.config?x=a0ca3b8cb3875012
>> dashboard link: https://syzkaller.appspot.com/bug?extid=eaae8fa60ce81f1e4eeb
>> compiler:       Debian clang version 21.1.8 (++20251221033036+2078da43e25a-1~exp1~20251221153213.50), Debian LLD 21.1.8
>> 
>> Unfortunately, I don't have any reproducer for this issue yet.
>> 
>
> The AI's analysis
>
>   In this case, `chown_common()` skips setting `newattrs.ia_vfsuid` and `newattrs.ia_vfsgid`.
>
> at https://syzkaller.appspot.com/ai_job?id=abea5dd9-2a6e-4669-a927-d87ef7833666 is wrong.
> We always set newattrs.ia_vfsuid and newattrs.ia_vfsgid before checking for -1.
>
>   retry_deleg:
>   	newattrs.ia_vfsuid = INVALID_VFSUID;
>   	newattrs.ia_vfsgid = INVALID_VFSGID;
>   	newattrs.ia_valid =  ATTR_CTIME;
>   	if ((user != (uid_t)-1) && !setattr_vfsuid(&newattrs, uid))
>   		return -EINVAL;
>   	if ((group != (gid_t)-1) && !setattr_vfsgid(&newattrs, gid))
>   		return -EINVAL;
>
> Therefore, unless VFS people find something that can cause this problem,
> this would be a random memory corruption which confused KMSAN.
>
> #syz set subsystems: unclassified
>
> #syz set no-reminders
>

Command #1:
The specified label value is incorrect.
"unclassified" is not among the allowed values.
Please use one of the supported label values.

The following labels are suported:
actionable, missing-backport, no-reminders, prio: {low, normal, high}, subsystems: {..
see below ..}
The list of subsystems: https://syzkaller.appspot.com/upstream/subsystems?all=true


^ permalink raw reply

* [PATCH v2 stable/linux-6.6.y 0/3] Backport Fix incorrect overlayfs mmap() and mprotect() LSM access controls
From: Cai Xinchen @ 2026-06-27  6:57 UTC (permalink / raw)
  To: viro, brauner, jack, miklos, amir73il, paul, jmorris, serge,
	stephen.smalley.work, omosnace, gregkh, bboscaccy, caixinchen1
  Cc: linux-fsdevel, linux-kernel, linux-unionfs, linux-security-module,
	selinux, bpf, stable, lujialin4

v2: Add static to struct kmem_cache *lsm_backing_file_cache; and define
lbs_backing_file as int for keeping the same type as 6.6 lts. Use macro
backing_file to replace inline function to eliminate the const warning.

Backport the patch series
"Fix incorrect overlayfs mmap() and mprotect() LSM access controls" [1]
to 6.6 lts

I test selinux-testsuite[2] overlay test, it pass 135 tests.

[1] https://lore.kernel.org/all/20260403030848.731867-5-paul@paul-moore.com/
[2] https://github.com/SELinuxProject/selinux-testsuite

Amir Goldstein (1):
  fs: prepare for adding LSM blob to backing_file

Paul Moore (2):
  lsm: add backing_file LSM hooks
  selinux: fix overlayfs mmap() and mprotect() access checks

 fs/file_table.c                   |  46 +++++-
 fs/internal.h                     |   3 +-
 fs/open.c                         |   7 +-
 fs/overlayfs/file.c               |   8 +-
 include/linux/fs.h                |  15 +-
 include/linux/lsm_audit.h         |   2 +-
 include/linux/lsm_hook_defs.h     |   5 +
 include/linux/lsm_hooks.h         |   1 +
 include/linux/security.h          |  22 +++
 security/security.c               | 110 ++++++++++++++
 security/selinux/hooks.c          | 242 ++++++++++++++++++++++--------
 security/selinux/include/objsec.h |  11 ++
 12 files changed, 395 insertions(+), 77 deletions(-)

-- 
2.18.0.huawei.25


^ permalink raw reply

* [PATCH v2 stable/linux-6.6.y 1/3] fs: prepare for adding LSM blob to backing_file
From: Cai Xinchen @ 2026-06-27  6:57 UTC (permalink / raw)
  To: viro, brauner, jack, miklos, amir73il, paul, jmorris, serge,
	stephen.smalley.work, omosnace, gregkh, bboscaccy, caixinchen1
  Cc: linux-fsdevel, linux-kernel, linux-unionfs, linux-security-module,
	selinux, bpf, stable, lujialin4
In-Reply-To: <20260627065720.1945589-1-caixinchen1@huawei.com>

From: Amir Goldstein <amir73il@gmail.com>

[ Upstream commit 880bd496ec72a6dcb00cb70c430ef752ba242ae7 ]

In preparation to adding LSM blob to backing_file struct, factor out
helpers init_backing_file() and backing_file_free().

Cc: stable@vger.kernel.org
Cc: linux-fsdevel@vger.kernel.org
Cc: linux-unionfs@vger.kernel.org
Cc: linux-erofs@lists.ozlabs.org
Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Reviewed-by: Serge Hallyn <serge@hallyn.com>
[PM: use the term "LSM blob", fix comment style to match file]
Signed-off-by: Paul Moore <paul@paul-moore.com>
[1. The commit def3ae83da02
("fs: store real path instead of fake path in backing file f_path")
is not merged, The 6.6 LTS version accordingly operates on
&ff->real_path instead of &ff->user_path.

2. Mainline's file_free() does both the backing_file cleanup and the
kmem_cache_free() synchronously.  Linux 6.6.y defers the actual kfree()
to file_free_rcu() via call_rcu(), so only path_put() is done
synchronously in file_free().]
Signed-off-by: Cai Xinchen <caixinchen1@huawei.com>
---
 fs/file_table.c | 20 +++++++++++++++++++-
 1 file changed, 19 insertions(+), 1 deletion(-)

diff --git a/fs/file_table.c b/fs/file_table.c
index 234284ef72a9..b4c208a77153 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -72,11 +72,16 @@ static void file_free_rcu(struct rcu_head *head)
 		kmem_cache_free(filp_cachep, f);
 }
 
+static inline void backing_file_free(struct backing_file *ff)
+{
+	path_put(&ff->real_path);
+}
+
 static inline void file_free(struct file *f)
 {
 	security_file_free(f);
 	if (unlikely(f->f_mode & FMODE_BACKING))
-		path_put(backing_file_real_path(f));
+		backing_file_free(backing_file(f));
 	if (likely(!(f->f_mode & FMODE_NOACCOUNT)))
 		percpu_counter_dec(&nr_files);
 	call_rcu(&f->f_rcuhead, file_free_rcu);
@@ -252,6 +257,12 @@ struct file *alloc_empty_file_noaccount(int flags, const struct cred *cred)
 	return f;
 }
 
+static int init_backing_file(struct backing_file *ff)
+{
+	memset(&ff->real_path, 0, sizeof(ff->real_path));
+	return 0;
+}
+
 /*
  * Variant of alloc_empty_file() that allocates a backing_file container
  * and doesn't check and modify nr_files.
@@ -274,7 +285,14 @@ struct file *alloc_empty_backing_file(int flags, const struct cred *cred)
 		return ERR_PTR(error);
 	}
 
+	/* The f_mode flags must be set before fput(). */
 	ff->file.f_mode |= FMODE_BACKING | FMODE_NOACCOUNT;
+	error = init_backing_file(ff);
+	if (unlikely(error)) {
+		fput(&ff->file);
+		return ERR_PTR(error);
+	}
+
 	return &ff->file;
 }
 
-- 
2.18.0.huawei.25


^ permalink raw reply related

* [PATCH v2 stable/linux-6.6.y 2/3] lsm: add backing_file LSM hooks
From: Cai Xinchen @ 2026-06-27  6:57 UTC (permalink / raw)
  To: viro, brauner, jack, miklos, amir73il, paul, jmorris, serge,
	stephen.smalley.work, omosnace, gregkh, bboscaccy, caixinchen1
  Cc: linux-fsdevel, linux-kernel, linux-unionfs, linux-security-module,
	selinux, bpf, stable, lujialin4
In-Reply-To: <20260627065720.1945589-1-caixinchen1@huawei.com>

From: Paul Moore <paul@paul-moore.com>

[ Upstream commit 6af36aeb147a06dea47c49859cd6ca5659aeb987 ]

Stacked filesystems such as overlayfs do not currently provide the
necessary mechanisms for LSMs to properly enforce access controls on the
mmap() and mprotect() operations.  In order to resolve this gap, a LSM
security blob is being added to the backing_file struct and the following
new LSM hooks are being created:

 security_backing_file_alloc()
 security_backing_file_free()
 security_mmap_backing_file()

The first two hooks are to manage the lifecycle of the LSM security blob
in the backing_file struct, while the third provides a new mmap() access
control point for the underlying backing file.  It is also expected that
LSMs will likely want to update their security_file_mprotect() callback
to address issues with their mprotect() controls, but that does not
require a change to the security_file_mprotect() LSM hook.

There are a three other small changes to support these new LSM hooks:
* Pass the user file associated with a backing file down to
alloc_empty_backing_file() so it can be included in the
security_backing_file_alloc() hook.
* Add getter and setter functions for the backing_file struct LSM blob
as the backing_file struct remains private to fs/file_table.c.
* Constify the file struct field in the LSM common_audit_data struct to
better support LSMs that need to pass a const file struct pointer into
the common LSM audit code.

Thanks to Arnd Bergmann for identifying the missing EXPORT_SYMBOL_GPL()
and supplying a fixup.

Cc: stable@vger.kernel.org
Cc: linux-fsdevel@vger.kernel.org
Cc: linux-unionfs@vger.kernel.org
Cc: linux-erofs@lists.ozlabs.org
Reviewed-by: Amir Goldstein <amir73il@gmail.com>
Reviewed-by: Serge Hallyn <serge@hallyn.com>
Reviewed-by: Christian Brauner <brauner@kernel.org>
Signed-off-by: Paul Moore <paul@paul-moore.com>
[1. Mainline uses call_int_hook(FUNC, ...) with the default IRC baked
into the macro. Linux 6.6.y uses call_int_hook(FUNC, IRC, ...) requiring
an explicit default return value.

2. fs/backing-file.c does not exist in LTS
Linux 6.6.y places backing_file_open() in fs/open.c and lacks a
dedicated fs/backing-file.c.  The backing_file_mmap() function and
scoped_with_creds() do not exist in 6.6.y.  Therefore the LTS patch calls
security_mmap_backing_file() directly in ovl_mmap() in
fs/overlayfs/file.c rather than modifying backing_file_mmap().

3. Missing filesystems/modules
Linux 6.6.y does not have backing_tmpfile_open(), fs/fuse/passthrough.c,
or the erofs ishare mmap path that the mainline patch touches.  These hunks
are dropped in the 6.6 LTS backport.

4. Use macro backing_file to replace inline function to eliminate the
const warning.]
Signed-off-by: Cai Xinchen <caixinchen1@huawei.com>
---
 fs/file_table.c               |  32 +++++++---
 fs/internal.h                 |   3 +-
 fs/open.c                     |   7 ++-
 fs/overlayfs/file.c           |   8 ++-
 include/linux/fs.h            |  15 ++++-
 include/linux/lsm_audit.h     |   2 +-
 include/linux/lsm_hook_defs.h |   5 ++
 include/linux/lsm_hooks.h     |   1 +
 include/linux/security.h      |  22 +++++++
 security/security.c           | 110 ++++++++++++++++++++++++++++++++++
 10 files changed, 190 insertions(+), 15 deletions(-)

diff --git a/fs/file_table.c b/fs/file_table.c
index b4c208a77153..fa4d4c54f790 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -48,12 +48,12 @@ static struct percpu_counter nr_files __cacheline_aligned_in_smp;
 struct backing_file {
 	struct file file;
 	struct path real_path;
+#ifdef CONFIG_SECURITY
+	void *security;
+#endif
 };
 
-static inline struct backing_file *backing_file(struct file *f)
-{
-	return container_of(f, struct backing_file, file);
-}
+#define backing_file(f) container_of(f, struct backing_file, file)
 
 struct path *backing_file_real_path(struct file *f)
 {
@@ -72,8 +72,21 @@ static void file_free_rcu(struct rcu_head *head)
 		kmem_cache_free(filp_cachep, f);
 }
 
+#ifdef CONFIG_SECURITY
+void *backing_file_security(const struct file *f)
+{
+	return backing_file(f)->security;
+}
+
+void backing_file_set_security(struct file *f, void *security)
+{
+	backing_file(f)->security = security;
+}
+#endif /* CONFIG_SECURITY */
+
 static inline void backing_file_free(struct backing_file *ff)
 {
+	security_backing_file_free(&ff->file);
 	path_put(&ff->real_path);
 }
 
@@ -257,10 +270,12 @@ struct file *alloc_empty_file_noaccount(int flags, const struct cred *cred)
 	return f;
 }
 
-static int init_backing_file(struct backing_file *ff)
+static int init_backing_file(struct backing_file *ff,
+			     const struct file *user_file)
 {
 	memset(&ff->real_path, 0, sizeof(ff->real_path));
-	return 0;
+	backing_file_set_security(&ff->file, NULL);
+	return security_backing_file_alloc(&ff->file, user_file);
 }
 
 /*
@@ -270,7 +285,8 @@ static int init_backing_file(struct backing_file *ff)
  * This is only for kernel internal use, and the allocate file must not be
  * installed into file tables or such.
  */
-struct file *alloc_empty_backing_file(int flags, const struct cred *cred)
+struct file *alloc_empty_backing_file(int flags, const struct cred *cred,
+				      const struct file *user_file)
 {
 	struct backing_file *ff;
 	int error;
@@ -287,7 +303,7 @@ struct file *alloc_empty_backing_file(int flags, const struct cred *cred)
 
 	/* The f_mode flags must be set before fput(). */
 	ff->file.f_mode |= FMODE_BACKING | FMODE_NOACCOUNT;
-	error = init_backing_file(ff);
+	error = init_backing_file(ff, user_file);
 	if (unlikely(error)) {
 		fput(&ff->file);
 		return ERR_PTR(error);
diff --git a/fs/internal.h b/fs/internal.h
index d64ae03998cc..576026e1ce3a 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -93,7 +93,8 @@ extern void chroot_fs_refs(const struct path *, const struct path *);
  */
 struct file *alloc_empty_file(int flags, const struct cred *cred);
 struct file *alloc_empty_file_noaccount(int flags, const struct cred *cred);
-struct file *alloc_empty_backing_file(int flags, const struct cred *cred);
+struct file *alloc_empty_backing_file(int flags, const struct cred *cred,
+				      const struct file *user_file);
 
 static inline void put_file_access(struct file *file)
 {
diff --git a/fs/open.c b/fs/open.c
index b5ea1dcbfb22..5b164481d80b 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -1175,18 +1175,19 @@ EXPORT_SYMBOL_GPL(kernel_file_open);
  * the backing inode on the underlying filesystem, which can be
  * retrieved using backing_file_real_path().
  */
-struct file *backing_file_open(const struct path *path, int flags,
+struct file *backing_file_open(const struct file *user_file, int flags,
 			       const struct path *real_path,
 			       const struct cred *cred)
 {
+	const struct path *user_path = &user_file->f_path;
 	struct file *f;
 	int error;
 
-	f = alloc_empty_backing_file(flags, cred);
+	f = alloc_empty_backing_file(flags, cred, user_file);
 	if (IS_ERR(f))
 		return f;
 
-	f->f_path = *path;
+	f->f_path = *user_path;
 	path_get(real_path);
 	*backing_file_real_path(f) = *real_path;
 	error = do_dentry_open(f, d_inode(real_path->dentry), NULL);
diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
index 8be4dc050d1e..07f00cf91977 100644
--- a/fs/overlayfs/file.c
+++ b/fs/overlayfs/file.c
@@ -60,7 +60,7 @@ static struct file *ovl_open_realfile(const struct file *file,
 		if (!inode_owner_or_capable(real_idmap, realinode))
 			flags &= ~O_NOATIME;
 
-		realfile = backing_file_open(&file->f_path, flags, realpath,
+		realfile = backing_file_open(file, flags, realpath,
 					     current_cred());
 	}
 	revert_creds(old_cred);
@@ -527,6 +527,7 @@ static int ovl_fsync(struct file *file, loff_t start, loff_t end, int datasync)
 
 static int ovl_mmap(struct file *file, struct vm_area_struct *vma)
 {
+	struct file *user_file = vma->vm_file;
 	struct file *realfile = file->private_data;
 	const struct cred *old_cred;
 	int ret;
@@ -540,6 +541,11 @@ static int ovl_mmap(struct file *file, struct vm_area_struct *vma)
 	vma_set_file(vma, realfile);
 
 	old_cred = ovl_override_creds(file_inode(file)->i_sb);
+	ret = security_mmap_backing_file(vma, realfile, user_file);
+	if (ret) {
+		revert_creds(old_cred);
+		return ret;
+	}
 	ret = call_mmap(vma->vm_file, vma);
 	revert_creds(old_cred);
 	ovl_file_accessed(file);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 4cdeeaedaa40..5a897ee50f29 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2515,11 +2515,24 @@ struct file *dentry_open(const struct path *path, int flags,
 			 const struct cred *creds);
 struct file *dentry_create(const struct path *path, int flags, umode_t mode,
 			   const struct cred *cred);
-struct file *backing_file_open(const struct path *path, int flags,
+struct file *backing_file_open(const struct file *user_file, int flags,
 			       const struct path *real_path,
 			       const struct cred *cred);
 struct path *backing_file_real_path(struct file *f);
 
+#ifdef CONFIG_SECURITY
+void *backing_file_security(const struct file *f);
+void backing_file_set_security(struct file *f, void *security);
+#else
+static inline void *backing_file_security(const struct file *f)
+{
+	return NULL;
+}
+static inline void backing_file_set_security(struct file *f, void *security)
+{
+}
+#endif /* CONFIG_SECURITY */
+
 /*
  * file_real_path - get the path corresponding to f_inode
  *
diff --git a/include/linux/lsm_audit.h b/include/linux/lsm_audit.h
index 97a8b21eb033..c0a2839253fa 100644
--- a/include/linux/lsm_audit.h
+++ b/include/linux/lsm_audit.h
@@ -93,7 +93,7 @@ struct common_audit_data {
 #endif
 		char *kmod_name;
 		struct lsm_ioctlop_audit *op;
-		struct file *file;
+		const struct file *file;
 		struct lsm_ibpkey_audit *ibpkey;
 		struct lsm_ibendport_audit *ibendport;
 		int reason;
diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h
index 2923754c13bc..567d7f16609c 100644
--- a/include/linux/lsm_hook_defs.h
+++ b/include/linux/lsm_hook_defs.h
@@ -169,6 +169,9 @@ LSM_HOOK(int, 0, kernfs_init_security, struct kernfs_node *kn_dir,
 LSM_HOOK(int, 0, file_permission, struct file *file, int mask)
 LSM_HOOK(int, 0, file_alloc_security, struct file *file)
 LSM_HOOK(void, LSM_RET_VOID, file_free_security, struct file *file)
+LSM_HOOK(int, 0, backing_file_alloc, struct file *backing_file,
+	 const struct file *user_file)
+LSM_HOOK(void, LSM_RET_VOID, backing_file_free, struct file *backing_file)
 LSM_HOOK(int, 0, file_ioctl, struct file *file, unsigned int cmd,
 	 unsigned long arg)
 LSM_HOOK(int, 0, file_ioctl_compat, struct file *file, unsigned int cmd,
@@ -176,6 +179,8 @@ LSM_HOOK(int, 0, file_ioctl_compat, struct file *file, unsigned int cmd,
 LSM_HOOK(int, 0, mmap_addr, unsigned long addr)
 LSM_HOOK(int, 0, mmap_file, struct file *file, unsigned long reqprot,
 	 unsigned long prot, unsigned long flags)
+LSM_HOOK(int, 0, mmap_backing_file, struct vm_area_struct *vma,
+	 struct file *backing_file, struct file *user_file)
 LSM_HOOK(int, 0, file_mprotect, struct vm_area_struct *vma,
 	 unsigned long reqprot, unsigned long prot)
 LSM_HOOK(int, 0, file_lock, struct file *file, unsigned int cmd)
diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h
index dcb5e5b5eb13..3b56b60195ce 100644
--- a/include/linux/lsm_hooks.h
+++ b/include/linux/lsm_hooks.h
@@ -59,6 +59,7 @@ struct security_hook_list {
 struct lsm_blob_sizes {
 	int	lbs_cred;
 	int	lbs_file;
+	int lbs_backing_file;
 	int	lbs_inode;
 	int	lbs_superblock;
 	int	lbs_ipc;
diff --git a/include/linux/security.h b/include/linux/security.h
index 937840870d86..4866ffdb4e6c 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -389,11 +389,17 @@ int security_kernfs_init_security(struct kernfs_node *kn_dir,
 int security_file_permission(struct file *file, int mask);
 int security_file_alloc(struct file *file);
 void security_file_free(struct file *file);
+int security_backing_file_alloc(struct file *backing_file,
+				const struct file *user_file);
+void security_backing_file_free(struct file *backing_file);
 int security_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
 int security_file_ioctl_compat(struct file *file, unsigned int cmd,
 			       unsigned long arg);
 int security_mmap_file(struct file *file, unsigned long prot,
 			unsigned long flags);
+int security_mmap_backing_file(struct vm_area_struct *vma,
+			       struct file *backing_file,
+			       struct file *user_file);
 int security_mmap_addr(unsigned long addr);
 int security_file_mprotect(struct vm_area_struct *vma, unsigned long reqprot,
 			   unsigned long prot);
@@ -984,6 +990,15 @@ static inline int security_file_alloc(struct file *file)
 static inline void security_file_free(struct file *file)
 { }
 
+static inline int security_backing_file_alloc(struct file *backing_file,
+					      const struct file *user_file)
+{
+	return 0;
+}
+
+static inline void security_backing_file_free(struct file *backing_file)
+{ }
+
 static inline int security_file_ioctl(struct file *file, unsigned int cmd,
 				      unsigned long arg)
 {
@@ -1003,6 +1018,13 @@ static inline int security_mmap_file(struct file *file, unsigned long prot,
 	return 0;
 }
 
+static inline int security_mmap_backing_file(struct vm_area_struct *vma,
+					     struct file *backing_file,
+					     struct file *user_file)
+{
+	return 0;
+}
+
 static inline int security_mmap_addr(unsigned long addr)
 {
 	return cap_mmap_addr(addr);
diff --git a/security/security.c b/security/security.c
index 1794860fd614..4b61766c3d27 100644
--- a/security/security.c
+++ b/security/security.c
@@ -78,6 +78,7 @@ struct security_hook_heads security_hook_heads __ro_after_init;
 static BLOCKING_NOTIFIER_HEAD(blocking_lsm_notifier_chain);
 
 static struct kmem_cache *lsm_file_cache;
+static struct kmem_cache *lsm_backing_file_cache;
 static struct kmem_cache *lsm_inode_cache;
 
 char *lsm_names;
@@ -200,6 +201,8 @@ static void __init lsm_set_blob_sizes(struct lsm_blob_sizes *needed)
 
 	lsm_set_blob_size(&needed->lbs_cred, &blob_sizes.lbs_cred);
 	lsm_set_blob_size(&needed->lbs_file, &blob_sizes.lbs_file);
+	lsm_set_blob_size(&needed->lbs_backing_file,
+			     &blob_sizes.lbs_backing_file);
 	/*
 	 * The inode blob gets an rcu_head in addition to
 	 * what the modules might need.
@@ -374,6 +377,7 @@ static void __init ordered_lsm_init(void)
 
 	init_debug("cred blob size       = %d\n", blob_sizes.lbs_cred);
 	init_debug("file blob size       = %d\n", blob_sizes.lbs_file);
+	init_debug("backing_file blob size  = %d\n", blob_sizes.lbs_backing_file);
 	init_debug("inode blob size      = %d\n", blob_sizes.lbs_inode);
 	init_debug("ipc blob size        = %d\n", blob_sizes.lbs_ipc);
 	init_debug("msg_msg blob size    = %d\n", blob_sizes.lbs_msg_msg);
@@ -388,6 +392,11 @@ static void __init ordered_lsm_init(void)
 		lsm_file_cache = kmem_cache_create("lsm_file_cache",
 						   blob_sizes.lbs_file, 0,
 						   SLAB_PANIC, NULL);
+	if (blob_sizes.lbs_backing_file)
+		lsm_backing_file_cache = kmem_cache_create(
+						   "lsm_backing_file_cache",
+						   blob_sizes.lbs_backing_file,
+						   0, SLAB_PANIC, NULL);
 	if (blob_sizes.lbs_inode)
 		lsm_inode_cache = kmem_cache_create("lsm_inode_cache",
 						    blob_sizes.lbs_inode, 0,
@@ -616,6 +625,30 @@ static int lsm_file_alloc(struct file *file)
 	return 0;
 }
 
+/**
+ * lsm_backing_file_alloc - allocate a composite backing file blob
+ * @backing_file: the backing file
+ *
+ * Allocate the backing file blob for all the modules.
+ *
+ * Returns 0, or -ENOMEM if memory can't be allocated.
+ */
+static int lsm_backing_file_alloc(struct file *backing_file)
+{
+	void *blob;
+
+	if (!lsm_backing_file_cache) {
+		backing_file_set_security(backing_file, NULL);
+		return 0;
+	}
+
+	blob = kmem_cache_zalloc(lsm_backing_file_cache, GFP_KERNEL);
+	backing_file_set_security(backing_file, blob);
+	if (!blob)
+		return -ENOMEM;
+	return 0;
+}
+
 /**
  * lsm_inode_alloc - allocate a composite inode blob
  * @inode: the inode that needs a blob
@@ -2630,6 +2663,57 @@ void security_file_free(struct file *file)
 	}
 }
 
+/**
+ * security_backing_file_alloc() - Allocate and setup a backing file blob
+ * @backing_file: the backing file
+ * @user_file: the associated user visible file
+ *
+ * Allocate a backing file LSM blob and perform any necessary initialization of
+ * the LSM blob.  There will be some operations where the LSM will not have
+ * access to @user_file after this point, so any important state associated
+ * with @user_file that is important to the LSM should be captured in the
+ * backing file's LSM blob.
+ *
+ * LSM's should avoid taking a reference to @user_file in this hook as it will
+ * result in problems later when the system attempts to drop/put the file
+ * references due to a circular dependency.
+ *
+ * Return: Return 0 if the hook is successful, negative values otherwise.
+ */
+int security_backing_file_alloc(struct file *backing_file,
+				const struct file *user_file)
+{
+	int rc;
+
+	rc = lsm_backing_file_alloc(backing_file);
+	if (rc)
+		return rc;
+	rc = call_int_hook(backing_file_alloc, 0, backing_file, user_file);
+	if (unlikely(rc))
+		security_backing_file_free(backing_file);
+
+	return rc;
+}
+
+/**
+ * security_backing_file_free() - Free a backing file blob
+ * @backing_file: the backing file
+ *
+ * Free any LSM state associate with a backing file's LSM blob, including the
+ * blob itself.
+ */
+void security_backing_file_free(struct file *backing_file)
+{
+	void *blob = backing_file_security(backing_file);
+
+	call_void_hook(backing_file_free, backing_file);
+
+	if (blob) {
+		backing_file_set_security(backing_file, NULL);
+		kmem_cache_free(lsm_backing_file_cache, blob);
+	}
+}
+
 /**
  * security_file_ioctl() - Check if an ioctl is allowed
  * @file: associated file
@@ -2723,6 +2807,32 @@ int security_mmap_file(struct file *file, unsigned long prot,
 	return ima_file_mmap(file, prot, prot_adj, flags);
 }
 
+/**
+ * security_mmap_backing_file - Check if mmap'ing a backing file is allowed
+ * @vma: the vm_area_struct for the mmap'd region
+ * @backing_file: the backing file being mmap'd
+ * @user_file: the user file being mmap'd
+ *
+ * Check permissions for a mmap operation on a stacked filesystem.  This hook
+ * is called after the security_mmap_file() and is responsible for authorizing
+ * the mmap on @backing_file.  It is important to note that the mmap operation
+ * on @user_file has already been authorized and the @vma->vm_file has been
+ * set to @backing_file.
+ *
+ * Return: Returns 0 if permission is granted.
+ */
+int security_mmap_backing_file(struct vm_area_struct *vma,
+			       struct file *backing_file,
+			       struct file *user_file)
+{
+	/* recommended by the stackable filesystem devs */
+	if (WARN_ON_ONCE(!(backing_file->f_mode & FMODE_BACKING)))
+		return -EIO;
+
+	return call_int_hook(mmap_backing_file, 0, vma, backing_file, user_file);
+}
+EXPORT_SYMBOL_GPL(security_mmap_backing_file);
+
 /**
  * security_mmap_addr() - Check if mmap'ing an address is allowed
  * @addr: address
-- 
2.18.0.huawei.25


^ permalink raw reply related

* [PATCH v2 stable/linux-6.6.y 3/3] selinux: fix overlayfs mmap() and mprotect() access checks
From: Cai Xinchen @ 2026-06-27  6:57 UTC (permalink / raw)
  To: viro, brauner, jack, miklos, amir73il, paul, jmorris, serge,
	stephen.smalley.work, omosnace, gregkh, bboscaccy, caixinchen1
  Cc: linux-fsdevel, linux-kernel, linux-unionfs, linux-security-module,
	selinux, bpf, stable, lujialin4
In-Reply-To: <20260627065720.1945589-1-caixinchen1@huawei.com>

From: Paul Moore <paul@paul-moore.com>

[ Upstream commit 82544d36b1729153c8aeb179e84750f0c085d3b1 ]

The existing SELinux security model for overlayfs is to allow access if
the current task is able to access the top level file (the "user" file)
and the mounter's credentials are sufficient to access the lower
level file (the "backing" file).  Unfortunately, the current code does
not properly enforce these access controls for both mmap() and mprotect()
operations on overlayfs filesystems.

This patch makes use of the newly created security_mmap_backing_file()
LSM hook to provide the missing backing file enforcement for mmap()
operations, and leverages the backing file API and new LSM blob to
provide the necessary information to properly enforce the mprotect()
access controls.

Cc: stable@vger.kernel.org
Acked-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Paul Moore <paul@paul-moore.com>
[backing_file_user_path() not available
Mainline uses backing_file_user_path(file) to obtain the user-visible path
from a backing file. The 6.6.y version uses &file->f_path directly]
Signed-off-by: Cai Xinchen <caixinchen1@huawei.com>
---
 security/selinux/hooks.c          | 242 ++++++++++++++++++++++--------
 security/selinux/include/objsec.h |  11 ++
 2 files changed, 189 insertions(+), 64 deletions(-)

diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 60092d0b013c..3f11c5ae8fbf 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -1717,49 +1717,72 @@ static inline int file_path_has_perm(const struct cred *cred,
 static int bpf_fd_pass(const struct file *file, u32 sid);
 #endif
 
-/* Check whether a task can use an open file descriptor to
-   access an inode in a given way.  Check access to the
-   descriptor itself, and then use dentry_has_perm to
-   check a particular permission to the file.
-   Access to the descriptor is implicitly granted if it
-   has the same SID as the process.  If av is zero, then
-   access to the file is not checked, e.g. for cases
-   where only the descriptor is affected like seek. */
-static int file_has_perm(const struct cred *cred,
-			 struct file *file,
-			 u32 av)
+static int __file_has_perm(const struct cred *cred, const struct file *file,
+			   u32 av, bool bf_user_file)
+
 {
-	struct file_security_struct *fsec = selinux_file(file);
-	struct inode *inode = file_inode(file);
 	struct common_audit_data ad;
-	u32 sid = cred_sid(cred);
+	struct inode *inode;
+	u32 ssid = cred_sid(cred);
+	u32 tsid_fd;
 	int rc;
 
-	ad.type = LSM_AUDIT_DATA_FILE;
-	ad.u.file = file;
+	if (bf_user_file) {
+		struct backing_file_security_struct *bfsec;
+		const struct path *path;
 
-	if (sid != fsec->sid) {
-		rc = avc_has_perm(sid, fsec->sid,
-				  SECCLASS_FD,
-				  FD__USE,
-				  &ad);
+		if (WARN_ON(!(file->f_mode & FMODE_BACKING)))
+			return -EIO;
+
+		bfsec = selinux_backing_file(file);
+		path = &file->f_path;
+		tsid_fd = bfsec->uf_sid;
+		inode = d_inode(path->dentry);
+
+		ad.type = LSM_AUDIT_DATA_PATH;
+		ad.u.path = *path;
+	} else {
+		struct file_security_struct *fsec = selinux_file(file);
+
+		tsid_fd = fsec->sid;
+		inode = file_inode(file);
+
+		ad.type = LSM_AUDIT_DATA_FILE;
+		ad.u.file = file;
+	}
+
+	if (ssid != tsid_fd) {
+		rc = avc_has_perm(ssid, tsid_fd, SECCLASS_FD, FD__USE, &ad);
 		if (rc)
-			goto out;
+			return rc;
 	}
 
 #ifdef CONFIG_BPF_SYSCALL
-	rc = bpf_fd_pass(file, cred_sid(cred));
+	/* regardless of backing vs user file, use the underlying file here */
+	rc = bpf_fd_pass(file, ssid);
 	if (rc)
 		return rc;
 #endif
 
 	/* av is zero if only checking access to the descriptor. */
-	rc = 0;
 	if (av)
-		rc = inode_has_perm(cred, inode, av, &ad);
+		return inode_has_perm(cred, inode, av, &ad);
 
-out:
-	return rc;
+	return 0;
+}
+
+/* Check whether a task can use an open file descriptor to
+   access an inode in a given way.  Check access to the
+   descriptor itself, and then use dentry_has_perm to
+   check a particular permission to the file.
+   Access to the descriptor is implicitly granted if it
+   has the same SID as the process.  If av is zero, then
+   access to the file is not checked, e.g. for cases
+   where only the descriptor is affected like seek. */
+static inline int file_has_perm(const struct cred *cred,
+				const struct file *file, u32 av)
+{
+	return __file_has_perm(cred, file, av, false);
 }
 
 /*
@@ -3638,6 +3661,17 @@ static int selinux_file_alloc_security(struct file *file)
 	return 0;
 }
 
+static int selinux_backing_file_alloc(struct file *backing_file,
+				      const struct file *user_file)
+{
+	struct backing_file_security_struct *bfsec;
+
+	bfsec = selinux_backing_file(backing_file);
+	bfsec->uf_sid = selinux_file(user_file)->sid;
+
+	return 0;
+}
+
 /*
  * Check whether a task has the ioctl permission and cmd
  * operation to an inode.
@@ -3755,42 +3789,55 @@ static int selinux_file_ioctl_compat(struct file *file, unsigned int cmd,
 
 static int default_noexec __ro_after_init;
 
-static int file_map_prot_check(struct file *file, unsigned long prot, int shared)
+static int __file_map_prot_check(const struct cred *cred,
+				 const struct file *file, unsigned long prot,
+				 bool shared, bool bf_user_file)
 {
-	const struct cred *cred = current_cred();
-	u32 sid = cred_sid(cred);
-	int rc = 0;
+	struct inode *inode = NULL;
+	bool prot_exec = prot & PROT_EXEC;
+	bool prot_write = prot & PROT_WRITE;
+
+	if (file) {
+		if (bf_user_file)
+			inode = d_inode(file->f_path.dentry);
+		else
+			inode = file_inode(file);
+	}
+
+	if (default_noexec && prot_exec &&
+	    (!file || IS_PRIVATE(inode) || (!shared && prot_write))) {
+		int rc;
+		u32 sid = cred_sid(cred);
 
-	if (default_noexec &&
-	    (prot & PROT_EXEC) && (!file || IS_PRIVATE(file_inode(file)) ||
-				   (!shared && (prot & PROT_WRITE)))) {
 		/*
-		 * We are making executable an anonymous mapping or a
-		 * private file mapping that will also be writable.
-		 * This has an additional check.
+		 * We are making executable an anonymous mapping or a private
+		 * file mapping that will also be writable.
 		 */
-		rc = avc_has_perm(sid, sid, SECCLASS_PROCESS,
-				  PROCESS__EXECMEM, NULL);
+		rc = avc_has_perm(sid, sid, SECCLASS_PROCESS, PROCESS__EXECMEM,
+				  NULL);
 		if (rc)
-			goto error;
+			return rc;
 	}
 
 	if (file) {
-		/* read access is always possible with a mapping */
+		/* "read" always possible, "write" only if shared */
 		u32 av = FILE__READ;
-
-		/* write access only matters if the mapping is shared */
-		if (shared && (prot & PROT_WRITE))
+		if (shared && prot_write)
 			av |= FILE__WRITE;
-
-		if (prot & PROT_EXEC)
+		if (prot_exec)
 			av |= FILE__EXECUTE;
 
-		return file_has_perm(cred, file, av);
+		return __file_has_perm(cred, file, av, bf_user_file);
 	}
 
-error:
-	return rc;
+	return 0;
+}
+
+static inline int file_map_prot_check(const struct cred *cred,
+				      const struct file *file,
+				      unsigned long prot, bool shared)
+{
+	return __file_map_prot_check(cred, file, prot, shared, false);
 }
 
 static int selinux_mmap_addr(unsigned long addr)
@@ -3806,36 +3853,80 @@ static int selinux_mmap_addr(unsigned long addr)
 	return rc;
 }
 
-static int selinux_mmap_file(struct file *file,
-			     unsigned long reqprot __always_unused,
-			     unsigned long prot, unsigned long flags)
+static int selinux_mmap_file_common(const struct cred *cred, struct file *file,
+				    unsigned long prot, bool shared)
 {
-	struct common_audit_data ad;
-	int rc;
-
 	if (file) {
+		int rc;
+		struct common_audit_data ad;
+
 		ad.type = LSM_AUDIT_DATA_FILE;
 		ad.u.file = file;
-		rc = inode_has_perm(current_cred(), file_inode(file),
-				    FILE__MAP, &ad);
+		rc = inode_has_perm(cred, file_inode(file), FILE__MAP, &ad);
 		if (rc)
 			return rc;
 	}
 
-	return file_map_prot_check(file, prot,
-				   (flags & MAP_TYPE) == MAP_SHARED);
+	return file_map_prot_check(cred, file, prot, shared);
+}
+
+static int selinux_mmap_file(struct file *file,
+			     unsigned long reqprot __always_unused,
+			     unsigned long prot, unsigned long flags)
+{
+	return selinux_mmap_file_common(current_cred(), file, prot,
+					(flags & MAP_TYPE) == MAP_SHARED);
+}
+
+/**
+ * selinux_mmap_backing_file - Check mmap permissions on a backing file
+ * @vma: memory region
+ * @backing_file: stacked filesystem backing file
+ * @user_file: user visible file
+ *
+ * This is called after selinux_mmap_file() on stacked filesystems, and it
+ * is this function's responsibility to verify access to @backing_file and
+ * setup the SELinux state for possible later use in the mprotect() code path.
+ *
+ * By the time this function is called, mmap() access to @user_file has already
+ * been authorized and @vma->vm_file has been set to point to @backing_file.
+ *
+ * Return zero on success, negative values otherwise.
+ */
+static int selinux_mmap_backing_file(struct vm_area_struct *vma,
+				     struct file *backing_file,
+				     struct file *user_file __always_unused)
+{
+	unsigned long prot = 0;
+
+	/* translate vma->vm_flags perms into PROT perms */
+	if (vma->vm_flags & VM_READ)
+		prot |= PROT_READ;
+	if (vma->vm_flags & VM_WRITE)
+		prot |= PROT_WRITE;
+	if (vma->vm_flags & VM_EXEC)
+		prot |= PROT_EXEC;
+
+	return selinux_mmap_file_common(backing_file->f_cred, backing_file,
+					prot, vma->vm_flags & VM_SHARED);
 }
 
 static int selinux_file_mprotect(struct vm_area_struct *vma,
 				 unsigned long reqprot __always_unused,
 				 unsigned long prot)
 {
+	int rc;
 	const struct cred *cred = current_cred();
 	u32 sid = cred_sid(cred);
+	const struct file *file = vma->vm_file;
+	bool backing_file;
+	bool shared = vma->vm_flags & VM_SHARED;
+
+	/* check if we need to trigger the "backing files are awful" mode */
+	backing_file = file && (file->f_mode & FMODE_BACKING);
 
 	if (default_noexec &&
 	    (prot & PROT_EXEC) && !(vma->vm_flags & VM_EXEC)) {
-		int rc = 0;
 		/*
 		 * We don't use the vma_is_initial_heap() helper as it has
 		 * a history of problems and is currently broken on systems
@@ -3849,11 +3940,15 @@ static int selinux_file_mprotect(struct vm_area_struct *vma,
 		    vma->vm_end <= vma->vm_mm->brk) {
 			rc = avc_has_perm(sid, sid, SECCLASS_PROCESS,
 					  PROCESS__EXECHEAP, NULL);
-		} else if (!vma->vm_file && (vma_is_initial_stack(vma) ||
+			if (rc)
+				return rc;
+		} else if (!file && (vma_is_initial_stack(vma) ||
 			    vma_is_stack_for_current(vma))) {
 			rc = avc_has_perm(sid, sid, SECCLASS_PROCESS,
 					  PROCESS__EXECSTACK, NULL);
-		} else if (vma->vm_file && vma->anon_vma) {
+			if (rc)
+				return rc;
+		} else if (file && vma->anon_vma) {
 			/*
 			 * We are making executable a file mapping that has
 			 * had some COW done. Since pages might have been
@@ -3861,13 +3956,29 @@ static int selinux_file_mprotect(struct vm_area_struct *vma,
 			 * modified content.  This typically should only
 			 * occur for text relocations.
 			 */
-			rc = file_has_perm(cred, vma->vm_file, FILE__EXECMOD);
+			rc = __file_has_perm(cred, file, FILE__EXECMOD,
+					     backing_file);
+			if (rc)
+				return rc;
+			if (backing_file) {
+				rc = file_has_perm(file->f_cred, file,
+						   FILE__EXECMOD);
+				if (rc)
+					return rc;
+			}
 		}
+	}
+
+	rc = __file_map_prot_check(cred, file, prot, shared, backing_file);
+	if (rc)
+		return rc;
+	if (backing_file) {
+		rc = file_map_prot_check(file->f_cred, file, prot, shared);
 		if (rc)
 			return rc;
 	}
 
-	return file_map_prot_check(vma->vm_file, prot, vma->vm_flags&VM_SHARED);
+	return 0;
 }
 
 static int selinux_file_lock(struct file *file, unsigned int cmd)
@@ -6870,6 +6981,7 @@ static void selinux_bpf_prog_free(struct bpf_prog_aux *aux)
 struct lsm_blob_sizes selinux_blob_sizes __ro_after_init = {
 	.lbs_cred = sizeof(struct task_security_struct),
 	.lbs_file = sizeof(struct file_security_struct),
+	.lbs_backing_file = sizeof(struct backing_file_security_struct),
 	.lbs_inode = sizeof(struct inode_security_struct),
 	.lbs_ipc = sizeof(struct ipc_security_struct),
 	.lbs_msg_msg = sizeof(struct msg_security_struct),
@@ -7074,9 +7186,11 @@ static struct security_hook_list selinux_hooks[] __ro_after_init = {
 
 	LSM_HOOK_INIT(file_permission, selinux_file_permission),
 	LSM_HOOK_INIT(file_alloc_security, selinux_file_alloc_security),
+	LSM_HOOK_INIT(backing_file_alloc, selinux_backing_file_alloc),
 	LSM_HOOK_INIT(file_ioctl, selinux_file_ioctl),
 	LSM_HOOK_INIT(file_ioctl_compat, selinux_file_ioctl_compat),
 	LSM_HOOK_INIT(mmap_file, selinux_mmap_file),
+	LSM_HOOK_INIT(mmap_backing_file, selinux_mmap_backing_file),
 	LSM_HOOK_INIT(mmap_addr, selinux_mmap_addr),
 	LSM_HOOK_INIT(file_mprotect, selinux_file_mprotect),
 	LSM_HOOK_INIT(file_lock, selinux_file_lock),
diff --git a/security/selinux/include/objsec.h b/security/selinux/include/objsec.h
index 8159fd53c3de..541933dd295c 100644
--- a/security/selinux/include/objsec.h
+++ b/security/selinux/include/objsec.h
@@ -60,6 +60,10 @@ struct file_security_struct {
 	u32 pseqno;		/* Policy seqno at the time of file open */
 };
 
+struct backing_file_security_struct {
+	u32 uf_sid; /* associated user file fsec->sid */
+};
+
 struct superblock_security_struct {
 	u32 sid;			/* SID of file system superblock */
 	u32 def_sid;			/* default SID for labeling */
@@ -158,6 +162,13 @@ static inline struct file_security_struct *selinux_file(const struct file *file)
 	return file->f_security + selinux_blob_sizes.lbs_file;
 }
 
+static inline struct backing_file_security_struct *
+selinux_backing_file(const struct file *backing_file)
+{
+	void *blob = backing_file_security(backing_file);
+	return blob + selinux_blob_sizes.lbs_backing_file;
+}
+
 static inline struct inode_security_struct *selinux_inode(
 						const struct inode *inode)
 {
-- 
2.18.0.huawei.25


^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox