* [PATCH v18 1/8] rust: alloc: add `KBox::into_non_null`
From: Andreas Hindborg @ 2026-06-25 10:15 UTC (permalink / raw)
To: Danilo Krummrich, Lorenzo Stoakes, Vlastimil Babka,
Liam R. Howlett, Uladzislau Rezki, Miguel Ojeda, Boqun Feng,
Gary Guo, Björn Roy Baron, Benno Lossin, Alice Ryhl,
Trevor Gross, Daniel Almeida, Tamir Duberstein, Alexandre Courbot,
Onur Özkan, Lyude Paul, Greg Kroah-Hartman,
Arve Hjønnevåg, Todd Kjos, Christian Brauner,
Carlos Llamas, Rafael J. Wysocki, Dave Ertman, Ira Weiny,
Leon Romanovsky, Paul Moore, Serge Hallyn, David Airlie,
Simona Vetter, Alexander Viro, Jan Kara, Igor Korotin,
Viresh Kumar, Nishanth Menon, Stephen Boyd, Bjorn Helgaas,
Krzysztof Wilczyński, Pavel Tikhomirov, Michal Wilczynski
Cc: Andreas Hindborg, Philipp Stanner, rust-for-linux, linux-kernel,
linux-mm, driver-core, linux-block, linux-security-module,
dri-devel, linux-fsdevel, linux-pm, linux-pci, linux-pwm
In-Reply-To: <20260625-unique-ref-v18-0-4e06b5896d47@kernel.org>
Add a method to consume a `Box<T, A>` and return a `NonNull<T>`. This
is a convenience wrapper around `Self::into_raw` for callers that need
a `NonNull` pointer rather than a raw pointer.
Signed-off-by: Andreas Hindborg <a.hindborg@kernel.org>
Reviewed-by: Alice Ryhl <aliceryhl@google.com>
Reviewed-by: Gary Guo <gary@garyguo.net>
---
rust/kernel/alloc/kbox.rs | 9 +++++++++
1 file changed, 9 insertions(+)
diff --git a/rust/kernel/alloc/kbox.rs b/rust/kernel/alloc/kbox.rs
index 35d1e015848dd..d534e8adcf7b3 100644
--- a/rust/kernel/alloc/kbox.rs
+++ b/rust/kernel/alloc/kbox.rs
@@ -211,6 +211,15 @@ pub fn leak<'a>(b: Self) -> &'a mut T {
// which points to an initialized instance of `T`.
unsafe { &mut *Box::into_raw(b) }
}
+
+ /// Consumes the `Box<T, A>` and returns a `NonNull<T>`.
+ ///
+ /// Like [`Self::into_raw`], but returns a `NonNull`.
+ #[inline]
+ pub fn into_non_null(b: Self) -> NonNull<T> {
+ // SAFETY: `KBox::into_raw` returns a valid pointer.
+ unsafe { NonNull::new_unchecked(Self::into_raw(b)) }
+ }
}
impl<T, A> Box<MaybeUninit<T>, A>
--
2.51.2
^ permalink raw reply related
* [PATCH v18 3/8] rust: implement `ForeignOwnable` for `Owned`
From: Andreas Hindborg @ 2026-06-25 10:15 UTC (permalink / raw)
To: Danilo Krummrich, Lorenzo Stoakes, Vlastimil Babka,
Liam R. Howlett, Uladzislau Rezki, Miguel Ojeda, Boqun Feng,
Gary Guo, Björn Roy Baron, Benno Lossin, Alice Ryhl,
Trevor Gross, Daniel Almeida, Tamir Duberstein, Alexandre Courbot,
Onur Özkan, Lyude Paul, Greg Kroah-Hartman,
Arve Hjønnevåg, Todd Kjos, Christian Brauner,
Carlos Llamas, Rafael J. Wysocki, Dave Ertman, Ira Weiny,
Leon Romanovsky, Paul Moore, Serge Hallyn, David Airlie,
Simona Vetter, Alexander Viro, Jan Kara, Igor Korotin,
Viresh Kumar, Nishanth Menon, Stephen Boyd, Bjorn Helgaas,
Krzysztof Wilczyński, Pavel Tikhomirov, Michal Wilczynski
Cc: Andreas Hindborg, Philipp Stanner, rust-for-linux, linux-kernel,
linux-mm, driver-core, linux-block, linux-security-module,
dri-devel, linux-fsdevel, linux-pm, linux-pci, linux-pwm
In-Reply-To: <20260625-unique-ref-v18-0-4e06b5896d47@kernel.org>
Implement `ForeignOwnable` for `Owned<T>`. This allows use of `Owned<T>` in
places such as the `XArray`.
Note that `T` does not need to implement `ForeignOwnable` for `Owned<T>` to
implement `ForeignOwnable`.
Signed-off-by: Andreas Hindborg <a.hindborg@kernel.org>
---
rust/kernel/owned.rs | 53 ++++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 53 insertions(+)
diff --git a/rust/kernel/owned.rs b/rust/kernel/owned.rs
index 7fe9ec3e55126..9c92d4a83cc1b 100644
--- a/rust/kernel/owned.rs
+++ b/rust/kernel/owned.rs
@@ -15,6 +15,8 @@
ptr::NonNull, //
};
+use kernel::types::ForeignOwnable;
+
/// Types that specify their own way of performing allocation and destruction. Typically, this trait
/// is implemented on types from the C side.
///
@@ -186,3 +188,54 @@ fn drop(&mut self) {
unsafe { T::release(self.ptr) };
}
}
+
+// SAFETY: We derive the pointer to `T` from a valid `T`, so the returned
+// pointer satisfies the alignment requirements of `T`.
+unsafe impl<T: Ownable> ForeignOwnable for Owned<T> {
+ const FOREIGN_ALIGN: usize = core::mem::align_of::<T>();
+
+ type Borrowed<'a>
+ = &'a T
+ where
+ Self: 'a;
+ type BorrowedMut<'a>
+ = Pin<&'a mut T>
+ where
+ Self: 'a;
+
+ #[inline]
+ fn into_foreign(self) -> *mut kernel::ffi::c_void {
+ let ptr = self.ptr.as_ptr().cast();
+ core::mem::forget(self);
+ ptr
+ }
+
+ #[inline]
+ unsafe fn from_foreign(ptr: *mut kernel::ffi::c_void) -> Self {
+ // INVARIANT: By the function safety contract, `ptr` was returned by `into_foreign`, which
+ // gave up exclusive ownership of a valid, pinned `T`; we retake that ownership here.
+ Self {
+ // SAFETY: By function safety contract, `ptr` came from
+ // `into_foreign` and cannot be null.
+ ptr: unsafe { NonNull::new_unchecked(ptr.cast()) },
+ }
+ }
+
+ #[inline]
+ unsafe fn borrow<'a>(ptr: *mut kernel::ffi::c_void) -> Self::Borrowed<'a> {
+ // SAFETY: By function safety requirements, `ptr` is valid for use as a
+ // reference for `'a`.
+ unsafe { &*ptr.cast() }
+ }
+
+ #[inline]
+ unsafe fn borrow_mut<'a>(ptr: *mut kernel::ffi::c_void) -> Self::BorrowedMut<'a> {
+ // SAFETY: By function safety requirements, `ptr` is valid for use as a
+ // unique reference for `'a`.
+ let inner = unsafe { &mut *ptr.cast() };
+
+ // SAFETY: We never move out of inner, and we do not hand out mutable
+ // references when `T: !Unpin`.
+ unsafe { Pin::new_unchecked(inner) }
+ }
+}
--
2.51.2
^ permalink raw reply related
* [PATCH v18 0/8] rust: add `Ownable` trait and `Owned` type
From: Andreas Hindborg @ 2026-06-25 10:15 UTC (permalink / raw)
To: Danilo Krummrich, Lorenzo Stoakes, Vlastimil Babka,
Liam R. Howlett, Uladzislau Rezki, Miguel Ojeda, Boqun Feng,
Gary Guo, Björn Roy Baron, Benno Lossin, Alice Ryhl,
Trevor Gross, Daniel Almeida, Tamir Duberstein, Alexandre Courbot,
Onur Özkan, Lyude Paul, Greg Kroah-Hartman,
Arve Hjønnevåg, Todd Kjos, Christian Brauner,
Carlos Llamas, Rafael J. Wysocki, Dave Ertman, Ira Weiny,
Leon Romanovsky, Paul Moore, Serge Hallyn, David Airlie,
Simona Vetter, Alexander Viro, Jan Kara, Igor Korotin,
Viresh Kumar, Nishanth Menon, Stephen Boyd, Bjorn Helgaas,
Krzysztof Wilczyński, Pavel Tikhomirov, Michal Wilczynski
Cc: Andreas Hindborg, Philipp Stanner, rust-for-linux, linux-kernel,
linux-mm, driver-core, linux-block, linux-security-module,
dri-devel, linux-fsdevel, linux-pm, linux-pci, linux-pwm,
Asahi Lina, Oliver Mangold, Viresh Kumar, Boqun Feng, Asahi Lina,
Igor Korotin, Andreas Hindborg
Add a new trait `Ownable` and type `Owned` for types that specify their
own way of performing allocation and destruction. This is useful for
types from the C side.
Implement `ForeignOwnable` for `Owned`.
Convert `Page` to be `Ownable` and add a `from_raw` method.
Add the trait `OwnableRefCounted` that allows conversion between
`ARef` and `Owned`. This is analogous to conversion between `Arc` and
`UniqueArc`.
Patches 1-4 implement `Ownable` and apply it to `Page`. These patches
can be merged on their own.
Patches 5-7 add `Ownable` -> `ARef` interop and can be merged later if
consensus on their shape cannot be reached.
Signed-off-by: Andreas Hindborg <a.hindborg@kernel.org>
---
Changes in v18:
- Rebase on `rust-next` (2026-06-24).
- Drop the `'static` bound on `ForeignOwnable for Owned` (Gary).
- Make `Ownable::release` take a raw pointer instead of `&mut self` (Alice, Sashiko).
- Drop `types::ARef` re-export (Alice).
- Drop unneeded `#[repr(transparent)]` on `Owned` (Gary).
- Fix `FOREIGN_ALIGN` for `Owned` to report the pointee alignment (Sashiko).
- Remove `BorrowedPage`; use `&Page` directly (Alice).
- Update Rust Binder for the `Owned<Page>` conversion (Alice).
- Update `pwm.rs` for the `RefCounted`/`AlwaysRefCounted` split (Sashiko).
- Fix documentation nits: missing `// INVARIANT:` comments, stale `Page` docs, and a stray `mut` (Sashiko).
- Expand the `use` statements touched by the rename patch to the multi-line style (Onur).
- Link to v17: https://msgid.link/20260604-unique-ref-v17-0-7b4c3d2930b9@kernel.org
Changes in v17:
- Rebase on v7.1-rc2.
- Reorder patches so that `Ownable` can merge without `OwnableRefCounted` (Alice).
- Add `#[inline]` directives to short functions added by the series (Gary).
- Link to v16: https://msgid.link/20260224-unique-ref-v16-0-c21afcb118d3@kernel.org
Changes in v16:
- Simplify pointer to reference cast in `Page::from_raw`.
- Use `NonNull<Page>` rather than `Owned<Page>` for `BorrowedPage` internals.
- Use "convertible to reference" wording when converting pointers to references.
- Fix formatting for `Page::from_raw` docs.
- Leave imports alone when adding safety comment to aref example.
- Use `KBox::into_nonnull` for examples.
- Add patch for `KBox::into_nonnull`.
- Change invariants and safety comments of `Ownable` and make the trait safe.
- Make `Ownable::release` take a mutable reference.
- Fix error handling in example for `Ownable`
- Link to v15: https://msgid.link/20260220-unique-ref-v15-0-893ed86b06cc@kernel.org
Changes in v15:
- Update series with original SoB's.
- Rename `AlwaysRefCounted` in `kernel::usb`.
- Rename `Owned::get_pin_mut` to `Owned::as_pin_mut`.
- Link to v14: https://msgid.link/20260204-unique-ref-v14-0-17cb29ebacbb@kernel.org
Changes in v14:
- Rebase on v6.19-rc7.
- Rewrite cover letter.
- Update documentation and safety comments based on v13 feedback.
- Update commit messages.
- Reorder implementation blocks in owned.rs.
- Update example in owned.rs to use try operator rather than `expect`.
- Reformat use statements.
- Add patch: rust: page: convert to `Ownable`.
- Add patch: rust: implement `ForeignOwnable` for `Owned`.
- Add patch: rust: page: add `from_raw()`.
- Link to v13: https://lore.kernel.org/r/20251117-unique-ref-v13-0-b5b243df1250@pm.me
Changes in v13:
- Rebase onto v6.18-rc1 (Andreas's work).
- Documentation and style fixes contributed by Andreas
- Link to v12: https://lore.kernel.org/r/20251001-unique-ref-v12-0-fa5c31f0c0c4@pm.me
Changes in v12:
- Rebase onto v6.17-rc1 (Andreas's work).
- moved kernel/types/ownable.rs to kernel/owned.rs
- Drop OwnableMut, make DerefMut depend on Unpin instead. I understood the
ML discussion as saying that this is okay, but it probably needs further scrutiny.
- Lots of more documentation changes suggested by reviewers.
- Usage example for Ownable/Owned.
- Link to v11: https://lore.kernel.org/r/20250618-unique-ref-v11-0-49eadcdc0aa6@pm.me
Changes in v11:
- Rework of documentation. I tried to honor all requests for changes "in
spirit" plus some clarifications and corrections of my own.
- Dropping `SimpleOwnedRefCounted` by request from Alice, as it creates a
potentially problematic blanket implementation (which a derive macro that
could be created later would not have).
- Dropping Miguel's "kbuild: provide `RUSTC_HAS_DO_NOT_RECOMMEND` symbol"
patch, as it is not needed anymore after dropping `SimpleOwnedRefCounted`.
(I can add it again, if it is considered useful anyway).
- Link to v10: https://lore.kernel.org/r/20250502-unique-ref-v10-0-25de64c0307f@pm.me
Changes in v10:
- Moved kernel/ownable.rs to kernel/types/ownable.rs
- Fixes in documentation / comments as suggested by Andreas Hindborg
- Added Reviewed-by comment for Andreas Hindborg
- Fix rustfmt of pid_namespace.rs
- Link to v9: https://lore.kernel.org/r/20250325-unique-ref-v9-0-e91618c1de26@pm.me
Changes in v9:
- Rebase onto v6.14-rc7
- Move Ownable/OwnedRefCounted/Ownable, etc., into separate module
- Documentation fixes to Ownable/OwnableMut/OwnableRefCounted
- Add missing SAFETY documentation to ARef example
- Link to v8: https://lore.kernel.org/r/20250313-unique-ref-v8-0-3082ffc67a31@pm.me
Changes in v8:
- Fix Co-developed-by and Suggested-by tags as suggested by Miguel and Boqun
- Some small documentation fixes in Owned/Ownable patch
- removing redundant trait constraint on DerefMut for Owned as suggested by Boqun Feng
- make SimpleOwnedRefCounted no longer implement RefCounted as suggested by Boqun Feng
- documentation for RefCounted as suggested by Boqun Feng
- Link to v7: https://lore.kernel.org/r/20250310-unique-ref-v7-0-4caddb78aa05@pm.me
Changes in v7:
- Squash patch to make Owned::from_raw/into_raw public into parent
- Added Signed-off-by to other people's commits
- Link to v6: https://lore.kernel.org/r/20250310-unique-ref-v6-0-1ff53558617e@pm.me
Changes in v6:
- Changed comments/formatting as suggested by Miguel Ojeda
- Included and used new config flag RUSTC_HAS_DO_NOT_RECOMMEND,
thus no changes to types.rs will be needed when the attribute
becomes available.
- Fixed commit message for Owned patch.
- Link to v5: https://lore.kernel.org/r/20250307-unique-ref-v5-0-bffeb633277e@pm.me
Changes in v5:
- Rebase the whole thing on top of the Ownable/Owned traits by Asahi Lina.
- Rename AlwaysRefCounted to RefCounted and make AlwaysRefCounted a
marker trait instead to allow to obtain an ARef<T> from an &T,
which (as Alice pointed out) is unsound when combined with UniqueRef/Owned.
- Change the Trait design and naming to implement this feature,
UniqueRef/UniqueRefCounted is dropped in favor of Ownable/Owned and
OwnableRefCounted is used to provide the functions to convert
between Owned and ARef.
- Link to v4: https://lore.kernel.org/r/20250305-unique-ref-v4-1-a8fdef7b1c2c@pm.me
Changes in v4:
- Just a minor change in naming by request from Andreas Hindborg,
try_shared_to_unique() -> try_from_shared(),
unique_to_shared() -> into_shared(),
which is more in line with standard Rust naming conventions.
- Link to v3: https://lore.kernel.org/r/Z8Wuud2UQX6Yukyr@mango
To: Danilo Krummrich <dakr@kernel.org>
To: Lorenzo Stoakes <ljs@kernel.org>
To: Vlastimil Babka <vbabka@kernel.org>
To: "Liam R. Howlett" <liam@infradead.org>
To: Uladzislau Rezki <urezki@gmail.com>
To: Miguel Ojeda <ojeda@kernel.org>
To: Boqun Feng <boqun@kernel.org>
To: Gary Guo <gary@garyguo.net>
To: Björn Roy Baron <bjorn3_gh@protonmail.com>
To: Benno Lossin <lossin@kernel.org>
To: Andreas Hindborg <a.hindborg@kernel.org>
To: Alice Ryhl <aliceryhl@google.com>
To: Trevor Gross <tmgross@umich.edu>
To: Daniel Almeida <daniel.almeida@collabora.com>
To: Tamir Duberstein <tamird@kernel.org>
To: Alexandre Courbot <acourbot@nvidia.com>
To: Onur Özkan <work@onurozkan.dev>
To: Lyude Paul <lyude@redhat.com>
To: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
To: Arve Hjønnevåg <arve@android.com>
To: Todd Kjos <tkjos@android.com>
To: Christian Brauner <brauner@kernel.org>
To: Carlos Llamas <cmllamas@google.com>
To: "Rafael J. Wysocki" <rafael@kernel.org>
To: Dave Ertman <david.m.ertman@intel.com>
To: Ira Weiny <ira.weiny@intel.com>
To: Leon Romanovsky <leon@kernel.org>
To: Paul Moore <paul@paul-moore.com>
To: Serge Hallyn <sergeh@kernel.org>
To: David Airlie <airlied@gmail.com>
To: Simona Vetter <simona@ffwll.ch>
To: Alexander Viro <viro@zeniv.linux.org.uk>
To: Jan Kara <jack@suse.cz>
To: Igor Korotin <igor.korotin@linux.dev>
To: Viresh Kumar <vireshk@kernel.org>
To: Nishanth Menon <nm@ti.com>
To: Stephen Boyd <sboyd@kernel.org>
To: Bjorn Helgaas <bhelgaas@google.com>
To: Krzysztof Wilczyński <kwilczynski@kernel.org>
To: Pavel Tikhomirov <ptikhomirov@virtuozzo.com>
To: Michal Wilczynski <m.wilczynski@samsung.com>
Cc: Philipp Stanner <phasta@kernel.org>
Cc: rust-for-linux@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Cc: linux-mm@kvack.org
Cc: driver-core@lists.linux.dev
Cc: linux-block@vger.kernel.org
Cc: linux-security-module@vger.kernel.org
Cc: dri-devel@lists.freedesktop.org
Cc: linux-fsdevel@vger.kernel.org
Cc: linux-pm@vger.kernel.org
Cc: linux-pci@vger.kernel.org
Cc: linux-pwm@vger.kernel.org
---
Andreas Hindborg (3):
rust: alloc: add `KBox::into_non_null`
rust: implement `ForeignOwnable` for `Owned`
rust: page: add `from_raw()`
Asahi Lina (2):
rust: types: Add Ownable/Owned types
rust: page: convert to `Ownable`
Oliver Mangold (3):
rust: rename `AlwaysRefCounted` to `RefCounted`.
rust: Add missing SAFETY documentation for `ARef` example
rust: Add `OwnableRefCounted`
drivers/android/binder/page_range.rs | 10 +-
rust/kernel/alloc/allocator.rs | 19 +-
rust/kernel/alloc/allocator/iter.rs | 6 +-
rust/kernel/alloc/kbox.rs | 9 +
rust/kernel/auxiliary.rs | 10 +-
rust/kernel/block/mq/request.rs | 19 +-
rust/kernel/cred.rs | 16 +-
rust/kernel/device.rs | 12 +-
rust/kernel/device/property.rs | 11 +-
rust/kernel/drm/device.rs | 9 +-
rust/kernel/drm/gem/mod.rs | 16 +-
rust/kernel/fs/file.rs | 23 ++-
rust/kernel/i2c.rs | 13 +-
rust/kernel/lib.rs | 1 +
rust/kernel/mm.rs | 22 ++-
rust/kernel/mm/mmput_async.rs | 12 +-
rust/kernel/opp.rs | 16 +-
rust/kernel/owned.rs | 371 +++++++++++++++++++++++++++++++++++
rust/kernel/page.rs | 136 +++++--------
rust/kernel/pci.rs | 10 +-
rust/kernel/pid_namespace.rs | 15 +-
rust/kernel/platform.rs | 10 +-
rust/kernel/pwm.rs | 12 +-
rust/kernel/sync/aref.rs | 82 +++++---
rust/kernel/task.rs | 13 +-
rust/kernel/types.rs | 12 ++
rust/kernel/usb.rs | 17 +-
27 files changed, 721 insertions(+), 181 deletions(-)
---
base-commit: 43a393185e33e573a374c1d4f7ddf6481484ef8d
change-id: 20250305-unique-ref-29fcd675f9e9
Best regards,
--
Andreas Hindborg <a.hindborg@kernel.org>
^ permalink raw reply
* [PATCH v18 6/8] rust: Add missing SAFETY documentation for `ARef` example
From: Andreas Hindborg @ 2026-06-25 10:15 UTC (permalink / raw)
To: Danilo Krummrich, Lorenzo Stoakes, Vlastimil Babka,
Liam R. Howlett, Uladzislau Rezki, Miguel Ojeda, Boqun Feng,
Gary Guo, Björn Roy Baron, Benno Lossin, Alice Ryhl,
Trevor Gross, Daniel Almeida, Tamir Duberstein, Alexandre Courbot,
Onur Özkan, Lyude Paul, Greg Kroah-Hartman,
Arve Hjønnevåg, Todd Kjos, Christian Brauner,
Carlos Llamas, Rafael J. Wysocki, Dave Ertman, Ira Weiny,
Leon Romanovsky, Paul Moore, Serge Hallyn, David Airlie,
Simona Vetter, Alexander Viro, Jan Kara, Igor Korotin,
Viresh Kumar, Nishanth Menon, Stephen Boyd, Bjorn Helgaas,
Krzysztof Wilczyński, Pavel Tikhomirov, Michal Wilczynski
Cc: Andreas Hindborg, Philipp Stanner, rust-for-linux, linux-kernel,
linux-mm, driver-core, linux-block, linux-security-module,
dri-devel, linux-fsdevel, linux-pm, linux-pci, linux-pwm,
Oliver Mangold
In-Reply-To: <20260625-unique-ref-v18-0-4e06b5896d47@kernel.org>
From: Oliver Mangold <oliver.mangold@pm.me>
The SAFETY comments in the `ARef` rustdoc example were just 'TODO'. Replace them with proper justifications.
Signed-off-by: Oliver Mangold <oliver.mangold@pm.me>
Reviewed-by: Daniel Almeida <daniel.almeida@collabora.com>
Co-developed-by: Andreas Hindborg <a.hindborg@kernel.org>
Signed-off-by: Andreas Hindborg <a.hindborg@kernel.org>
---
rust/kernel/sync/aref.rs | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/rust/kernel/sync/aref.rs b/rust/kernel/sync/aref.rs
index fb7466a362741..d0865aeb9371b 100644
--- a/rust/kernel/sync/aref.rs
+++ b/rust/kernel/sync/aref.rs
@@ -142,7 +142,9 @@ pub unsafe fn from_raw(ptr: NonNull<T>) -> Self {
///
/// struct Empty {}
///
- /// # // SAFETY: TODO.
+ /// // SAFETY: The `RefCounted` implementation for `Empty` does not count references and never
+ /// // frees the underlying object. Thus we can act as owning an increment on the refcount for
+ /// // the object that we pass to the newly created `ARef`.
/// unsafe impl RefCounted for Empty {
/// fn inc_ref(&self) {}
/// unsafe fn dec_ref(_obj: NonNull<Self>) {}
@@ -150,7 +152,7 @@ pub unsafe fn from_raw(ptr: NonNull<T>) -> Self {
///
/// let mut data = Empty {};
/// let ptr = NonNull::<Empty>::new(&mut data).unwrap();
- /// # // SAFETY: TODO.
+ /// // SAFETY: We keep `data` around longer than the `ARef`.
/// let data_ref: ARef<Empty> = unsafe { ARef::from_raw(ptr) };
/// let raw_ptr: NonNull<Empty> = ARef::into_raw(data_ref);
///
--
2.51.2
^ permalink raw reply related
* [PATCH v18 2/8] rust: types: Add Ownable/Owned types
From: Andreas Hindborg @ 2026-06-25 10:15 UTC (permalink / raw)
To: Danilo Krummrich, Lorenzo Stoakes, Vlastimil Babka,
Liam R. Howlett, Uladzislau Rezki, Miguel Ojeda, Boqun Feng,
Gary Guo, Björn Roy Baron, Benno Lossin, Alice Ryhl,
Trevor Gross, Daniel Almeida, Tamir Duberstein, Alexandre Courbot,
Onur Özkan, Lyude Paul, Greg Kroah-Hartman,
Arve Hjønnevåg, Todd Kjos, Christian Brauner,
Carlos Llamas, Rafael J. Wysocki, Dave Ertman, Ira Weiny,
Leon Romanovsky, Paul Moore, Serge Hallyn, David Airlie,
Simona Vetter, Alexander Viro, Jan Kara, Igor Korotin,
Viresh Kumar, Nishanth Menon, Stephen Boyd, Bjorn Helgaas,
Krzysztof Wilczyński, Pavel Tikhomirov, Michal Wilczynski
Cc: Andreas Hindborg, Philipp Stanner, rust-for-linux, linux-kernel,
linux-mm, driver-core, linux-block, linux-security-module,
dri-devel, linux-fsdevel, linux-pm, linux-pci, linux-pwm,
Asahi Lina, Oliver Mangold, Boqun Feng
In-Reply-To: <20260625-unique-ref-v18-0-4e06b5896d47@kernel.org>
From: Asahi Lina <lina+kernel@asahilina.net>
By analogy to `AlwaysRefCounted` and `ARef`, an `Ownable` type is a
(typically C FFI) type that *may* be owned by Rust, but need not be. Unlike
`AlwaysRefCounted`, this mechanism expects the reference to be unique
within Rust, and does not allow cloning.
Conceptually, this is similar to a `KBox<T>`, except that it delegates
resource management to the `T` instead of using a generic allocator.
[ om:
- Split code into separate file and `pub use` it from types.rs.
- Make from_raw() and into_raw() public.
- Remove OwnableMut, and make DerefMut dependent on Unpin instead.
- Usage example/doctest for Ownable/Owned.
- Fixes to documentation and commit message.
]
Link: https://lore.kernel.org/all/20250202-rust-page-v1-1-e3170d7fe55e@asahilina.net/
Signed-off-by: Asahi Lina <lina+kernel@asahilina.net>
Co-developed-by: Oliver Mangold <oliver.mangold@pm.me>
Signed-off-by: Oliver Mangold <oliver.mangold@pm.me>
Reviewed-by: Boqun Feng <boqun.feng@gmail.com>
Reviewed-by: Daniel Almeida <daniel.almeida@collabora.com>
Reviewed-by: Gary Guo <gary@garyguo.net>
Reviewed-by: Alice Ryhl <aliceryhl@google.com>
[ Andreas: Updated documentation, examples, and formatting. Change safety
requirements, safety comments. ]
Co-developed-by: Andreas Hindborg <a.hindborg@kernel.org>
Signed-off-by: Andreas Hindborg <a.hindborg@kernel.org>
---
rust/kernel/lib.rs | 1 +
rust/kernel/owned.rs | 188 +++++++++++++++++++++++++++++++++++++++++++++++
rust/kernel/sync/aref.rs | 5 ++
rust/kernel/types.rs | 5 ++
4 files changed, 199 insertions(+)
diff --git a/rust/kernel/lib.rs b/rust/kernel/lib.rs
index 9512af7156df2..eb5256204a174 100644
--- a/rust/kernel/lib.rs
+++ b/rust/kernel/lib.rs
@@ -101,6 +101,7 @@
pub mod of;
#[cfg(CONFIG_PM_OPP)]
pub mod opp;
+pub mod owned;
pub mod page;
#[cfg(CONFIG_PCI)]
pub mod pci;
diff --git a/rust/kernel/owned.rs b/rust/kernel/owned.rs
new file mode 100644
index 0000000000000..7fe9ec3e55126
--- /dev/null
+++ b/rust/kernel/owned.rs
@@ -0,0 +1,188 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! Unique owned pointer types for objects with custom drop logic.
+//!
+//! These pointer types are useful for C-allocated objects which by API-contract
+//! are owned by Rust, but need to be freed through the C API.
+
+use core::{
+ mem::ManuallyDrop,
+ ops::{
+ Deref,
+ DerefMut, //
+ },
+ pin::Pin,
+ ptr::NonNull, //
+};
+
+/// Types that specify their own way of performing allocation and destruction. Typically, this trait
+/// is implemented on types from the C side.
+///
+/// Implementing this trait allows types to be referenced via the [`Owned<Self>`] pointer type. This
+/// is useful when it is desirable to tie the lifetime of the reference to an owned object, rather
+/// than pass around a bare reference. [`Ownable`] types can define custom drop logic that is
+/// executed when the owned reference [`Owned<Self>`] pointing to the object is dropped.
+///
+/// Note: The underlying object is not required to provide internal reference counting, because it
+/// represents a unique, owned reference. If reference counting (on the Rust side) is required,
+/// [`AlwaysRefCounted`](crate::sync::aref::AlwaysRefCounted) should be implemented.
+///
+/// # Examples
+///
+/// A minimal example implementation of [`Ownable`] and its usage with [`Owned`] looks like
+/// this:
+///
+/// ```
+/// # #![expect(clippy::disallowed_names)]
+/// # use core::cell::Cell;
+/// # use core::ptr::NonNull;
+/// # use kernel::sync::global_lock;
+/// # use kernel::alloc::{flags, kbox::KBox, AllocError};
+/// # use kernel::types::{Owned, Ownable};
+///
+/// // Let's count the allocations to see if freeing works.
+/// kernel::sync::global_lock! {
+/// // SAFETY: we call `init()` right below, before doing anything else.
+/// unsafe(uninit) static FOO_ALLOC_COUNT: Mutex<usize> = 0;
+/// }
+/// // SAFETY: We call `init()` only once, here.
+/// unsafe { FOO_ALLOC_COUNT.init() };
+///
+/// struct Foo;
+///
+/// impl Foo {
+/// fn new() -> Result<Owned<Self>> {
+/// // We are just using a `KBox` here to handle the actual allocation, as our `Foo` is
+/// // not actually a C-allocated object.
+/// let result = KBox::new(
+/// Foo {},
+/// flags::GFP_KERNEL,
+/// )?;
+/// let result = KBox::into_non_null(result);
+/// // Count new allocation
+/// *FOO_ALLOC_COUNT.lock() += 1;
+/// // SAFETY:
+/// // - We just allocated the `Self`, thus it is valid and we own it.
+/// // - We can transfer this ownership to the `from_raw` method.
+/// Ok(unsafe { Owned::from_raw(result) })
+/// }
+/// }
+///
+/// impl Ownable for Foo {
+/// unsafe fn release(this: NonNull<Self>) {
+/// // SAFETY: The [`KBox<Self>`] is still alive. We can pass ownership to the [`KBox`], as
+/// // by requirement on calling this function.
+/// drop(unsafe { KBox::from_raw(this.as_ptr()) });
+/// // Count released allocation
+/// *FOO_ALLOC_COUNT.lock() -= 1;
+/// }
+/// }
+///
+/// {
+/// let foo = Foo::new()?;
+/// assert!(*FOO_ALLOC_COUNT.lock() == 1);
+/// }
+/// // `foo` is out of scope now, so we expect no live allocations.
+/// assert!(*FOO_ALLOC_COUNT.lock() == 0);
+/// # Ok::<(), Error>(())
+/// ```
+pub trait Ownable {
+ /// Tear down this `Ownable`.
+ ///
+ /// Implementers of `Ownable` can use this function to clean up the use of `Self`. This can
+ /// include freeing the underlying object.
+ ///
+ /// # Safety
+ ///
+ /// Callers must ensure that they have exclusive ownership of the `Self` pointed to by `this`,
+ /// and that this ownership is transferred to the `release` method. `this` must not be used
+ /// after calling this method, as the underlying object may have been freed.
+ unsafe fn release(this: NonNull<Self>);
+}
+
+/// A mutable reference to an owned `T`.
+///
+/// The [`Ownable`] is automatically freed or released when an instance of [`Owned`] is
+/// dropped.
+///
+/// # Invariants
+///
+/// - Until `T::release` is called, this `Owned<T>` exclusively owns the underlying `T`.
+/// - The `T` value is pinned.
+pub struct Owned<T: Ownable> {
+ ptr: NonNull<T>,
+}
+
+impl<T: Ownable> Owned<T> {
+ /// Creates a new instance of [`Owned`].
+ ///
+ /// This function takes over ownership of the underlying object.
+ ///
+ /// # Safety
+ ///
+ /// Callers must ensure that:
+ /// - `ptr` points to a valid instance of `T`.
+ /// - Until `T::release` is called, the returned `Owned<T>` exclusively owns the underlying `T`.
+ #[inline]
+ pub unsafe fn from_raw(ptr: NonNull<T>) -> Self {
+ // INVARIANT: By function safety requirement we satisfy the first invariant of `Self`.
+ // We treat `T` as pinned from now on.
+ Self { ptr }
+ }
+
+ /// Consumes the [`Owned`], returning a raw pointer.
+ ///
+ /// This function does not drop the underlying `T`. When this function returns, ownership of the
+ /// underlying `T` is with the caller.
+ #[inline]
+ pub fn into_raw(me: Self) -> NonNull<T> {
+ ManuallyDrop::new(me).ptr
+ }
+
+ /// Get a pinned mutable reference to the data owned by this `Owned<T>`.
+ #[inline]
+ pub fn as_pin_mut(&mut self) -> Pin<&mut T> {
+ // SAFETY: The type invariants guarantee that the object is valid, and that we can safely
+ // return a mutable reference to it.
+ let unpinned = unsafe { self.ptr.as_mut() };
+
+ // SAFETY: By type invariant `T` is pinned.
+ unsafe { Pin::new_unchecked(unpinned) }
+ }
+}
+
+// SAFETY: It is safe to send an [`Owned<T>`] to another thread when the underlying `T` is [`Send`],
+// because of the ownership invariant. Sending an [`Owned<T>`] is equivalent to sending the `T`.
+unsafe impl<T: Ownable + Send> Send for Owned<T> {}
+
+// SAFETY: It is safe to send [`&Owned<T>`] to another thread when the underlying `T` is [`Sync`],
+// because of the ownership invariant. Sending an [`&Owned<T>`] is equivalent to sending the `&T`.
+unsafe impl<T: Ownable + Sync> Sync for Owned<T> {}
+
+impl<T: Ownable> Deref for Owned<T> {
+ type Target = T;
+
+ #[inline]
+ fn deref(&self) -> &Self::Target {
+ // SAFETY: The type invariants guarantee that the object is valid.
+ unsafe { self.ptr.as_ref() }
+ }
+}
+
+impl<T: Ownable + Unpin> DerefMut for Owned<T> {
+ #[inline]
+ fn deref_mut(&mut self) -> &mut Self::Target {
+ // SAFETY: The type invariants guarantee that the object is valid, and that we can safely
+ // return a mutable reference to it.
+ unsafe { self.ptr.as_mut() }
+ }
+}
+
+impl<T: Ownable> Drop for Owned<T> {
+ #[inline]
+ fn drop(&mut self) {
+ // SAFETY: By existence of `&mut self` we exclusively own `self` and the underlying `T`. As
+ // we are dropping `self`, we can transfer ownership of the `T` to the `release` method.
+ unsafe { T::release(self.ptr) };
+ }
+}
diff --git a/rust/kernel/sync/aref.rs b/rust/kernel/sync/aref.rs
index b721b2e00b986..3bd5eb8a1a526 100644
--- a/rust/kernel/sync/aref.rs
+++ b/rust/kernel/sync/aref.rs
@@ -34,6 +34,11 @@
/// Rust code, the recommendation is to use [`Arc`](crate::sync::Arc) to create reference-counted
/// instances of a type.
///
+/// Note: Implementing this trait allows types to be wrapped in an [`ARef<Self>`]. It requires an
+/// internal reference count and provides only shared references. If unique references are
+/// required, [`Ownable`](crate::types::Ownable) should be implemented, which allows types to be
+/// wrapped in an [`Owned<Self>`](crate::types::Owned).
+///
/// # Safety
///
/// Implementers must ensure that increments to the reference count keep the object alive in memory
diff --git a/rust/kernel/types.rs b/rust/kernel/types.rs
index ac316fd7b538f..c41eab0ec983c 100644
--- a/rust/kernel/types.rs
+++ b/rust/kernel/types.rs
@@ -15,6 +15,11 @@
pub mod for_lt;
pub use for_lt::ForLt;
+pub use crate::owned::{
+ Ownable,
+ Owned, //
+};
+
/// Used to transfer ownership to and from foreign (non-Rust) languages.
///
/// Ownership is transferred from Rust to a foreign language by calling [`Self::into_foreign`] and
--
2.51.2
^ permalink raw reply related
* [PATCH v18 8/8] rust: page: add `from_raw()`
From: Andreas Hindborg @ 2026-06-25 10:15 UTC (permalink / raw)
To: Danilo Krummrich, Lorenzo Stoakes, Vlastimil Babka,
Liam R. Howlett, Uladzislau Rezki, Miguel Ojeda, Boqun Feng,
Gary Guo, Björn Roy Baron, Benno Lossin, Alice Ryhl,
Trevor Gross, Daniel Almeida, Tamir Duberstein, Alexandre Courbot,
Onur Özkan, Lyude Paul, Greg Kroah-Hartman,
Arve Hjønnevåg, Todd Kjos, Christian Brauner,
Carlos Llamas, Rafael J. Wysocki, Dave Ertman, Ira Weiny,
Leon Romanovsky, Paul Moore, Serge Hallyn, David Airlie,
Simona Vetter, Alexander Viro, Jan Kara, Igor Korotin,
Viresh Kumar, Nishanth Menon, Stephen Boyd, Bjorn Helgaas,
Krzysztof Wilczyński, Pavel Tikhomirov, Michal Wilczynski
Cc: Andreas Hindborg, Philipp Stanner, rust-for-linux, linux-kernel,
linux-mm, driver-core, linux-block, linux-security-module,
dri-devel, linux-fsdevel, linux-pm, linux-pci, linux-pwm,
Andreas Hindborg
In-Reply-To: <20260625-unique-ref-v18-0-4e06b5896d47@kernel.org>
From: Andreas Hindborg <a.hindborg@samsung.com>
Add a method to `Page` that allows construction of a `&Page` from a raw
`struct page` pointer.
Signed-off-by: Andreas Hindborg <a.hindborg@samsung.com>
Reviewed-by: Onur Özkan <work@onurozkan.dev>
---
rust/kernel/page.rs | 14 ++++++++++++++
1 file changed, 14 insertions(+)
diff --git a/rust/kernel/page.rs b/rust/kernel/page.rs
index 6dc1c2395acaf..c88fda09ead5a 100644
--- a/rust/kernel/page.rs
+++ b/rust/kernel/page.rs
@@ -143,6 +143,20 @@ pub fn nid(&self) -> i32 {
unsafe { bindings::page_to_nid(self.as_ptr()) }
}
+ /// Create a `&Page` from a raw `struct page` pointer.
+ ///
+ /// # Safety
+ ///
+ /// `ptr` must be convertible to a shared reference with a lifetime of `'a`.
+ #[inline]
+ pub unsafe fn from_raw<'a>(ptr: *const bindings::page) -> &'a Self {
+ // INVARIANT: By the function safety requirements, `ptr` refers to a valid `struct page`, so
+ // the returned reference upholds the type invariant of `Page`.
+ // SAFETY: By function safety requirements, `ptr` is not null and is convertible to a shared
+ // reference.
+ unsafe { &*ptr.cast() }
+ }
+
/// Runs a piece of code with this page mapped to an address.
///
/// The page is unmapped when this call returns.
--
2.51.2
^ permalink raw reply related
* [PATCH v18 4/8] rust: page: convert to `Ownable`
From: Andreas Hindborg @ 2026-06-25 10:15 UTC (permalink / raw)
To: Danilo Krummrich, Lorenzo Stoakes, Vlastimil Babka,
Liam R. Howlett, Uladzislau Rezki, Miguel Ojeda, Boqun Feng,
Gary Guo, Björn Roy Baron, Benno Lossin, Alice Ryhl,
Trevor Gross, Daniel Almeida, Tamir Duberstein, Alexandre Courbot,
Onur Özkan, Lyude Paul, Greg Kroah-Hartman,
Arve Hjønnevåg, Todd Kjos, Christian Brauner,
Carlos Llamas, Rafael J. Wysocki, Dave Ertman, Ira Weiny,
Leon Romanovsky, Paul Moore, Serge Hallyn, David Airlie,
Simona Vetter, Alexander Viro, Jan Kara, Igor Korotin,
Viresh Kumar, Nishanth Menon, Stephen Boyd, Bjorn Helgaas,
Krzysztof Wilczyński, Pavel Tikhomirov, Michal Wilczynski
Cc: Andreas Hindborg, Philipp Stanner, rust-for-linux, linux-kernel,
linux-mm, driver-core, linux-block, linux-security-module,
dri-devel, linux-fsdevel, linux-pm, linux-pci, linux-pwm,
Asahi Lina
In-Reply-To: <20260625-unique-ref-v18-0-4e06b5896d47@kernel.org>
From: Asahi Lina <lina@asahilina.net>
This allows Page references to be returned as borrowed references,
without necessarily owning the struct page.
Remove `BorrowedPage` and update users to use `Owned<Page>`.
Signed-off-by: Asahi Lina <lina@asahilina.net>
[ Andreas: Fix formatting and add a safety comment, update users. ]
Signed-off-by: Andreas Hindborg <a.hindborg@kernel.org>
---
drivers/android/binder/page_range.rs | 10 +--
rust/kernel/alloc/allocator.rs | 19 +++---
rust/kernel/alloc/allocator/iter.rs | 6 +-
rust/kernel/page.rs | 122 +++++++++--------------------------
4 files changed, 46 insertions(+), 111 deletions(-)
diff --git a/drivers/android/binder/page_range.rs b/drivers/android/binder/page_range.rs
index e54a90e62402a..7941eb85b4ef4 100644
--- a/drivers/android/binder/page_range.rs
+++ b/drivers/android/binder/page_range.rs
@@ -33,7 +33,7 @@
sync::{aref::ARef, Mutex, SpinLock},
task::Pid,
transmute::FromBytes,
- types::Opaque,
+ types::{Opaque, Owned},
uaccess::UserSliceReader,
};
@@ -198,7 +198,7 @@ unsafe impl Send for Inner {}
#[repr(C)]
struct PageInfo {
lru: bindings::list_head,
- page: Option<Page>,
+ page: Option<Owned<Page>>,
range: *const ShrinkablePageRange,
}
@@ -206,7 +206,7 @@ impl PageInfo {
/// # Safety
///
/// The caller ensures that writing to `me.page` is ok, and that the page is not currently set.
- unsafe fn set_page(me: *mut PageInfo, page: Page) {
+ unsafe fn set_page(me: *mut PageInfo, page: Owned<Page>) {
// SAFETY: This pointer offset is in bounds.
let ptr = unsafe { &raw mut (*me).page };
@@ -229,13 +229,13 @@ unsafe fn get_page<'a>(me: *const PageInfo) -> Option<&'a Page> {
let ptr = unsafe { &raw const (*me).page };
// SAFETY: The pointer is valid for reading.
- unsafe { (*ptr).as_ref() }
+ unsafe { (*ptr).as_deref() }
}
/// # Safety
///
/// The caller ensures that writing to `me.page` is ok for the duration of 'a.
- unsafe fn take_page(me: *mut PageInfo) -> Option<Page> {
+ unsafe fn take_page(me: *mut PageInfo) -> Option<Owned<Page>> {
// SAFETY: This pointer offset is in bounds.
let ptr = unsafe { &raw mut (*me).page };
diff --git a/rust/kernel/alloc/allocator.rs b/rust/kernel/alloc/allocator.rs
index cd4203f27aed0..c7b9b069cf75d 100644
--- a/rust/kernel/alloc/allocator.rs
+++ b/rust/kernel/alloc/allocator.rs
@@ -169,7 +169,7 @@ unsafe fn realloc(
}
impl Vmalloc {
- /// Convert a pointer to a [`Vmalloc`] allocation to a [`page::BorrowedPage`].
+ /// Convert a pointer to a [`Vmalloc`] allocation to a [`Page`](page::Page) reference.
///
/// # Examples
///
@@ -202,20 +202,17 @@ impl Vmalloc {
///
/// - `ptr` must be a valid pointer to a [`Vmalloc`] allocation.
/// - `ptr` must remain valid for the entire duration of `'a`.
- pub unsafe fn to_page<'a>(ptr: NonNull<u8>) -> page::BorrowedPage<'a> {
+ pub unsafe fn to_page<'a>(ptr: NonNull<u8>) -> &'a page::Page {
// SAFETY: `ptr` is a valid pointer to `Vmalloc` memory.
let page = unsafe { bindings::vmalloc_to_page(ptr.as_ptr().cast()) };
- // SAFETY: `vmalloc_to_page` returns a valid pointer to a `struct page` for a valid pointer
- // to `Vmalloc` memory.
- let page = unsafe { NonNull::new_unchecked(page) };
-
// SAFETY:
- // - `page` is a valid pointer to a `struct page`, given that by the safety requirements of
- // this function `ptr` is a valid pointer to a `Vmalloc` allocation.
- // - By the safety requirements of this function `ptr` is valid for the entire lifetime of
- // `'a`.
- unsafe { page::BorrowedPage::from_raw(page) }
+ // - `vmalloc_to_page` returns a valid, non-null pointer to a `struct page` for a valid
+ // pointer to `Vmalloc` memory, given that by the safety requirements of this function
+ // `ptr` is a valid pointer to a `Vmalloc` allocation.
+ // - By the safety requirements of this function `ptr`, and hence the `struct page`, is
+ // valid for the entire lifetime of `'a`.
+ unsafe { &*page.cast() }
}
}
diff --git a/rust/kernel/alloc/allocator/iter.rs b/rust/kernel/alloc/allocator/iter.rs
index 02fda3ea5cae6..8dcc16ed89893 100644
--- a/rust/kernel/alloc/allocator/iter.rs
+++ b/rust/kernel/alloc/allocator/iter.rs
@@ -9,7 +9,7 @@
ptr::NonNull, //
};
-/// An [`Iterator`] of [`page::BorrowedPage`] items owned by a [`Vmalloc`] allocation.
+/// An [`Iterator`] of [`Page`](page::Page) references owned by a [`Vmalloc`] allocation.
///
/// # Guarantees
///
@@ -28,11 +28,11 @@ pub struct VmallocPageIter<'a> {
size: usize,
/// The current page index of the [`Iterator`].
index: usize,
- _p: PhantomData<page::BorrowedPage<'a>>,
+ _p: PhantomData<&'a page::Page>,
}
impl<'a> Iterator for VmallocPageIter<'a> {
- type Item = page::BorrowedPage<'a>;
+ type Item = &'a page::Page;
fn next(&mut self) -> Option<Self::Item> {
let offset = self.index.checked_mul(page::PAGE_SIZE)?;
diff --git a/rust/kernel/page.rs b/rust/kernel/page.rs
index 8affd8262891b..6dc1c2395acaf 100644
--- a/rust/kernel/page.rs
+++ b/rust/kernel/page.rs
@@ -12,16 +12,16 @@
code::*,
Result, //
},
+ types::{
+ Opaque,
+ Ownable,
+ Owned, //
+ },
uaccess::UserSliceReader, //
};
-use core::{
- marker::PhantomData,
- mem::ManuallyDrop,
- ops::Deref,
- ptr::{
- self,
- NonNull, //
- }, //
+use core::ptr::{
+ self,
+ NonNull, //
};
/// A bitwise shift for the page size.
@@ -65,93 +65,29 @@ pub const fn page_align(addr: usize) -> Option<usize> {
Some(sum & PAGE_MASK)
}
-/// Representation of a non-owning reference to a [`Page`].
-///
-/// This type provides a borrowed version of a [`Page`] that is owned by some other entity, e.g. a
-/// [`Vmalloc`] allocation such as [`VBox`].
-///
-/// # Example
-///
-/// ```
-/// # use kernel::{bindings, prelude::*};
-/// use kernel::page::{BorrowedPage, Page, PAGE_SIZE};
-/// # use core::{mem::MaybeUninit, ptr, ptr::NonNull };
-///
-/// fn borrow_page<'a>(vbox: &'a mut VBox<MaybeUninit<[u8; PAGE_SIZE]>>) -> BorrowedPage<'a> {
-/// let ptr = ptr::from_ref(&**vbox);
-///
-/// // SAFETY: `ptr` is a valid pointer to `Vmalloc` memory.
-/// let page = unsafe { bindings::vmalloc_to_page(ptr.cast()) };
-///
-/// // SAFETY: `vmalloc_to_page` returns a valid pointer to a `struct page` for a valid
-/// // pointer to `Vmalloc` memory.
-/// let page = unsafe { NonNull::new_unchecked(page) };
-///
-/// // SAFETY:
-/// // - `self.0` is a valid pointer to a `struct page`.
-/// // - `self.0` is valid for the entire lifetime of `self`.
-/// unsafe { BorrowedPage::from_raw(page) }
-/// }
-///
-/// let mut vbox = VBox::<[u8; PAGE_SIZE]>::new_uninit(GFP_KERNEL)?;
-/// let page = borrow_page(&mut vbox);
-///
-/// // SAFETY: There is no concurrent read or write to this page.
-/// unsafe { page.fill_zero_raw(0, PAGE_SIZE)? };
-/// # Ok::<(), Error>(())
-/// ```
-///
-/// # Invariants
-///
-/// The borrowed underlying pointer to a `struct page` is valid for the entire lifetime `'a`.
-///
-/// [`VBox`]: kernel::alloc::VBox
-/// [`Vmalloc`]: kernel::alloc::allocator::Vmalloc
-pub struct BorrowedPage<'a>(ManuallyDrop<Page>, PhantomData<&'a Page>);
-
-impl<'a> BorrowedPage<'a> {
- /// Constructs a [`BorrowedPage`] from a raw pointer to a `struct page`.
- ///
- /// # Safety
- ///
- /// - `ptr` must point to a valid `bindings::page`.
- /// - `ptr` must remain valid for the entire lifetime `'a`.
- pub unsafe fn from_raw(ptr: NonNull<bindings::page>) -> Self {
- let page = Page { page: ptr };
-
- // INVARIANT: The safety requirements guarantee that `ptr` is valid for the entire lifetime
- // `'a`.
- Self(ManuallyDrop::new(page), PhantomData)
- }
-}
-
-impl<'a> Deref for BorrowedPage<'a> {
- type Target = Page;
-
- fn deref(&self) -> &Self::Target {
- &self.0
- }
-}
-
-/// Trait to be implemented by types which provide an [`Iterator`] implementation of
-/// [`BorrowedPage`] items, such as [`VmallocPageIter`](kernel::alloc::allocator::VmallocPageIter).
+/// Trait to be implemented by types which provide an [`Iterator`] of [`Page`] references, such as
+/// [`VmallocPageIter`](kernel::alloc::allocator::VmallocPageIter).
pub trait AsPageIter {
/// The [`Iterator`] type, e.g. [`VmallocPageIter`](kernel::alloc::allocator::VmallocPageIter).
- type Iter<'a>: Iterator<Item = BorrowedPage<'a>>
+ type Iter<'a>: Iterator<Item = &'a Page>
where
Self: 'a;
- /// Returns an [`Iterator`] of [`BorrowedPage`] items over all pages owned by `self`.
+ /// Returns an [`Iterator`] of [`Page`] references over all pages owned by `self`.
fn page_iter(&mut self) -> Self::Iter<'_>;
}
-/// A pointer to a page that owns the page allocation.
+/// A `struct page`.
+///
+/// A `Page` is accessed through a shared reference or through an owning [`Owned<Page>`]; the latter
+/// frees the page allocation when it is dropped.
///
/// # Invariants
///
-/// The pointer is valid, and has ownership over the page.
+/// The `Page` is backed by a valid `struct page`.
+#[repr(transparent)]
pub struct Page {
- page: NonNull<bindings::page>,
+ page: Opaque<bindings::page>,
}
// SAFETY: Pages have no logic that relies on them staying on a given thread, so moving them across
@@ -185,19 +121,20 @@ impl Page {
/// # Ok::<(), kernel::alloc::AllocError>(())
/// ```
#[inline]
- pub fn alloc_page(flags: Flags) -> Result<Self, AllocError> {
+ pub fn alloc_page(flags: Flags) -> Result<Owned<Self>, AllocError> {
// SAFETY: Depending on the value of `gfp_flags`, this call may sleep. Other than that, it
// is always safe to call this method.
let page = unsafe { bindings::alloc_pages(flags.as_raw(), 0) };
let page = NonNull::new(page).ok_or(AllocError)?;
- // INVARIANT: We just successfully allocated a page, so we now have ownership of the newly
- // allocated page. We transfer that ownership to the new `Page` object.
- Ok(Self { page })
+ // SAFETY: We just successfully allocated a page, so we now have ownership of the newly
+ // allocated page. We transfer that ownership to the new `Owned<Page>` object.
+ // Since `Page` is transparent, we can cast the pointer directly.
+ Ok(unsafe { Owned::from_raw(page.cast()) })
}
/// Returns a raw pointer to the page.
pub fn as_ptr(&self) -> *mut bindings::page {
- self.page.as_ptr()
+ Opaque::cast_into(&self.page)
}
/// Get the node id containing this page.
@@ -372,10 +309,11 @@ pub unsafe fn copy_from_user_slice_raw(
}
}
-impl Drop for Page {
+impl Ownable for Page {
#[inline]
- fn drop(&mut self) {
- // SAFETY: By the type invariants, we have ownership of the page and can free it.
- unsafe { bindings::__free_pages(self.page.as_ptr(), 0) };
+ unsafe fn release(this: NonNull<Self>) {
+ // SAFETY: By the function safety requirements, we have ownership of the page and can free
+ // it. Since Page is transparent, we can cast the raw pointer directly.
+ unsafe { bindings::__free_pages(this.as_ptr().cast(), 0) };
}
}
--
2.51.2
^ permalink raw reply related
* [PATCH v18 7/8] rust: Add `OwnableRefCounted`
From: Andreas Hindborg @ 2026-06-25 10:15 UTC (permalink / raw)
To: Danilo Krummrich, Lorenzo Stoakes, Vlastimil Babka,
Liam R. Howlett, Uladzislau Rezki, Miguel Ojeda, Boqun Feng,
Gary Guo, Björn Roy Baron, Benno Lossin, Alice Ryhl,
Trevor Gross, Daniel Almeida, Tamir Duberstein, Alexandre Courbot,
Onur Özkan, Lyude Paul, Greg Kroah-Hartman,
Arve Hjønnevåg, Todd Kjos, Christian Brauner,
Carlos Llamas, Rafael J. Wysocki, Dave Ertman, Ira Weiny,
Leon Romanovsky, Paul Moore, Serge Hallyn, David Airlie,
Simona Vetter, Alexander Viro, Jan Kara, Igor Korotin,
Viresh Kumar, Nishanth Menon, Stephen Boyd, Bjorn Helgaas,
Krzysztof Wilczyński, Pavel Tikhomirov, Michal Wilczynski
Cc: Andreas Hindborg, Philipp Stanner, rust-for-linux, linux-kernel,
linux-mm, driver-core, linux-block, linux-security-module,
dri-devel, linux-fsdevel, linux-pm, linux-pci, linux-pwm,
Oliver Mangold
In-Reply-To: <20260625-unique-ref-v18-0-4e06b5896d47@kernel.org>
From: Oliver Mangold <oliver.mangold@pm.me>
Types implementing `OwnableRefCounted` can safely convert between an
`ARef<T>` and an `Owned<T>`.
This is useful for types which generally are accessed through an `ARef`
but have methods which can only safely be called when the reference is
unique, like e.g. `block::mq::Request::end_ok()`.
Signed-off-by: Oliver Mangold <oliver.mangold@pm.me>
[ Andreas: Fix formatting, update documentation, fix error handling in
examples. ]
Co-developed-by: Andreas Hindborg <a.hindborg@kernel.org>
Signed-off-by: Andreas Hindborg <a.hindborg@kernel.org>
---
rust/kernel/owned.rs | 140 +++++++++++++++++++++++++++++++++++++++++++++--
rust/kernel/sync/aref.rs | 16 +++++-
rust/kernel/types.rs | 1 +
3 files changed, 151 insertions(+), 6 deletions(-)
diff --git a/rust/kernel/owned.rs b/rust/kernel/owned.rs
index e79936c00002c..bb4223c0f725a 100644
--- a/rust/kernel/owned.rs
+++ b/rust/kernel/owned.rs
@@ -14,20 +14,26 @@
pin::Pin,
ptr::NonNull, //
};
+use kernel::{
+ sync::aref::ARef,
+ types::RefCounted, //
+};
use kernel::types::ForeignOwnable;
/// Types that specify their own way of performing allocation and destruction. Typically, this trait
/// is implemented on types from the C side.
///
-/// Implementing this trait allows types to be referenced via the [`Owned<Self>`] pointer type. This
-/// is useful when it is desirable to tie the lifetime of the reference to an owned object, rather
-/// than pass around a bare reference. [`Ownable`] types can define custom drop logic that is
-/// executed when the owned reference [`Owned<Self>`] pointing to the object is dropped.
+/// Implementing this trait allows types to be referenced via the [`Owned<Self>`] pointer type.
+/// - This is useful when it is desirable to tie the lifetime of an object reference to an owned
+/// object, rather than pass around a bare reference.
+/// - [`Ownable`] types can define custom drop logic that is executed when the owned reference
+/// of type [`Owned<_>`] pointing to the object is dropped.
///
/// Note: The underlying object is not required to provide internal reference counting, because it
/// represents a unique, owned reference. If reference counting (on the Rust side) is required,
-/// [`RefCounted`](crate::types::RefCounted) should be implemented.
+/// [`RefCounted`] should be implemented. [`OwnableRefCounted`] should be implemented if conversion
+/// between unique and shared (reference counted) ownership is needed.
///
/// # Examples
///
@@ -239,3 +245,127 @@ unsafe fn borrow_mut<'a>(ptr: *mut kernel::ffi::c_void) -> Self::BorrowedMut<'a>
unsafe { Pin::new_unchecked(inner) }
}
}
+
+/// A trait for objects that can be wrapped in either one of the reference types [`Owned`] and
+/// [`ARef`].
+///
+/// # Examples
+///
+/// A minimal example implementation of [`OwnableRefCounted`], [`Ownable`] and its usage with
+/// [`ARef`] and [`Owned`] looks like this:
+///
+/// ```
+/// # #![expect(clippy::disallowed_names)]
+/// # use core::cell::Cell;
+/// # use core::ptr::NonNull;
+/// # use kernel::alloc::{flags, kbox::KBox, AllocError};
+/// # use kernel::sync::aref::{ARef, RefCounted};
+/// # use kernel::types::{Owned, Ownable, OwnableRefCounted};
+///
+/// // An internally refcounted struct for demonstration purposes.
+/// //
+/// // # Invariants
+/// //
+/// // - `refcount` is always non-zero for a valid object.
+/// // - `refcount` is >1 if there is more than one Rust reference to it.
+/// //
+/// struct Foo {
+/// refcount: Cell<usize>,
+/// }
+///
+/// impl Foo {
+/// fn new() -> Result<Owned<Self>> {
+/// // We are just using a `KBox` here to handle the actual allocation, as our `Foo` is
+/// // not actually a C-allocated object.
+/// // INVARIANT: We initialize `refcount` to 1, satisfying the invariants.
+/// let result = KBox::new(
+/// Foo {
+/// refcount: Cell::new(1),
+/// },
+/// flags::GFP_KERNEL,
+/// )?;
+/// let result = KBox::into_non_null(result);
+/// // SAFETY:
+/// // - We just allocated the `Self`, thus it is valid and we own it.
+/// // - We can transfer this ownership to the `from_raw` method.
+/// Ok(unsafe { Owned::from_raw(result) })
+/// }
+/// }
+///
+/// // SAFETY: We increment and decrement each time the respective function is called and only free
+/// // the `Foo` when the refcount reaches zero.
+/// unsafe impl RefCounted for Foo {
+/// fn inc_ref(&self) {
+/// self.refcount.replace(self.refcount.get() + 1);
+/// }
+///
+/// unsafe fn dec_ref(this: NonNull<Self>) {
+/// // SAFETY: By requirement on calling this function, the refcount is non-zero,
+/// // implying the underlying object is valid.
+/// let refcount = unsafe { &this.as_ref().refcount };
+/// let new_refcount = refcount.get() - 1;
+/// if new_refcount == 0 {
+/// // The `Foo` will be dropped when `KBox` goes out of scope.
+/// // SAFETY: The [`KBox<Foo>`] is still alive as the old refcount is 1. We can pass
+/// // ownership to the [`KBox`] as by requirement on calling this function,
+/// // the `Self` will no longer be used by the caller.
+/// unsafe { KBox::from_raw(this.as_ptr()) };
+/// } else {
+/// refcount.replace(new_refcount);
+/// }
+/// }
+/// }
+///
+/// impl OwnableRefCounted for Foo {
+/// fn try_from_shared(this: ARef<Self>) -> Result<Owned<Self>, ARef<Self>> {
+/// if this.refcount.get() == 1 {
+/// // SAFETY: The `Foo` is still alive and has no other Rust references as the refcount
+/// // is 1.
+/// Ok(unsafe { Owned::from_raw(ARef::into_raw(this)) })
+/// } else {
+/// Err(this)
+/// }
+/// }
+/// }
+///
+/// impl Ownable for Foo {
+/// unsafe fn release(this: NonNull<Self>) {
+/// // SAFETY: Using `dec_ref()` from [`RefCounted`] to release is okay, as the refcount is
+/// // always 1 for an [`Owned<Foo>`].
+/// unsafe { Foo::dec_ref(this) };
+/// }
+/// }
+///
+/// let foo = Foo::new()?;
+/// let foo = ARef::from(foo);
+/// {
+/// let bar = foo.clone();
+/// assert!(Owned::try_from(bar).is_err());
+/// }
+/// assert!(Owned::try_from(foo).is_ok());
+/// # Ok::<(), Error>(())
+/// ```
+pub trait OwnableRefCounted: RefCounted + Ownable + Sized {
+ /// Checks if the [`ARef`] is unique and converts it to an [`Owned`] if that is the case.
+ /// Otherwise, it returns an [`ARef`] to the same underlying object.
+ fn try_from_shared(this: ARef<Self>) -> Result<Owned<Self>, ARef<Self>>;
+
+ /// Converts the [`Owned`] into an [`ARef`].
+ #[inline]
+ fn into_shared(this: Owned<Self>) -> ARef<Self> {
+ // SAFETY: `Owned::into_raw` returns a pointer to a valid `Self`, and the `Owned` owned the
+ // reference count that we now transfer to the new `ARef`.
+ unsafe { ARef::from_raw(Owned::into_raw(this)) }
+ }
+}
+
+impl<T: OwnableRefCounted> TryFrom<ARef<T>> for Owned<T> {
+ type Error = ARef<T>;
+ /// Tries to convert the [`ARef`] to an [`Owned`] by calling
+ /// [`try_from_shared()`](OwnableRefCounted::try_from_shared). If the [`ARef`] is not
+ /// unique, it returns an [`ARef`] to the same underlying object instead.
+ #[inline]
+ fn try_from(b: ARef<T>) -> Result<Owned<T>, Self::Error> {
+ T::try_from_shared(b)
+ }
+}
diff --git a/rust/kernel/sync/aref.rs b/rust/kernel/sync/aref.rs
index d0865aeb9371b..77eb390139079 100644
--- a/rust/kernel/sync/aref.rs
+++ b/rust/kernel/sync/aref.rs
@@ -23,6 +23,10 @@
ops::Deref,
ptr::NonNull, //
};
+use kernel::types::{
+ OwnableRefCounted,
+ Owned, //
+};
/// Types that are internally reference counted.
///
@@ -35,7 +39,10 @@
/// Note: Implementing this trait allows types to be wrapped in an [`ARef<Self>`]. It requires an
/// internal reference count and provides only shared references. If unique references are required
/// [`Ownable`](crate::types::Ownable) should be implemented which allows types to be wrapped in an
-/// [`Owned<Self>`](crate::types::Owned).
+/// [`Owned<Self>`](crate::types::Owned). Implementing the trait
+/// [`OwnableRefCounted`] allows converting between unique and
+/// shared references (i.e. [`Owned<Self>`](crate::types::Owned) and
+/// [`ARef<Self>`]).
///
/// # Safety
///
@@ -188,6 +195,13 @@ fn from(b: &T) -> Self {
}
}
+impl<T: OwnableRefCounted> From<Owned<T>> for ARef<T> {
+ #[inline]
+ fn from(b: Owned<T>) -> Self {
+ T::into_shared(b)
+ }
+}
+
impl<T: RefCounted> Drop for ARef<T> {
fn drop(&mut self) {
// SAFETY: The type invariants guarantee that the `ARef` owns the reference we're about to
diff --git a/rust/kernel/types.rs b/rust/kernel/types.rs
index 5ef763717e59a..6aa760952cb63 100644
--- a/rust/kernel/types.rs
+++ b/rust/kernel/types.rs
@@ -18,6 +18,7 @@
pub use crate::{
owned::{
Ownable,
+ OwnableRefCounted,
Owned, //
},
sync::aref::{
--
2.51.2
^ permalink raw reply related
* Re: [PATCH 2/2] mm/mprotect: drop 'sub' from batching context
From: Barry Song @ 2026-06-25 10:14 UTC (permalink / raw)
To: Dev Jain
Cc: akpm, chrisl, kasong, liam, ljs, shikemeng, nphamcs, baoquan.he,
youngjun.park, linux-mm, linux-kernel, vbabka, jannh, pfalcato,
ryan.roberts, anshuman.khandual, david
In-Reply-To: <20260623125723.2503832-3-dev.jain@arm.com>
On Tue, Jun 23, 2026 at 8:57 PM Dev Jain <dev.jain@arm.com> wrote:
>
> Shorten the name of page_anon_exclusive_sub_batch by dropping the
> "sub-batch" context - the function itself doesn't need this context.
> Similarly, drop "sub" from sub_batch_idx, it is unnecessary and the
> usage is clear enough.
>
> Signed-off-by: Dev Jain <dev.jain@arm.com>
> ---
Reviewed-by: Barry Song <baohua@kernel.org>
^ permalink raw reply
* Re: [PATCH 1/2] mm/swap: rename subpage->page in folio_dup_swap/folio_put_swap
From: Barry Song @ 2026-06-25 10:13 UTC (permalink / raw)
To: Dev Jain
Cc: akpm, chrisl, kasong, liam, ljs, shikemeng, nphamcs, baoquan.he,
youngjun.park, linux-mm, linux-kernel, vbabka, jannh, pfalcato,
ryan.roberts, anshuman.khandual, david
In-Reply-To: <20260623125723.2503832-2-dev.jain@arm.com>
On Tue, Jun 23, 2026 at 8:57 PM Dev Jain <dev.jain@arm.com> wrote:
>
> Folios have pages, not subpages. Rename 'subpage' parameters to 'page'.
>
> Acked-by: David Hildenbrand (Arm) <david@kernel.org>
> Signed-off-by: Dev Jain <dev.jain@arm.com>
> ---
Reviewed-by: Barry Song <baohua@kernel.org>
^ permalink raw reply
* Re: [PATCH v2 0/7] vmsplice: fix some problems in my previous vmsplice patchset
From: Askar Safin @ 2026-06-25 10:11 UTC (permalink / raw)
To: david
Cc: akpm, avagin, axboe, brauner, collin.funk1, david.laight.linux,
dhowells, fuse-devel, hch, jack, joannelkoong, kernel, linux-api,
linux-fsdevel, linux-kernel, linux-mm, luto, metze, miklos,
netdev, patches, pfalcato, safinaskar, torvalds, val, viro, w,
willy
In-Reply-To: <89ea76b3-e956-4232-8180-ee3929adf905@kernel.org>
"David Hildenbrand (Arm)" <david@kernel.org>:
> I think we concluded that we cannot rip out vmsplice that way at this point, and
> I suspect that Christian will drop that topic branch from -next after -rc1.
I think my patches still have a chance.
On the fuse regression: I return EINVAL for the particular combination of
flags used by fuse. This causes fuse to fall back to the non-vmsplice
code path. I did a Debian code search and found no significant
packages that use the same combination of options.
So I think I was able to deal with the fuse regression.
On CRIU named fifo "Not supported" regression: it is handled.
On CRIU major performance regression: it is NOT handled. But I still
think my approach is right. (See cover letter for details.)
(I wrote about all these in cover letter for this v2 patchset.)
So all regressions found so far (except for CRIU major performance
regression) are handled.
Another option is to introduce a deprecation period (as
suggested by Andrei Vagin). I can do this if needed.
--
Askar Safin
^ permalink raw reply
* Re: [PATCH v2] mm: mglru: fix stale batch updates after memcg reparenting
From: Barry Song @ 2026-06-25 10:07 UTC (permalink / raw)
To: Qi Zheng
Cc: Harry Yoo, akpm, david, kasong, shakeel.butt, axelrasmussen,
yuanchu, weixugc, hannes, muchun.song, peiyang_he, mhocko,
roman.gushchin, ljs, linux-mm, linux-kernel, Qi Zheng, stable
In-Reply-To: <f18bf1b1-ccf7-4d77-9389-07311d2d1613@linux.dev>
On Thu, Jun 25, 2026 at 2:11 PM Qi Zheng <qi.zheng@linux.dev> wrote:
[...]
> >>
> >> Does this make sense?
> >
> > Yes, looks good to me!
>
> OK, this sync method makes more sense as it doesn't require adding a
> new lrugen->reparente. I'll go with this method and update v3.
>
> Hi Barry and Baolin, what do you think? Since the sync method has been
> changed, I will temporarily drop your previous Reviewed-by tags in v3. ;)
Feel free to proceed with the new version and drop my tag :-)
Best Regards
Barry
^ permalink raw reply
* Re: [PATCH 2/2] mm/vmpressure: split v1 userspace eventfd code into vmpressure-v1.c
From: Usama Arif @ 2026-06-25 10:06 UTC (permalink / raw)
To: Michal Koutný
Cc: Andrew Morton, david, linux-mm, hannes, tj, shakeel.butt,
roman.gushchin, liam, linux-kernel, ljs, mhocko, rppt, surenb,
vbabka, kernel-team
In-Reply-To: <ajzsE9WMl84MeBOr@localhost.localdomain>
On 25/06/2026 10:00, Michal Koutný wrote:
> On Sat, Jun 06, 2026 at 04:41:34AM -0700, Usama Arif <usama.arif@linux.dev> wrote:
>> Clean up mm/vmpressure.c by separating the cgroup v1 userspace eventfd
>> interface from the shared and v2 in-kernel code.
>>
>> Currently, almost half of mm/vmpressure.c exists to serve tree=true:
>> struct vmpressure_event, the events list and its mutex, the work_struct
>> and vmpressure_work_fn that drains tree_scanned/tree_reclaimed, the
>> parent walk, vmpressure_event(), vmpressure_register_event(),
>> vmpressure_unregister_event(), and vmpressure_prio() (which always
>> calls vmpressure() with tree=true).
>>
>> Move it all into a new mm/vmpressure-v1.c built only when
>> CONFIG_MEMCG_V1=y (following the existing memcontrol-v1.o pattern).
>
> Thanks for this dissection.
>
>> @@ -283,14 +152,8 @@ void vmpressure(gfp_t gfp, int order, struct mem_cgroup *memcg, bool tree,
>> return;
>>
>> if (tree) {
>> - spin_lock(&vmpr->sr_lock);
>> - scanned = vmpr->tree_scanned += scanned;
>> - vmpr->tree_reclaimed += reclaimed;
>> - spin_unlock(&vmpr->sr_lock);
>> -
>> - if (scanned < vmpressure_win)
>> - return;
>> - schedule_work(&vmpr->work);
>> + vmpressure_v1_account_tree(vmpr, scanned, reclaimed);
>> + return;
>> } else {
>> enum vmpressure_levels level;
>>
>
> This return; looks weird, I'd either
> a) drop it or
> b) keep it + de-indent the rest of the vmpressure().
Thanks! Will do.
^ permalink raw reply
* Re: [PATCH 2/2] mm/mm_init: drop overlap_memmap_init()
From: Wei Yang @ 2026-06-25 9:58 UTC (permalink / raw)
To: Mike Rapoport
Cc: linux-mm, Andrew Morton, David Hildenbrand, Taku Izumi, Wei Yang,
Yuan Liu, linux-kernel
In-Reply-To: <20260625073941.145014-3-rppt@kernel.org>
On Thu, Jun 25, 2026 at 10:39:40AM +0300, Mike Rapoport wrote:
>From: "Mike Rapoport (Microsoft)" <rppt@kernel.org>
>
>When ZONE_NORMAL and ZONE_MOVABLE could overlap because kernelcore=mirror
>didn't reduce the span of ZONE_NORMAL, initialization of the memory map had
>to skip overlapping pages during initialization of ZONE_MOVABLE to avoid
>double initialization of the same struct pages.
>
>Since kernelcore=mirror works now the same way as other variants of
>kernelcore=/movablecore=, and adjusts the span of ZONE_NORMAL, there can't
>be an overlap between ZONE_NORMAL and ZONE_MOVABLE.
>
>Remove overlap_memmap_init().
>
Hi, Mike
Do you think it is worth mentioning the double init for the overlapping
range?
>Co-developed-by: Wei Yang <richard.weiyang@gmail.com>
>Signed-off-by: Wei Yang <richard.weiyang@gmail.com>
>Signed-off-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
>---
> mm/mm_init.c | 24 ------------------------
> 1 file changed, 24 deletions(-)
>
>diff --git a/mm/mm_init.c b/mm/mm_init.c
>index dce9dc9f2302..6f0a71ccca30 100644
>--- a/mm/mm_init.c
>+++ b/mm/mm_init.c
>@@ -783,28 +783,6 @@ void __meminit init_deferred_page(unsigned long pfn, int nid)
> __init_deferred_page(pfn, nid);
> }
>
>-/* If zone is ZONE_MOVABLE but memory is mirrored, it is an overlapped init */
>-static bool __meminit
>-overlap_memmap_init(unsigned long zone, unsigned long *pfn)
>-{
>- static struct memblock_region *r __meminitdata;
>-
>- if (mirrored_kernelcore && zone == ZONE_MOVABLE) {
>- if (!r || *pfn >= memblock_region_memory_end_pfn(r)) {
>- for_each_mem_region(r) {
>- if (*pfn < memblock_region_memory_end_pfn(r))
>- break;
>- }
>- }
>- if (*pfn >= memblock_region_memory_base_pfn(r) &&
>- memblock_is_mirror(r)) {
>- *pfn = memblock_region_memory_end_pfn(r);
>- return true;
>- }
>- }
>- return false;
>-}
>-
> /*
> * Only struct pages that correspond to ranges defined by memblock.memory
> * are zeroed and initialized by going through __init_single_page() during
>@@ -891,8 +869,6 @@ void __meminit memmap_init_range(unsigned long size, int nid, unsigned long zone
> * function. They do not exist on hotplugged memory.
> */
> if (context == MEMINIT_EARLY) {
>- if (overlap_memmap_init(zone, &pfn))
>- continue;
> if (defer_init(nid, pfn, zone_end_pfn)) {
> deferred_struct_pages = true;
> break;
>--
>2.53.0
--
Wei Yang
Help you, Help me
^ permalink raw reply
* Re: [Patch mm-hotfixes v4] mm/page_vma_mapped: fix device-private PMD handling
From: Wei Yang @ 2026-06-25 9:57 UTC (permalink / raw)
To: Lance Yang
Cc: richard.weiyang, akpm, david, ljs, riel, liam, vbabka, harry,
jannh, ziy, sj, balbirs, linux-mm, linux-kernel, stable
In-Reply-To: <20260624085756.6598-1-lance.yang@linux.dev>
On Wed, Jun 24, 2026 at 04:57:56PM +0800, Lance Yang wrote:
>
>On Wed, Jun 24, 2026 at 06:53:53AM +0000, Wei Yang wrote:
>>Commit 65edfda6f3f2 ("mm/rmap: extend rmap and migration support
>>device-private entries") introduced the concept of device-private
>>PMD entries, but did not correctly update the rmap walk code to
>>account for them.
>>
>>As a result, when page_vma_mapped_walk() encounters device-private
>>PMD entries, it takes no action other than to acquire the PMD lock
>>and exit.
>>
>>However this is highly problematic for two reasons - firstly,
>>device private entries possess a PFN so check_pmd() needs to be
>>called to ensure an overlapping PFN range.
>>
>>Secondly, and more importantly, if PVMW_MIGRATION is set the
>>caller assumes the returned entry is a migration entry, resulting
>>in memory corruption when the caller tries to interpret the device
>>private entry as such.
>>
>>In addition, commit 146287290023 ("mm/huge_memory: implement
>>device-private THP splitting") allowed device private PMDs to be
>>split like THP mappings, but again did not update this code path.
>>
>>As a result, we might race a PMD split prior to acquiring the PMD
>>lock.
>>
>>This patch addresses all of these issues by invoking check_pmd(),
>>ensuring PVMW_MIGRATION is not set, and checking whether a split raced
>>us, as we do for PMD THP and migration entries.
>>
>>Fixes: 65edfda6f3f2 ("mm/rmap: extend rmap and migration support device-private entries")
>>Cc: <stable@vger.kernel.org>
>>Signed-off-by: Wei Yang <richard.weiyang@gmail.com>
>>Suggested-by: David Hildenbrand <david@kernel.org>
>
>Shouldn't we add
>
>Suggested-by: Lorenzo Stoakes <ljs@kernel.org>
>
>as well?
>
>v4 mostly follows Lorenzo's comments, code bits included. Feels only fair.
Fair enough, added.
>
>>Cc: David Hildenbrand <david@kernel.org>
>>Cc: Balbir Singh <balbirs@nvidia.com>
>>Cc: SeongJae Park <sj@kernel.org>
>>Cc: Zi Yan <ziy@nvidia.com>
>>Cc: Lorenzo Stoakes <ljs@kernel.org>
>>Cc: Lance Yang <lance.yang@linux.dev>
>>
>>---
>>v4:
>> * refine subject and commit log based on Lorenzo's suggestion
>> * put pmd device-private entry handling in its own if branch,
>> suggested by Lorenzo
>>
>>v3:
>> * remove cleanup part, only fix the issue for device-private entry
>> * refine user effect description based on Lorenzo's suggestion
>>
>>v2: https://lore.kernel.org/all/20260616063436.20455-1-richard.weiyang@gmail.com/T/#u
>> * specify the possible error case of current code and user visible effect
>> * besides fix, cleanup the pmd entry handling based on David's suggestion
>>
>>v1: https://lore.kernel.org/linux-mm/20260508013728.21285-1-richard.weiyang@gmail.com/
>>---
>> mm/page_vma_mapped.c | 20 +++++++++++++++-----
>> 1 file changed, 15 insertions(+), 5 deletions(-)
>>
>>diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c
>>index 2ccbabfb2cc1..17dff8aab9f9 100644
>>--- a/mm/page_vma_mapped.c
>>+++ b/mm/page_vma_mapped.c
>>@@ -269,14 +269,24 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw)
>
>
>Hmm ... looks like there may still be a race here ...
>
>Current code picks the branch from the lockless PMD value:
>
> pmde = pmdp_get_lockless(pvmw->pmd);
>
> if (pmd_trans_huge(pmde) || pmd_is_migration_entry(pmde)) {
> pvmw->ptl = pmd_lock(mm, pvmw->pmd);
> pmde = *pvmw->pmd;
> if (!pmd_present(pmde)) {
> softleaf_t entry;
>
> if (!thp_migration_supported() ||
> !(pvmw->flags & PVMW_MIGRATION))
> return not_found(pvmw);
> entry = softleaf_from_pmd(pmde);
>
> if (!softleaf_is_migration(entry) ||
> !check_pmd(softleaf_to_pfn(entry), pvmw))
> return not_found(pvmw);
> return true;
> }
> }
>
>But after taking PTL, the PMD may already be a different non-present PMD
>type:
>
>CPU0: pmde = pmdp_get_lockless(); // sees PMD migration entry
>
>CPU1: remove_migration_ptes(src, dst /* device-private */)
> ... via rmap_walk(dst) ...
> page_vma_mapped_walk(&pvmw /* src, PVMW_MIGRATION */)
> returns with PTL held for the PMD migration entry
> remove_migration_pmd(new = dst page)
> installs a device-private PMD
> next page_vma_mapped_walk()
> drops PTL via not_found()
>
>CPU0: takes PTL
> pmde = *pvmw->pmd; // now device-private PMD
>
>So when PVMW_MIGRATION is not set, current code can return not_found()
>before we even decode the locked PMD as a device-private entry.
>
>Commit 65edfda6f3f2 ("mm/rmap: extend rmap and migration support
>device-private entries") made the
>
>device-private PMD <-> PMD migration
>
>transition possible.
>
>set_pmd_migration_entry() can replace a device-private PMD with a PMD
>migration entry, and remove_migration_pmd() can restore a PMD migration
>entry back to a device-private PMD when the new folio is device-private.
>
Nice catch.
But I think this matters if migration fails and restores the PMD to the src folio.
When we successfully migrate to the new folio, check_pmd() could catch it and
return not_found(), IIUC.
One more question: assume A unmaps a folio, and B migrates the same one.
If B calls set_pmd_migration_entry() first, then A won't see this PMD from
page_vma_mapped_walk(), IIUC. Then B fails to migrate and restores the folio,
as this PMD migration entry is there. So A should check the status after
unmap, right? Would it see an unstable status?
I am a little lost about what the correct way to do this is.
>Maybe decode the locked softleaf entry first, before the migration-only
>checks? Something like this on top:
>
>---8<---
>diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c
>index 17dff8aab9f9..97babd408dba 100644
>--- a/mm/page_vma_mapped.c
>+++ b/mm/page_vma_mapped.c
>@@ -249,10 +249,18 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw)
> if (!pmd_present(pmde)) {
> softleaf_t entry;
>
>+ entry = softleaf_from_pmd(pmde);
>+ if (softleaf_is_device_private(entry)) {
>+ if (pvmw->flags & PVMW_MIGRATION)
>+ return not_found(pvmw);
>+ if (!check_pmd(softleaf_to_pfn(entry), pvmw))
>+ return not_found(pvmw);
>+ return true;
>+ }
>+
If we have to do this, I am afraid we can put the handling of all three cases
here...
There is no need to handle pmd_is_device_private_entry() in two places.
> if (!thp_migration_supported() ||
> !(pvmw->flags & PVMW_MIGRATION))
> return not_found(pvmw);
>- entry = softleaf_from_pmd(pmde);
>
> if (!softleaf_is_migration(entry) ||
> !check_pmd(softleaf_to_pfn(entry), pvmw))
>@@ -266,7 +274,10 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw)
> return not_found(pvmw);
> return true;
> }
>- /* THP pmd was split under us: handle on pte level */
>+ /*
>+ * THP pmd was split under us, or device-private PMD
>+ * changed under us: handle on pte level.
>+ */
> spin_unlock(pvmw->ptl);
> pvmw->ptl = NULL;
> } else if (pmd_is_device_private_entry(pmde)) {
>--
>
>Anyway, that stuff is getting kinda messy now. Feels like it really needs
>a cleanup on top before it bites us again :)
Agree.
This turned out to be more complicated than I thought :-)
>Cheers, Lance
--
Wei Yang
Help you, Help me
^ permalink raw reply
* Re: [PATCH v8 46/46] KVM: selftests: Update private memory exits test to work with per-gmem attributes
From: Fuad Tabba @ 2026-06-25 9:56 UTC (permalink / raw)
To: ackerleytng
Cc: aik, andrew.jones, binbin.wu, brauner, chao.p.peng, david,
jmattson, jthoughton, michael.roth, oupton, pankaj.gupta, qperret,
rick.p.edgecombe, rientjes, shivankg, steven.price, willy, wyihan,
yan.y.zhao, forkloop, pratyush, suzuki.poulose, aneesh.kumar,
liam, Paolo Bonzini, Sean Christopherson, Thomas Gleixner,
Ingo Molnar, Borislav Petkov, Dave Hansen, x86, H. Peter Anvin,
Steven Rostedt, Masami Hiramatsu, Mathieu Desnoyers,
Jonathan Corbet, Shuah Khan, Shuah Khan, Vishal Annapurve,
Andrew Morton, Chris Li, Kairui Song, Kemeng Shi, Nhat Pham,
Barry Song, Axel Rasmussen, Yuanchu Xie, Wei Xu, Youngjun Park,
Qi Zheng, Shakeel Butt, Kiryl Shutsemau, Baoquan He,
Jason Gunthorpe, Vlastimil Babka, kvm, linux-kernel,
linux-trace-kernel, linux-doc, linux-kselftest, linux-mm,
linux-coco
In-Reply-To: <20260618-gmem-inplace-conversion-v8-46-9d2959357853@google.com>
On Fri, 19 Jun 2026 at 01:32, Ackerley Tng via B4 Relay
<devnull+ackerleytng.google.com@kernel.org> wrote:
>
> From: Sean Christopherson <seanjc@google.com>
>
> Skip setting memory to private in the private memory exits test when using
> per-gmem memory attributes, as memory is initialized to private by default
> for guest_memfd, and using vm_mem_set_private() on a guest_memfd instance
> requires creating guest_memfd with GUEST_MEMFD_FLAG_MMAP (which is totally
> doable, but would need to be conditional and is ultimately unnecessary).
>
> Expect an emulated MMIO instead of a memory fault exit when attributes are
> per-gmem, as deleting the memslot effectively drops the private status,
> i.e. the GPA becomes shared and thus supports emulated MMIO.
>
> Skip the "memslot not private" test entirely, as private vs. shared state
> for x86 software-protected VMs comes from the memory attributes themselves,
> and so when doing in-place conversions there can never be a disconnect
> between the expected and actual states.
>
> Signed-off-by: Sean Christopherson <seanjc@google.com>
Reviewed-by: Fuad Tabba <tabba@google.com>
Cheers,
/fuad
> ---
> .../selftests/kvm/x86/private_mem_kvm_exits_test.c | 36 ++++++++++++++++++----
> 1 file changed, 30 insertions(+), 6 deletions(-)
>
> diff --git a/tools/testing/selftests/kvm/x86/private_mem_kvm_exits_test.c b/tools/testing/selftests/kvm/x86/private_mem_kvm_exits_test.c
> index 10db9fe6d9063..70ed16066c63e 100644
> --- a/tools/testing/selftests/kvm/x86/private_mem_kvm_exits_test.c
> +++ b/tools/testing/selftests/kvm/x86/private_mem_kvm_exits_test.c
> @@ -62,8 +62,9 @@ static void test_private_access_memslot_deleted(void)
>
> virt_map(vm, EXITS_TEST_GVA, EXITS_TEST_GPA, EXITS_TEST_NPAGES);
>
> - /* Request to access page privately */
> - vm_mem_set_private(vm, EXITS_TEST_GPA, EXITS_TEST_SIZE);
> + /* Request to access page privately. */
> + if (!kvm_has_gmem_attributes)
> + vm_mem_set_private(vm, EXITS_TEST_GPA, EXITS_TEST_SIZE);
>
> pthread_create(&vm_thread, NULL,
> (void *(*)(void *))run_vcpu_get_exit_reason,
> @@ -74,10 +75,26 @@ static void test_private_access_memslot_deleted(void)
> pthread_join(vm_thread, &thread_return);
> exit_reason = (u32)(u64)thread_return;
>
> - TEST_ASSERT_EQ(exit_reason, KVM_EXIT_MEMORY_FAULT);
> - TEST_ASSERT_EQ(vcpu->run->memory_fault.flags, KVM_MEMORY_EXIT_FLAG_PRIVATE);
> - TEST_ASSERT_EQ(vcpu->run->memory_fault.gpa, EXITS_TEST_GPA);
> - TEST_ASSERT_EQ(vcpu->run->memory_fault.size, EXITS_TEST_SIZE);
> + /*
> + * If attributes are tracked per-gmem, deleting the memslot that points
> + * at the gmem instance effectively makes the memory shared, and so the
> + * read should trigger emulated MMIO.
> + *
> + * If attributes are tracked per-VM, deleting the memslot shouldn't
> + * affect the private attribute, and so KVM should generate a memory
> + * fault exit (emulated MMIO on private GPAs is disallowed).
> + */
> + if (kvm_has_gmem_attributes) {
> + TEST_ASSERT_EQ(exit_reason, KVM_EXIT_MMIO);
> + TEST_ASSERT_EQ(vcpu->run->mmio.phys_addr, EXITS_TEST_GPA);
> + TEST_ASSERT_EQ(vcpu->run->mmio.len, sizeof(u64));
> + TEST_ASSERT_EQ(vcpu->run->mmio.is_write, false);
> + } else {
> + TEST_ASSERT_EQ(exit_reason, KVM_EXIT_MEMORY_FAULT);
> + TEST_ASSERT_EQ(vcpu->run->memory_fault.flags, KVM_MEMORY_EXIT_FLAG_PRIVATE);
> + TEST_ASSERT_EQ(vcpu->run->memory_fault.gpa, EXITS_TEST_GPA);
> + TEST_ASSERT_EQ(vcpu->run->memory_fault.size, EXITS_TEST_SIZE);
> + }
>
> kvm_vm_free(vm);
> }
> @@ -88,6 +105,13 @@ static void test_private_access_memslot_not_private(void)
> struct kvm_vcpu *vcpu;
> u32 exit_reason;
>
> + /*
> + * Accessing non-private memory as private with a software-protected VM
> + * isn't possible when doing in-place conversions.
> + */
> + if (kvm_has_gmem_attributes)
> + return;
> +
> vm = vm_create_shape_with_one_vcpu(protected_vm_shape, &vcpu,
> guest_repeatedly_read);
>
>
> --
> 2.55.0.rc0.738.g0c8ab3ebcc-goog
>
>
^ permalink raw reply
* Re: [PATCH v8 45/46] KVM: selftests: Update private_mem_conversions_test to mmap() guest_memfd
From: Fuad Tabba @ 2026-06-25 9:43 UTC (permalink / raw)
To: ackerleytng
Cc: aik, andrew.jones, binbin.wu, brauner, chao.p.peng, david,
jmattson, jthoughton, michael.roth, oupton, pankaj.gupta, qperret,
rick.p.edgecombe, rientjes, shivankg, steven.price, willy, wyihan,
yan.y.zhao, forkloop, pratyush, suzuki.poulose, aneesh.kumar,
liam, Paolo Bonzini, Sean Christopherson, Thomas Gleixner,
Ingo Molnar, Borislav Petkov, Dave Hansen, x86, H. Peter Anvin,
Steven Rostedt, Masami Hiramatsu, Mathieu Desnoyers,
Jonathan Corbet, Shuah Khan, Shuah Khan, Vishal Annapurve,
Andrew Morton, Chris Li, Kairui Song, Kemeng Shi, Nhat Pham,
Barry Song, Axel Rasmussen, Yuanchu Xie, Wei Xu, Youngjun Park,
Qi Zheng, Shakeel Butt, Kiryl Shutsemau, Baoquan He,
Jason Gunthorpe, Vlastimil Babka, kvm, linux-kernel,
linux-trace-kernel, linux-doc, linux-kselftest, linux-mm,
linux-coco
In-Reply-To: <20260618-gmem-inplace-conversion-v8-45-9d2959357853@google.com>
On Fri, 19 Jun 2026 at 01:32, Ackerley Tng via B4 Relay
<devnull+ackerleytng.google.com@kernel.org> wrote:
>
> From: Ackerley Tng <ackerleytng@google.com>
>
> Update the private memory conversions selftest to also test conversions
> that are done "in-place" via per-guest_memfd memory attributes. In-place
> conversions require the host to be able to mmap() the guest_memfd so that
> the host and guest can share the same backing physical memory.
>
> This includes several updates, that are conditioned on the system
> supporting per-guest_memfd attributes (kvm_has_gmem_attributes):
>
> 1. Set up guest_memfd requesting MMAP and INIT_SHARED.
>
> 2. With in-place conversions, the host's mapping points directly to the
> guest's memory. When the guest converts a region to private, host access
> to that region is blocked. Update the test to expect a SIGBUS when
> attempting to access the host virtual address (HVA) of private memory.
>
> 3. Use vm_mem_set_memory_attributes(), which chooses how to set memory
> attributes based on whether kvm_has_gmem_attributes.
>
> Restrict the test to using VM_MEM_SRC_SHMEM because guest_memfd's required
> mmap() flags and page sizes happens to align with those of
> VM_MEM_SRC_SHMEM. As long as VM_MEM_SRC_SHMEM is used for src_type,
> vm_mem_add() works as intended.
>
> Signed-off-by: Ackerley Tng <ackerleytng@google.com>
> Co-developed-by: Sean Christopherson <seanjc@google.com>
> Signed-off-by: Sean Christopherson <seanjc@google.com>
Reviewed-by: Fuad Tabba <tabba@google.com>
Cheers,
/fuad
> ---
> .../kvm/x86/private_mem_conversions_test.c | 44 ++++++++++++++++++----
> 1 file changed, 36 insertions(+), 8 deletions(-)
>
> diff --git a/tools/testing/selftests/kvm/x86/private_mem_conversions_test.c b/tools/testing/selftests/kvm/x86/private_mem_conversions_test.c
> index 289ad10063fca..4308c67952310 100644
> --- a/tools/testing/selftests/kvm/x86/private_mem_conversions_test.c
> +++ b/tools/testing/selftests/kvm/x86/private_mem_conversions_test.c
> @@ -306,9 +306,12 @@ static void handle_exit_hypercall(struct kvm_vcpu *vcpu)
> if (do_fallocate)
> vm_guest_mem_fallocate(vm, gpa, size, map_shared);
>
> - if (set_attributes)
> - vm_set_memory_attributes(vm, gpa, size,
> - map_shared ? 0 : KVM_MEMORY_ATTRIBUTE_PRIVATE);
> + if (set_attributes) {
> + u64 attrs = map_shared ? 0 : KVM_MEMORY_ATTRIBUTE_PRIVATE;
> +
> + vm_mem_set_memory_attributes(vm, gpa, size, attrs);
> + }
> +
> run->hypercall.ret = 0;
> }
>
> @@ -352,8 +355,20 @@ static void *__test_mem_conversions(void *__vcpu)
> size_t nr_bytes = min_t(size_t, vm->page_size, size - i);
> u8 *hva = addr_gpa2hva(vm, gpa + i);
>
> - /* In all cases, the host should observe the shared data. */
> - memcmp_h(hva, gpa + i, uc.args[3], nr_bytes);
> + /*
> + * When using per-guest_memfd memory attributes,
> + * i.e. in-place conversion, host accesses will
> + * point at guest memory and should SIGBUS when
> + * guest memory is private. When using per-VM
> + * attributes, i.e. separate backing for shared
> + * vs. private, the host should always observe
> + * the shared data.
> + */
> + if (kvm_has_gmem_attributes &&
> + uc.args[0] == SYNC_PRIVATE)
> + TEST_EXPECT_SIGBUS(READ_ONCE(*hva));
> + else
> + memcmp_h(hva, gpa + i, uc.args[3], nr_bytes);
>
> /* For shared, write the new pattern to guest memory. */
> if (uc.args[0] == SYNC_SHARED)
> @@ -382,6 +397,7 @@ static void test_mem_conversions(enum vm_mem_backing_src_type src_type, u32 nr_v
> const size_t slot_size = memfd_size / nr_memslots;
> struct kvm_vcpu *vcpus[KVM_MAX_VCPUS];
> pthread_t threads[KVM_MAX_VCPUS];
> + u64 gmem_flags;
> struct kvm_vm *vm;
> int memfd, i;
>
> @@ -397,12 +413,17 @@ static void test_mem_conversions(enum vm_mem_backing_src_type src_type, u32 nr_v
>
> vm_enable_cap(vm, KVM_CAP_EXIT_HYPERCALL, (1 << KVM_HC_MAP_GPA_RANGE));
>
> - memfd = vm_create_guest_memfd(vm, memfd_size, 0);
> + if (kvm_has_gmem_attributes)
> + gmem_flags = GUEST_MEMFD_FLAG_MMAP | GUEST_MEMFD_FLAG_INIT_SHARED;
> + else
> + gmem_flags = 0;
> +
> + memfd = vm_create_guest_memfd(vm, memfd_size, gmem_flags);
>
> for (i = 0; i < nr_memslots; i++)
> vm_mem_add(vm, src_type, BASE_DATA_GPA + slot_size * i,
> BASE_DATA_SLOT + i, slot_size / vm->page_size,
> - KVM_MEM_GUEST_MEMFD, memfd, slot_size * i, 0);
> + KVM_MEM_GUEST_MEMFD, memfd, slot_size * i, gmem_flags);
>
> for (i = 0; i < nr_vcpus; i++) {
> gpa_t gpa = BASE_DATA_GPA + i * per_cpu_size;
> @@ -452,17 +473,24 @@ static void usage(const char *cmd)
>
> int main(int argc, char *argv[])
> {
> - enum vm_mem_backing_src_type src_type = DEFAULT_VM_MEM_SRC;
> + enum vm_mem_backing_src_type src_type;
> u32 nr_memslots = 1;
> u32 nr_vcpus = 1;
> int opt;
>
> TEST_REQUIRE(kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(KVM_X86_SW_PROTECTED_VM));
>
> + src_type = kvm_has_gmem_attributes ? VM_MEM_SRC_SHMEM :
> + DEFAULT_VM_MEM_SRC;
> +
> while ((opt = getopt(argc, argv, "hm:s:n:")) != -1) {
> switch (opt) {
> case 's':
> src_type = parse_backing_src_type(optarg);
> + TEST_ASSERT(!kvm_has_gmem_attributes ||
> + src_type == VM_MEM_SRC_SHMEM,
> + "Testing in-place conversions, only %s mem_type supported\n",
> + vm_mem_backing_src_alias(VM_MEM_SRC_SHMEM)->name);
> break;
> case 'n':
> nr_vcpus = atoi_positive("nr_vcpus", optarg);
>
> --
> 2.55.0.rc0.738.g0c8ab3ebcc-goog
>
>
^ permalink raw reply
* Re: [PATCH v2 13/13] mm: remove __GFP_NO_CODETAG
From: Brendan Jackman @ 2026-06-25 9:40 UTC (permalink / raw)
To: Suren Baghdasaryan, Hao Ge
Cc: Brendan Jackman, Vlastimil Babka, Harry Yoo (Oracle),
Gregory Price, Alexei Starovoitov, Matthew Wilcox, linux-mm,
linux-kernel, linux-rt-devel, Michal Hocko, Andrew Morton,
Johannes Weiner, Zi Yan, Muchun Song, Oscar Salvador,
David Hildenbrand, Lorenzo Stoakes, Liam R. Howlett,
Mike Rapoport, Matthew Brost, Joshua Hahn, Rakie Kim,
Byungchul Park, Alistair Popple, Ying Huang, Hao Li,
Christoph Lameter, David Rientjes, Roman Gushchin,
Sebastian Andrzej Siewior, Clark Williams, Steven Rostedt
In-Reply-To: <CAJuCfpF9-CS-gt1S=tRNaK10yNnnod9DLEe-Y5L3ad5u60Gagg@mail.gmail.com>
On Wed Jun 24, 2026 at 4:47 PM UTC, Suren Baghdasaryan wrote:
> On Tue, Jun 23, 2026 at 12:57 AM Hao Ge <hao.ge@linux.dev> wrote:
>>
>> Hi Brendan
>>
>>
>> On 2026/6/22 18:01, Brendan Jackman wrote:
>> > Now that alloc_pages has an entrypoint that allows passing alloc_flags,
>> > we can take advantage of this to start removing GFP flags that are only
>> > used for mm-internal stuff.
>> >
>> > This requires also plumbing the alloc_flags into some more of the
>> > allocator code, in particular __alloc_pages[_noprof]() gets an
>> > alloc_flags arg to go along with its callees, and we now need to pass
>> > those flags deeper into the allocator so they can reach the alloc_tag
>> > code.
>> >
>> > To try and keep the new ALLOC_NO_CODETAG's scope nice and narrow, don't
>> > define it in mm/internal.h, instead just define a "reserved bit" and
>> > then use that in places that don't care about what it means.
>
> I don't understand why you want to narrow down visibility of one of
> the alloc_flag bits. We don't do that for any other flags, and this
> seems like an unnecessary complexity.
OK, I can drop this and just expose it directly.
This was just because __GFP_NO_CODETAG was local to the .c file and it felt
like a "regression" to "leak" it into the header. But yeah, on the other
hand this "reserved bit" thing is unnecessary indirection.
>> > Signed-off-by: Brendan Jackman <jackmanb@google.com>
>>
>>
>> Nit: The title says "remove __GFP_NO_CODETAG" but the flag isn't really
>> removed — it's migrated from gfp_t to alloc_flags as
>>
>> ALLOC_NO_CODETAG. Something like "mm: replace __GFP_NO_CODETAG with an
>> alloc_flag" would be more accurate.
>>
>>
>> Additionally, as Lorenzo pointed out in another thread, you will likely
>> need to rebase this series later.
>>
>> I noticed Vlastimil has already landed the slab changes removing
>> __GFP_NO_OBJ_EXT into mainline:
>>
>> https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=335c347686e76df9d2c7d7f61b5ea627a4c5cb4c
>>
>> For v3, it might make sense to fold in Vlastimil's patch so the full
>> removal of __GFP_NO_OBJ_EXT can be completed end-to-end
>>
>> https://lore.kernel.org/all/20260609-slab_alloc_flags-v1-15-2bf4a4b9b526@kernel.org/
>
> I think Vlastimil's patch will be merged before this one, so this
> patch could remove __GFP_NO_OBJ_EXT completely, saying that its last
> user (__GFP_NO_CODETAG) is gone.
Yup, Vlastimil's other patches went directly to Linus so the final
__GFP_NO_OBJ_EXT removal is already in my local branch for the v3 :)
^ permalink raw reply
* Re: [PATCH v2] mm: annotate data-race in cpu_needs_drain() and need_mlock_drain()
From: Pedro Falcato @ 2026-06-25 9:31 UTC (permalink / raw)
To: Xuewen Wang
Cc: akpm, liam, ljs, vbabka, jannh, chrisl, kasong, shikemeng,
nphamcs, baoquan.he, baohua, youngjun.park, qi.zheng,
shakeel.butt, axelrasmussen, yuanchu, weixugc, david, linux-mm,
linux-kernel
In-Reply-To: <20260625065153.1581419-1-wangxuewen@kylinos.cn>
On Thu, Jun 25, 2026 at 02:51:53PM +0800, Xuewen Wang wrote:
> KCSAN reports a data-race when cpu_needs_drain() reads another CPU's
> per-cpu folio_batch->nr without locking, while the owning CPU writes
> to it via folio_batch_add(). The same race exists in need_mlock_drain()
> which is called from cpu_needs_drain().
>
> Reading a slightly stale value is harmless -- cpu_needs_drain() only
> decides whether to schedule a drain, and the next iteration of
> __lru_add_drain_all() will re-check.
>
> All other callers of folio_batch_count() either use stack variables or
> access their own CPU's per-cpu data where no race exists, so
> data_race() is added at the call sites rather than in
> folio_batch_count() itself to avoid suppressing KCSAN warnings for
> future callers that may have real bugs.
>
> Signed-off-by: Xuewen Wang <wangxuewen@kylinos.cn>
> ---
> Changes in v2:
> - Use data_race() instead of READ_ONCE() in folio_batch_count(), as
> suggested by Lorenzo. READ_ONCE() is unnecessary for a single-byte
> read and imposes overhead on all callers, most of which have no race.
> - Move the annotation from folio_batch_count() to the actual call sites
> (cpu_needs_drain() and need_mlock_drain()) where the cross-CPU race
> occurs, rather than affecting all callers.
> - Add need_mlock_drain() which has the same cross-CPU race.
> - Add comments explaining why the data race is safe.
> v1:
> https://lore.kernel.org/all/20260624092606.1083449-1-wangxuewen@kylinos.cn/
> ---
> mm/mlock.c | 2 +-
> mm/swap.c | 12 ++++++------
> 2 files changed, 7 insertions(+), 7 deletions(-)
>
> diff --git a/mm/mlock.c b/mm/mlock.c
> index 8c227fefa2df..fbdb5018e2c3 100644
> --- a/mm/mlock.c
> +++ b/mm/mlock.c
> @@ -232,7 +232,7 @@ void mlock_drain_remote(int cpu)
>
> bool need_mlock_drain(int cpu)
> {
> - return folio_batch_count(&per_cpu(mlock_fbatch.fbatch, cpu));
> + return data_race(folio_batch_count(&per_cpu(mlock_fbatch.fbatch, cpu)));
> }
>
> /**
> diff --git a/mm/swap.c b/mm/swap.c
> index 588f50d8f1a8..d046428caed6 100644
> --- a/mm/swap.c
> +++ b/mm/swap.c
> @@ -828,12 +828,12 @@ static bool cpu_needs_drain(unsigned int cpu)
> struct cpu_fbatches *fbatches = &per_cpu(cpu_fbatches, cpu);
>
> /* Check these in order of likelihood that they're not zero */
> - return folio_batch_count(&fbatches->lru_add) ||
> - folio_batch_count(&fbatches->lru_move_tail) ||
> - folio_batch_count(&fbatches->lru_deactivate_file) ||
> - folio_batch_count(&fbatches->lru_deactivate) ||
> - folio_batch_count(&fbatches->lru_lazyfree) ||
> - folio_batch_count(&fbatches->lru_activate) ||
> + return data_race(folio_batch_count(&fbatches->lru_add)) ||
> + data_race(folio_batch_count(&fbatches->lru_move_tail)) ||
> + data_race(folio_batch_count(&fbatches->lru_deactivate_file)) ||
> + data_race(folio_batch_count(&fbatches->lru_deactivate)) ||
> + data_race(folio_batch_count(&fbatches->lru_lazyfree)) ||
> + data_race(folio_batch_count(&fbatches->lru_activate)) ||
> need_mlock_drain(cpu) ||
> has_bh_in_lru(cpu, NULL);
> }
eww.
How about:
static bool cpu_needs_drain(unsigned int cpu)
{
struct cpu_fbatches *fbatches = &per_cpu(cpu_fbatches, cpu);
/* Check these in order of likelihood that they're not zero */
return data_race(
folio_batch_count(&fbatches->lru_add) ||
folio_batch_count(&fbatches->lru_move_tail) ||
folio_batch_count(&fbatches->lru_deactivate_file) ||
folio_batch_count(&fbatches->lru_deactivate) ||
folio_batch_count(&fbatches->lru_lazyfree) ||
folio_batch_count(&fbatches->lru_activate) ||
need_mlock_drain(cpu)) ||
has_bh_in_lru(cpu, NULL);
}
this should work equally well, while being far more aesthetically pleasing :)
> --
> 2.25.1
>
--
Pedro
^ permalink raw reply
* Re: [PATCH v8 44/46] KVM: selftests: Make TEST_EXPECT_SIGBUS thread-safe
From: Fuad Tabba @ 2026-06-25 9:30 UTC (permalink / raw)
To: ackerleytng
Cc: aik, andrew.jones, binbin.wu, brauner, chao.p.peng, david,
jmattson, jthoughton, michael.roth, oupton, pankaj.gupta, qperret,
rick.p.edgecombe, rientjes, shivankg, steven.price, willy, wyihan,
yan.y.zhao, forkloop, pratyush, suzuki.poulose, aneesh.kumar,
liam, Paolo Bonzini, Sean Christopherson, Thomas Gleixner,
Ingo Molnar, Borislav Petkov, Dave Hansen, x86, H. Peter Anvin,
Steven Rostedt, Masami Hiramatsu, Mathieu Desnoyers,
Jonathan Corbet, Shuah Khan, Shuah Khan, Vishal Annapurve,
Andrew Morton, Chris Li, Kairui Song, Kemeng Shi, Nhat Pham,
Barry Song, Axel Rasmussen, Yuanchu Xie, Wei Xu, Youngjun Park,
Qi Zheng, Shakeel Butt, Kiryl Shutsemau, Baoquan He,
Jason Gunthorpe, Vlastimil Babka, kvm, linux-kernel,
linux-trace-kernel, linux-doc, linux-kselftest, linux-mm,
linux-coco
In-Reply-To: <20260618-gmem-inplace-conversion-v8-44-9d2959357853@google.com>
On Fri, 19 Jun 2026 at 01:32, Ackerley Tng via B4 Relay
<devnull+ackerleytng.google.com@kernel.org> wrote:
>
> From: Ackerley Tng <ackerleytng@google.com>
>
> The TEST_EXPECT_SIGBUS macro is not thread-safe as it uses a global
> sigjmp_buf and installs a global SIGBUS signal handler. If multiple threads
> execute the macro concurrently, they will race on installing the signal
> handler and stomp on other threads' jump buffers, leading to incorrect test
> behavior.
>
> Make TEST_EXPECT_SIGBUS thread-safe with the following changes:
>
> Share the KVM tests' global signal handler. sigaction() applies to all
> threads; without sharing a global signal handler, one thread may have
> removed the signal handler that another thread added, hence leading to
> unexpected signals.
>
> The alternative of layering signal handlers was considered, but calling
> sigaction() within TEST_EXPECT_SIGBUS() necessarily creates a race. To
> avoid adding new setup and teardown routines to do sigaction() and keep
> usage of TEST_EXPECT_SIGBUS() simple, share the KVM tests' global signal
> handler.
>
> Opportunistically rename report_unexpected_signal to
> catchall_signal_handler.
>
> To continue to only expect SIGBUS within specific regions of code, use a
> thread-specific variable, expecting_sigbus, to replace installing and
> removing signal handlers.
>
> Make the execution environment for the thread, sigjmp_buf, a
> thread-specific variable.
>
> As part of TEST_EXPECT_SIGBUS(), assert the prerequisite for this setup,
> that the current signal handler is the catchall_signal_handler.
>
> Signed-off-by: Ackerley Tng <ackerleytng@google.com>
Reviewed-by: Fuad Tabba <tabba@google.com>
Cheers,
/fuad
> ---
> tools/testing/selftests/kvm/include/test_util.h | 32 +++++++++++++------------
> tools/testing/selftests/kvm/lib/kvm_util.c | 18 ++++++++++----
> tools/testing/selftests/kvm/lib/test_util.c | 7 ------
> 3 files changed, 30 insertions(+), 27 deletions(-)
>
> diff --git a/tools/testing/selftests/kvm/include/test_util.h b/tools/testing/selftests/kvm/include/test_util.h
> index 51287fac8138a..bd75162ec868d 100644
> --- a/tools/testing/selftests/kvm/include/test_util.h
> +++ b/tools/testing/selftests/kvm/include/test_util.h
> @@ -82,21 +82,23 @@ do { \
> __builtin_unreachable(); \
> } while (0)
>
> -extern sigjmp_buf expect_sigbus_jmpbuf;
> -void expect_sigbus_handler(int signum);
> -
> -#define TEST_EXPECT_SIGBUS(action) \
> -do { \
> - struct sigaction sa_old, sa_new = { \
> - .sa_handler = expect_sigbus_handler, \
> - }; \
> - \
> - sigaction(SIGBUS, &sa_new, &sa_old); \
> - if (sigsetjmp(expect_sigbus_jmpbuf, 1) == 0) { \
> - action; \
> - TEST_FAIL("'%s' should have triggered SIGBUS", #action); \
> - } \
> - sigaction(SIGBUS, &sa_old, NULL); \
> +extern __thread sigjmp_buf expect_sigbus_jmpbuf;
> +extern __thread volatile sig_atomic_t expecting_sigbus;
> +extern void catchall_signal_handler(int signum);
> +
> +#define TEST_EXPECT_SIGBUS(action) \
> +do { \
> + struct sigaction __sa = {}; \
> + \
> + TEST_ASSERT_EQ(sigaction(SIGBUS, NULL, &__sa), 0); \
> + TEST_ASSERT_EQ(__sa.sa_handler, &catchall_signal_handler); \
> + \
> + expecting_sigbus = true; \
> + if (sigsetjmp(expect_sigbus_jmpbuf, 1) == 0) { \
> + action; \
> + TEST_FAIL("'%s' should have triggered SIGBUS", #action);\
> + } \
> + expecting_sigbus = false; \
> } while (0)
>
> size_t parse_size(const char *size);
> diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
> index 6b304e8a0e0d5..b4f104436875b 100644
> --- a/tools/testing/selftests/kvm/lib/kvm_util.c
> +++ b/tools/testing/selftests/kvm/lib/kvm_util.c
> @@ -2292,13 +2292,20 @@ __weak void kvm_selftest_arch_init(void)
> {
> }
>
> -static void report_unexpected_signal(int signum)
> +__thread sigjmp_buf expect_sigbus_jmpbuf;
> +__thread volatile sig_atomic_t expecting_sigbus;
> +
> +void catchall_signal_handler(int signum)
> {
> + switch (signum) {
> + case SIGBUS: {
> + if (expecting_sigbus)
> + siglongjmp(expect_sigbus_jmpbuf, 1);
> +
> + TEST_FAIL("Unexpected SIGBUS (%d)\n", signum);
> + }
> #define KVM_CASE_SIGNUM(sig) \
> case sig: TEST_FAIL("Unexpected " #sig " (%d)\n", signum)
> -
> - switch (signum) {
> - KVM_CASE_SIGNUM(SIGBUS);
> KVM_CASE_SIGNUM(SIGSEGV);
> KVM_CASE_SIGNUM(SIGILL);
> KVM_CASE_SIGNUM(SIGFPE);
> @@ -2310,12 +2317,13 @@ static void report_unexpected_signal(int signum)
> void __attribute((constructor)) kvm_selftest_init(void)
> {
> struct sigaction sig_sa = {
> - .sa_handler = report_unexpected_signal,
> + .sa_handler = catchall_signal_handler,
> };
>
> /* Tell stdout not to buffer its content. */
> setbuf(stdout, NULL);
>
> + expecting_sigbus = false;
> sigaction(SIGBUS, &sig_sa, NULL);
> sigaction(SIGSEGV, &sig_sa, NULL);
> sigaction(SIGILL, &sig_sa, NULL);
> diff --git a/tools/testing/selftests/kvm/lib/test_util.c b/tools/testing/selftests/kvm/lib/test_util.c
> index bab1bd2b775b6..30eb701e4becd 100644
> --- a/tools/testing/selftests/kvm/lib/test_util.c
> +++ b/tools/testing/selftests/kvm/lib/test_util.c
> @@ -18,13 +18,6 @@
>
> #include "test_util.h"
>
> -sigjmp_buf expect_sigbus_jmpbuf;
> -
> -void __attribute__((used)) expect_sigbus_handler(int signum)
> -{
> - siglongjmp(expect_sigbus_jmpbuf, 1);
> -}
> -
> /*
> * Random number generator that is usable from guest code. This is the
> * Park-Miller LCG using standard constants.
>
> --
> 2.55.0.rc0.738.g0c8ab3ebcc-goog
>
>
^ permalink raw reply
* Re: [PATCH] mm/memory: refactor finish_fault
From: Sarthak Sharma @ 2026-06-25 9:25 UTC (permalink / raw)
To: David Hildenbrand (Arm), Andrew Morton
Cc: Lorenzo Stoakes, Liam R . Howlett, Vlastimil Babka, Mike Rapoport,
Suren Baghdasaryan, Michal Hocko, Dev Jain, linux-mm,
linux-kernel
In-Reply-To: <4f43c837-0f9a-4302-a3d7-f252d84ea390@kernel.org>
Hi David!
On 6/25/26 2:22 PM, David Hildenbrand (Arm) wrote:
> On 6/24/26 12:20, Sarthak Sharma wrote:
>> finish_fault() currently has a goto fallback implementation
>> where we try to map a large folio with PTEs. If that cannot be
>> installed, we goto fallback and go through the fallback mapping
>> path again. This looks weird and is tough to comprehend.
>>
>> Remove the goto fallback implementation and try to map the
>> whole folio if allowed. If the whole folio cannot be mapped,
>> fall back to single page mapping without repeating the whole
>> function.
>>
>> The cleanup of finish_fault() was suggested by David in [1].
>>
>> [1] https://lore.kernel.org/all/3684c55a-6581-4731-b94a-19526f455a1e@kernel.org/
>>
>> Suggested-by: David Hildenbrand (Arm) <david@kernel.org>
>> Signed-off-by: Sarthak Sharma <sarthak.sharma@arm.com>
>> ---
>> Tested this patch by running mm selftests on baseline and patched 7.1
>> kernels. No regressions were observed.
>
> This goes in the right direction, but I think we can do better.
>
> For example, we know that we always have to fall back to a single PTE with
> userfaultfd (incl. not mapping a PMD-sized folio by PMDs).
>
> Let me find some time to play with this myself.
>
I intended this patch to be just a refactor to get rid of the goto
fallback mechanism and make the VMA and PTE table bound checks easier to
read.
But yeah, I agree that we can work on cases like the userfaultfd one you
mentioned. Thanks for the feedback.
^ permalink raw reply
* Re: [PATCH v8 43/46] KVM: selftests: Check fd/flags provided to mmap() when setting up memslot
From: Fuad Tabba @ 2026-06-25 9:20 UTC (permalink / raw)
To: ackerleytng
Cc: aik, andrew.jones, binbin.wu, brauner, chao.p.peng, david,
jmattson, jthoughton, michael.roth, oupton, pankaj.gupta, qperret,
rick.p.edgecombe, rientjes, shivankg, steven.price, willy, wyihan,
yan.y.zhao, forkloop, pratyush, suzuki.poulose, aneesh.kumar,
liam, Paolo Bonzini, Sean Christopherson, Thomas Gleixner,
Ingo Molnar, Borislav Petkov, Dave Hansen, x86, H. Peter Anvin,
Steven Rostedt, Masami Hiramatsu, Mathieu Desnoyers,
Jonathan Corbet, Shuah Khan, Shuah Khan, Vishal Annapurve,
Andrew Morton, Chris Li, Kairui Song, Kemeng Shi, Nhat Pham,
Barry Song, Axel Rasmussen, Yuanchu Xie, Wei Xu, Youngjun Park,
Qi Zheng, Shakeel Butt, Kiryl Shutsemau, Baoquan He,
Jason Gunthorpe, Vlastimil Babka, kvm, linux-kernel,
linux-trace-kernel, linux-doc, linux-kselftest, linux-mm,
linux-coco
In-Reply-To: <20260618-gmem-inplace-conversion-v8-43-9d2959357853@google.com>
On Fri, 19 Jun 2026 at 01:32, Ackerley Tng via B4 Relay
<devnull+ackerleytng.google.com@kernel.org> wrote:
>
> From: Sean Christopherson <seanjc@google.com>
>
> Check that a valid fd provided to mmap() must be accompanied by MAP_SHARED.
>
> With an invalid fd (usually used for anonymous mappings), there are no
> constraints on mmap() flags.
>
> Add this check to make sure that when a guest_memfd is used as region->fd,
> the flag provided to mmap() will include MAP_SHARED.
>
> Signed-off-by: Sean Christopherson <seanjc@google.com>
> [Rephrase assertion message.]
> Signed-off-by: Ackerley Tng <ackerleytng@google.com>
Reviewed-by: Fuad Tabba <tabba@google.com>
Cheers,
/fuad
> ---
> tools/testing/selftests/kvm/lib/kvm_util.c | 3 +++
> 1 file changed, 3 insertions(+)
>
> diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
> index 0b2256ea65ff9..6b304e8a0e0d5 100644
> --- a/tools/testing/selftests/kvm/lib/kvm_util.c
> +++ b/tools/testing/selftests/kvm/lib/kvm_util.c
> @@ -1110,6 +1110,9 @@ void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type,
> src_type == VM_MEM_SRC_SHARED_HUGETLB);
> }
>
> + TEST_ASSERT(region->fd == -1 || backing_src_is_shared(src_type),
> + "A valid fd provided to mmap() must be accompanied by MAP_SHARED.");
> +
> region->mmap_start = __kvm_mmap(region->mmap_size, PROT_READ | PROT_WRITE,
> vm_mem_backing_src_alias(src_type)->flag,
> region->fd, mmap_offset);
>
> --
> 2.55.0.rc0.738.g0c8ab3ebcc-goog
>
>
^ permalink raw reply
* Re: [PATCH] Docs/mm: fix documentation warning for GFP parameter in kmalloc_obj, kmalloc_objs and kmalloc_flex
From: Vlastimil Babka (SUSE) @ 2026-06-25 9:11 UTC (permalink / raw)
To: Andrew Morton, Jakov Novak
Cc: linux-mm, linux-kernel, Harry Yoo, Hao Li, Christoph Lameter,
David Rientjes, Roman Gushchin, linux-kernel-mentees, Shuah Khan
In-Reply-To: <20260624184856.55fe055b85e17ed2c6572182@linux-foundation.org>
On 6/25/26 03:48, Andrew Morton wrote:
> On Fri, 19 Jun 2026 13:36:22 +0200 Jakov Novak <jakovnovak30@gmail.com> wrote:
>
>> Subject: [PATCH] Docs/mm: fix documentation warning for GFP parameter in kmalloc_obj, kmalloc_objs and kmalloc_flex
>
> Thanks.
>
> "mm/slab: ..." would be a better subject.
>
>> Date: Fri, 19 Jun 2026 13:36:22 +0200
>> X-Mailer: git-send-email 2.54.0
>>
>> Compiling the documentation currently gives the errors:
>>
>> WARNING: ./include/linux/slab.h:1100 Excess function parameter 'GFP' description in 'kmalloc_obj'
>> WARNING: ./include/linux/slab.h:1112 Excess function parameter 'GFP' description in 'kmalloc_objs'
>> WARNING: ./include/linux/slab.h:1127 Excess function parameter 'GFP' description in 'kmalloc_flex'
>> WARNING: ./include/linux/slab.h:1100 Excess function parameter 'GFP' description in 'kmalloc_obj'
>> WARNING: ./include/linux/slab.h:1112 Excess function parameter 'GFP' description in 'kmalloc_objs'
>> WARNING: ./include/linux/slab.h:1127 Excess function parameter 'GFP' description in 'kmalloc_flex'
>>
>> This effectively omits the GFP parameter from the current kernel
>> documentation. This patch marks the "..." parameter with the previous
>> description of the GFP parameter along with an "optional" tag in
>> parantheses.
>
> "parentheses".
>
> I'll assume that Vlastimil will be processing this patch.
As Harry pointed out, Randy already fixed this, and the fix has since landed in mainline.
^ permalink raw reply
* Re: [PATCH 1/1] iomap: avoid compaction for costly folio order allocation
From: Salvatore Dipietro @ 2026-06-25 9:10 UTC (permalink / raw)
To: hch, ritesh.list
Cc: abuehaze, akpm, alisaidi, blakgeof, brauner, david,
dipietro.salvatore, dipiets, djwong, linux-fsdevel, linux-kernel,
linux-mm, linux-xfs, ljs, mhocko, rppt, stable, vbabka, vbabka,
willy
In-Reply-To: <ajvc7fSDngyx0X5j@infradead.org>
On Wed, Jun 24, 2026 at 12:21:00PM +0000, Ritesh Harjani wrote:
> Sorry about the delay. I did bring this topic up in one of our internal
> ext4 community calls. And to share some context, MM community thinks we
> need a better long term fix for this problem rather than patching call
> sites and/or playing tricks like -
Thanks Ritesh for the update and for bringing this to the wider MM community.
I completely understand that the MM community is looking for a proper long-term
fix rather than specific patching.
On Wed, Jun 24, 2026 at 13:34:00PM +0000, Christoph Hellwig wrote:
> Do you have pointers to the patches for each approach above?
Yes — all the patches in the result table are shared within this thread:
v1 (original, iomap caller): The original PATCH 1/1 in this series
Ritesh's suggestion (mm/filemap.c): Shared in Ritesh's reply on May 3rd [1]
Matthew's suggestion (mm/filemap.c): Shared in Matthew's reply on April 4th [2]
kcompactd background (mm/page_alloc.c): Shared in my reply on May 6th [3]
[1] https://lore.kernel.org/all/20260403193535.9970-1-dipiets@amazon.it/T/#m8c3da1c9fb9e9c66d4e8b1849de824b0ecf37f9e
[2] https://lore.kernel.org/all/20260403193535.9970-1-dipiets@amazon.it/T/#m4b90cf280ff0efcf178dfd8d068f1de3b262e28a
[3] https://lore.kernel.org/all/20260403193535.9970-1-dipiets@amazon.it/T/#m22977355535a4599084a51104a4df806d49c53d1
--
Salvatore
AMAZON DEVELOPMENT CENTER ITALY SRL, viale Monte Grappa 3/5, 20124 Milano, Italia, Registro delle Imprese di Milano Monza Brianza Lodi REA n. 2504859, Capitale Sociale: 10.000 EUR i.v., Cod. Fisc. e P.IVA 10100050961, Societa con Socio Unico
^ permalink raw reply
* Re: [PATCH v8 42/46] KVM: selftests: Provide common function to set memory attributes
From: Fuad Tabba @ 2026-06-25 9:09 UTC (permalink / raw)
To: ackerleytng
Cc: aik, andrew.jones, binbin.wu, brauner, chao.p.peng, david,
jmattson, jthoughton, michael.roth, oupton, pankaj.gupta, qperret,
rick.p.edgecombe, rientjes, shivankg, steven.price, willy, wyihan,
yan.y.zhao, forkloop, pratyush, suzuki.poulose, aneesh.kumar,
liam, Paolo Bonzini, Sean Christopherson, Thomas Gleixner,
Ingo Molnar, Borislav Petkov, Dave Hansen, x86, H. Peter Anvin,
Steven Rostedt, Masami Hiramatsu, Mathieu Desnoyers,
Jonathan Corbet, Shuah Khan, Shuah Khan, Vishal Annapurve,
Andrew Morton, Chris Li, Kairui Song, Kemeng Shi, Nhat Pham,
Barry Song, Axel Rasmussen, Yuanchu Xie, Wei Xu, Youngjun Park,
Qi Zheng, Shakeel Butt, Kiryl Shutsemau, Baoquan He,
Jason Gunthorpe, Vlastimil Babka, kvm, linux-kernel,
linux-trace-kernel, linux-doc, linux-kselftest, linux-mm,
linux-coco
In-Reply-To: <20260618-gmem-inplace-conversion-v8-42-9d2959357853@google.com>
On Fri, 19 Jun 2026 at 01:32, Ackerley Tng via B4 Relay
<devnull+ackerleytng.google.com@kernel.org> wrote:
>
> From: Sean Christopherson <seanjc@google.com>
>
> Introduce vm_mem_set_memory_attributes(), which handles setting of memory
> attributes for a range of guest physical addresses, regardless of whether
> the attributes should be set via guest_memfd or via the memory attributes
> at the VM level.
>
> Refactor existing vm_mem_set_{shared,private} functions to use the new
> function. Opportunistically update the size parameter to use size_t instead
> of u64.
>
> Signed-off-by: Sean Christopherson <seanjc@google.com>
> Co-developed-by: Ackerley Tng <ackerleytng@google.com>
> Signed-off-by: Ackerley Tng <ackerleytng@google.com>
Reviewed-by: Fuad Tabba <tabba@google.com>
Cheers,
/fuad
> ---
> tools/testing/selftests/kvm/include/kvm_util.h | 46 +++++++++++++++++++-------
> 1 file changed, 34 insertions(+), 12 deletions(-)
>
> diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h
> index 3a6b1fa7f26ef..db1442da21bb1 100644
> --- a/tools/testing/selftests/kvm/include/kvm_util.h
> +++ b/tools/testing/selftests/kvm/include/kvm_util.h
> @@ -454,18 +454,6 @@ static inline void vm_set_memory_attributes(struct kvm_vm *vm, gpa_t gpa,
> vm_ioctl(vm, KVM_SET_MEMORY_ATTRIBUTES, &attr);
> }
>
> -static inline void vm_mem_set_private(struct kvm_vm *vm, gpa_t gpa,
> - u64 size)
> -{
> - vm_set_memory_attributes(vm, gpa, size, KVM_MEMORY_ATTRIBUTE_PRIVATE);
> -}
> -
> -static inline void vm_mem_set_shared(struct kvm_vm *vm, gpa_t gpa,
> - u64 size)
> -{
> - vm_set_memory_attributes(vm, gpa, size, 0);
> -}
> -
> static inline int __gmem_set_memory_attributes(int fd, u64 offset,
> size_t size, u64 attributes,
> u64 *error_offset)
> @@ -532,6 +520,40 @@ static inline void gmem_set_shared(int fd, u64 offset, size_t size)
> gmem_set_memory_attributes(fd, offset, size, 0);
> }
>
> +static inline void vm_mem_set_memory_attributes(struct kvm_vm *vm, gpa_t gpa,
> + size_t size, u64 attrs)
> +{
> + if (kvm_has_gmem_attributes) {
> + gpa_t end = gpa + size;
> + off_t fd_offset;
> + gpa_t addr;
> + size_t len;
> + int fd;
> +
> + for (addr = gpa; addr < end; addr += len) {
> + fd = kvm_gpa_to_guest_memfd(vm, addr, &fd_offset, &len);
> + len = min(end - addr, len);
> +
> + gmem_set_memory_attributes(fd, fd_offset, len, attrs);
> + }
> + } else {
> + vm_set_memory_attributes(vm, gpa, size, attrs);
> + }
> +}
> +
> +static inline void vm_mem_set_private(struct kvm_vm *vm, gpa_t gpa,
> + size_t size)
> +{
> + vm_mem_set_memory_attributes(vm, gpa, size,
> + KVM_MEMORY_ATTRIBUTE_PRIVATE);
> +}
> +
> +static inline void vm_mem_set_shared(struct kvm_vm *vm, gpa_t gpa,
> + size_t size)
> +{
> + vm_mem_set_memory_attributes(vm, gpa, size, 0);
> +}
> +
> void vm_guest_mem_fallocate(struct kvm_vm *vm, gpa_t gpa, u64 size,
> bool punch_hole);
>
>
> --
> 2.55.0.rc0.738.g0c8ab3ebcc-goog
>
>
^ permalink raw reply
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox