Linux block layer
 help / color / mirror / Atom feed
* [PATCH v6 01/10] rust: module: move module types into `module.rs`
From: Alvin Sun @ 2026-06-24 15:00 UTC (permalink / raw)
  To: Miguel Ojeda, Boqun Feng, Gary Guo, Björn Roy Baron,
	Benno Lossin, Andreas Hindborg, Alice Ryhl, Trevor Gross,
	Danilo Krummrich, Luis Chamberlain, Petr Pavlu, Daniel Gomez,
	Sami Tolvanen, Aaron Tomlin, Greg Kroah-Hartman,
	Rafael J. Wysocki, David Airlie, Simona Vetter, Daniel Almeida,
	Arnd Bergmann, Brendan Higgins, David Gow, Rae Moar, Breno Leitao,
	Jens Axboe, Dave Ertman, Leon Romanovsky, Igor Korotin,
	FUJITA Tomonori, Bjorn Helgaas, Krzysztof Wilczyński,
	Arve Hjønnevåg, Todd Kjos, Christian Brauner,
	Carlos Llamas
  Cc: rust-for-linux, linux-modules, driver-core, dri-devel, nova-gpu,
	linux-kselftest, kunit-dev, linux-block, linux-kernel, netdev,
	linux-pci, Alvin Sun
In-Reply-To: <20260624-fix-fops-owner-v6-0-5295e333cb3e@linux.dev>

Move `Module`, `InPlaceModule`, `ModuleMetadata` and `ThisModule` from
`lib.rs` into a new `rust/kernel/module.rs`. Re-export them from `lib.rs`
to avoid tree-wide changes.

Switch six bus driver registrations from `module.0` to the public
`ThisModule::as_ptr()` accessor, since the field is no longer visible
outside the new `module` submodule.

No functional change.

Assisted-by: opencode:glm-5.2
Acked-by: Danilo Krummrich <dakr@kernel.org>
Signed-off-by: Alvin Sun <alvin.sun@linux.dev>
---
 rust/kernel/auxiliary.rs |  2 +-
 rust/kernel/i2c.rs       |  2 +-
 rust/kernel/lib.rs       | 75 +++++-------------------------------------------
 rust/kernel/module.rs    | 71 +++++++++++++++++++++++++++++++++++++++++++++
 rust/kernel/net/phy.rs   |  6 +++-
 rust/kernel/pci.rs       |  2 +-
 rust/kernel/platform.rs  |  2 +-
 rust/kernel/usb.rs       |  2 +-
 8 files changed, 88 insertions(+), 74 deletions(-)

diff --git a/rust/kernel/auxiliary.rs b/rust/kernel/auxiliary.rs
index 93c0db1f66555..4a02f83240be3 100644
--- a/rust/kernel/auxiliary.rs
+++ b/rust/kernel/auxiliary.rs
@@ -63,7 +63,7 @@ unsafe fn register(
 
         // SAFETY: `adrv` is guaranteed to be a valid `DriverType`.
         to_result(unsafe {
-            bindings::__auxiliary_driver_register(adrv.get(), module.0, name.as_char_ptr())
+            bindings::__auxiliary_driver_register(adrv.get(), module.as_ptr(), name.as_char_ptr())
         })
     }
 
diff --git a/rust/kernel/i2c.rs b/rust/kernel/i2c.rs
index 7b908f0c5a58d..24eff08f47123 100644
--- a/rust/kernel/i2c.rs
+++ b/rust/kernel/i2c.rs
@@ -142,7 +142,7 @@ unsafe fn register(
         }
 
         // SAFETY: `idrv` is guaranteed to be a valid `DriverType`.
-        to_result(unsafe { bindings::i2c_register_driver(module.0, idrv.get()) })
+        to_result(unsafe { bindings::i2c_register_driver(module.as_ptr(), idrv.get()) })
     }
 
     unsafe fn unregister(idrv: &Opaque<Self::DriverType>) {
diff --git a/rust/kernel/lib.rs b/rust/kernel/lib.rs
index b72b2fbe046d6..040ae85056509 100644
--- a/rust/kernel/lib.rs
+++ b/rust/kernel/lib.rs
@@ -93,6 +93,7 @@
 pub mod maple_tree;
 pub mod miscdevice;
 pub mod mm;
+pub mod module;
 pub mod module_param;
 #[cfg(CONFIG_NET)]
 pub mod net;
@@ -139,79 +140,17 @@
 #[doc(hidden)]
 pub use bindings;
 pub use macros;
+pub use module::{
+    InPlaceModule,
+    Module,
+    ModuleMetadata,
+    ThisModule, //
+};
 pub use uapi;
 
 /// Prefix to appear before log messages printed from within the `kernel` crate.
 const __LOG_PREFIX: &[u8] = b"rust_kernel\0";
 
-/// The top level entrypoint to implementing a kernel module.
-///
-/// For any teardown or cleanup operations, your type may implement [`Drop`].
-pub trait Module: Sized + Sync + Send {
-    /// Called at module initialization time.
-    ///
-    /// Use this method to perform whatever setup or registration your module
-    /// should do.
-    ///
-    /// Equivalent to the `module_init` macro in the C API.
-    fn init(module: &'static ThisModule) -> error::Result<Self>;
-}
-
-/// A module that is pinned and initialised in-place.
-pub trait InPlaceModule: Sync + Send {
-    /// Creates an initialiser for the module.
-    ///
-    /// It is called when the module is loaded.
-    fn init(module: &'static ThisModule) -> impl pin_init::PinInit<Self, error::Error>;
-}
-
-impl<T: Module> InPlaceModule for T {
-    fn init(module: &'static ThisModule) -> impl pin_init::PinInit<Self, error::Error> {
-        let initer = move |slot: *mut Self| {
-            let m = <Self as Module>::init(module)?;
-
-            // SAFETY: `slot` is valid for write per the contract with `pin_init_from_closure`.
-            unsafe { slot.write(m) };
-            Ok(())
-        };
-
-        // SAFETY: On success, `initer` always fully initialises an instance of `Self`.
-        unsafe { pin_init::pin_init_from_closure(initer) }
-    }
-}
-
-/// Metadata attached to a [`Module`] or [`InPlaceModule`].
-pub trait ModuleMetadata {
-    /// The name of the module as specified in the `module!` macro.
-    const NAME: &'static crate::str::CStr;
-}
-
-/// Equivalent to `THIS_MODULE` in the C API.
-///
-/// C header: [`include/linux/init.h`](srctree/include/linux/init.h)
-pub struct ThisModule(*mut bindings::module);
-
-// SAFETY: `THIS_MODULE` may be used from all threads within a module.
-unsafe impl Sync for ThisModule {}
-
-impl ThisModule {
-    /// Creates a [`ThisModule`] given the `THIS_MODULE` pointer.
-    ///
-    /// # Safety
-    ///
-    /// The pointer must be equal to the right `THIS_MODULE`.
-    pub const unsafe fn from_ptr(ptr: *mut bindings::module) -> ThisModule {
-        ThisModule(ptr)
-    }
-
-    /// Access the raw pointer for this module.
-    ///
-    /// It is up to the user to use it correctly.
-    pub const fn as_ptr(&self) -> *mut bindings::module {
-        self.0
-    }
-}
-
 #[cfg(not(testlib))]
 #[panic_handler]
 fn panic(info: &core::panic::PanicInfo<'_>) -> ! {
diff --git a/rust/kernel/module.rs b/rust/kernel/module.rs
new file mode 100644
index 0000000000000..be242a82e86d2
--- /dev/null
+++ b/rust/kernel/module.rs
@@ -0,0 +1,71 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! Module-related types and helpers.
+
+/// The entrypoint to implementing a kernel module.
+///
+/// For any teardown or cleanup operations, your type may implement [`Drop`].
+pub trait Module: Sized + Sync + Send {
+    /// Called at module initialization time.
+    ///
+    /// Use this method to perform whatever setup or registration your module
+    /// should do.
+    ///
+    /// Equivalent to the `module_init` macro in the C API.
+    fn init(module: &'static ThisModule) -> crate::error::Result<Self>;
+}
+
+/// A module that is pinned and initialised in-place.
+pub trait InPlaceModule: Sync + Send {
+    /// Creates an initialiser for the module.
+    ///
+    /// It is called when the module is loaded.
+    fn init(module: &'static ThisModule) -> impl pin_init::PinInit<Self, crate::error::Error>;
+}
+
+impl<T: Module> InPlaceModule for T {
+    fn init(module: &'static ThisModule) -> impl pin_init::PinInit<Self, crate::error::Error> {
+        let initer = move |slot: *mut Self| {
+            let m = <Self as Module>::init(module)?;
+
+            // SAFETY: `slot` is valid for write per the contract with `pin_init_from_closure`.
+            unsafe { slot.write(m) };
+            Ok(())
+        };
+
+        // SAFETY: On success, `initer` always fully initialises an instance of `Self`.
+        unsafe { pin_init::pin_init_from_closure(initer) }
+    }
+}
+
+/// Metadata attached to a [`Module`] or [`InPlaceModule`].
+pub trait ModuleMetadata {
+    /// The name of the module as specified in the `module!` macro.
+    const NAME: &'static crate::str::CStr;
+}
+
+/// Equivalent to `THIS_MODULE` in the C API.
+///
+/// C header: [`include/linux/init.h`](srctree/include/linux/init.h)
+pub struct ThisModule(*mut crate::bindings::module);
+
+// SAFETY: `THIS_MODULE` may be used from all threads within a module.
+unsafe impl Sync for ThisModule {}
+
+impl ThisModule {
+    /// Creates a [`ThisModule`] given the `THIS_MODULE` pointer.
+    ///
+    /// # Safety
+    ///
+    /// The pointer must be equal to the right `THIS_MODULE`.
+    pub const unsafe fn from_ptr(ptr: *mut crate::bindings::module) -> ThisModule {
+        ThisModule(ptr)
+    }
+
+    /// Access the raw pointer for this module.
+    ///
+    /// It is up to the user to use it correctly.
+    pub const fn as_ptr(&self) -> *mut crate::bindings::module {
+        self.0
+    }
+}
diff --git a/rust/kernel/net/phy.rs b/rust/kernel/net/phy.rs
index 3ca99db5cccf2..8b7036b8fe480 100644
--- a/rust/kernel/net/phy.rs
+++ b/rust/kernel/net/phy.rs
@@ -659,7 +659,11 @@ pub fn register(
         // the `drivers` slice are initialized properly. `drivers` will not be moved.
         // So it's just an FFI call.
         to_result(unsafe {
-            bindings::phy_drivers_register(drivers[0].0.get(), drivers.len().try_into()?, module.0)
+            bindings::phy_drivers_register(
+                drivers[0].0.get(),
+                drivers.len().try_into()?,
+                module.as_ptr(),
+            )
         })?;
         // INVARIANT: The `drivers` slice is successfully registered to the kernel via `phy_drivers_register`.
         Ok(Registration { drivers })
diff --git a/rust/kernel/pci.rs b/rust/kernel/pci.rs
index af74ddff6114d..916ed2cb6b70b 100644
--- a/rust/kernel/pci.rs
+++ b/rust/kernel/pci.rs
@@ -86,7 +86,7 @@ unsafe fn register(
 
         // SAFETY: `pdrv` is guaranteed to be a valid `DriverType`.
         to_result(unsafe {
-            bindings::__pci_register_driver(pdrv.get(), module.0, name.as_char_ptr())
+            bindings::__pci_register_driver(pdrv.get(), module.as_ptr(), name.as_char_ptr())
         })
     }
 
diff --git a/rust/kernel/platform.rs b/rust/kernel/platform.rs
index 8917d4ee499fb..9fdbafd53bc21 100644
--- a/rust/kernel/platform.rs
+++ b/rust/kernel/platform.rs
@@ -82,7 +82,7 @@ unsafe fn register(
         }
 
         // SAFETY: `pdrv` is guaranteed to be a valid `DriverType`.
-        to_result(unsafe { bindings::__platform_driver_register(pdrv.get(), module.0) })
+        to_result(unsafe { bindings::__platform_driver_register(pdrv.get(), module.as_ptr()) })
     }
 
     unsafe fn unregister(pdrv: &Opaque<Self::DriverType>) {
diff --git a/rust/kernel/usb.rs b/rust/kernel/usb.rs
index 9c17a672cd275..213db32727c17 100644
--- a/rust/kernel/usb.rs
+++ b/rust/kernel/usb.rs
@@ -63,7 +63,7 @@ unsafe fn register(
 
         // SAFETY: `udrv` is guaranteed to be a valid `DriverType`.
         to_result(unsafe {
-            bindings::usb_register_driver(udrv.get(), module.0, name.as_char_ptr())
+            bindings::usb_register_driver(udrv.get(), module.as_ptr(), name.as_char_ptr())
         })
     }
 

-- 
2.43.0



^ permalink raw reply related

* [PATCH v6 00/10] Fix missing fops.owner in Rust DRM/misc abstractions
From: Alvin Sun @ 2026-06-24 14:59 UTC (permalink / raw)
  To: Miguel Ojeda, Boqun Feng, Gary Guo, Björn Roy Baron,
	Benno Lossin, Andreas Hindborg, Alice Ryhl, Trevor Gross,
	Danilo Krummrich, Luis Chamberlain, Petr Pavlu, Daniel Gomez,
	Sami Tolvanen, Aaron Tomlin, Greg Kroah-Hartman,
	Rafael J. Wysocki, David Airlie, Simona Vetter, Daniel Almeida,
	Arnd Bergmann, Brendan Higgins, David Gow, Rae Moar, Breno Leitao,
	Jens Axboe, Dave Ertman, Leon Romanovsky, Igor Korotin,
	FUJITA Tomonori, Bjorn Helgaas, Krzysztof Wilczyński,
	Arve Hjønnevåg, Todd Kjos, Christian Brauner,
	Carlos Llamas
  Cc: rust-for-linux, linux-modules, driver-core, dri-devel, nova-gpu,
	linux-kselftest, kunit-dev, linux-block, linux-kernel, netdev,
	linux-pci, Alvin Sun

During tyr debugfs development, a kernel NULL pointer dereference was
encountered after `rmmod tyr` while gnome-shell still held /dev/card1 open:

```
  [158827.868132] Unable to handle kernel NULL pointer dereference at virtual address 0000000000000000
  [158827.868918] Mem abort info:
  [158827.869177]   ESR = 0x0000000086000004
  [158827.869519]   EC = 0x21: IABT (current EL), IL = 32 bits
  [158827.870000]   SET = 0, FnV = 0
  [158827.870281]   EA = 0, S1PTW = 0
  [158827.870571]   FSC = 0x04: level 0 translation fault
  [158827.871043] user pgtable: 4k pages, 48-bit VAs, pgdp=0000000108dec000
  [158827.871623] [0000000000000000] pgd=0000000000000000, p4d=0000000000000000
  [158827.872242] Internal error: Oops: 0000000086000004 [#1]  SMP
  [158827.872246] Modules linked in: tyr sunrpc snd_soc_simple_card rk805_pwrkey snd_soc_simple_card_utils rtw88_8822bu display_connector rtw88_usb rtw88_8822b snd_soc_rockchip_i2s_tdm snd_soc_hdmi_codec
  rtw88_core]
  [158827.872337] CPU: 4 UID: 1000 PID: 11276 Comm: gnome-s:disk$0 Tainted: G                 N  7.1.0-rc1+ #331 PREEMPT
  [158827.880534] Tainted: [N]=TEST
  [158827.880535] Hardware name: FriendlyElec NanoPi R6C/NanoPi R6C, BIOS v1.1 04/09/2025
  [158827.880538] pstate: 60400009 (nZCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--)
  [158827.880542] pc : 0x0
  [158827.880547] lr : _RNvNtCs257m05FHVbX_3tyr2vm8pt_unmap+0x8c/0x12c [tyr]
  [158827.880578] sp : ffff800083c236b0
  [158827.880579] x29: ffff800083c236d0 x28: ffff00013f8a0000 x27: 0000000000000000
  [158827.880585] x26: 000000000000007c x25: ffff000108e6ed80 x24: 0000000000401000
  [158827.880590] x23: 0000000000000000 x22: 0000000040000000 x21: 0000000000001000
  [158827.880595] x20: ffff00010f778138 x19: 0000000000400000 x18: 00000000ffffffff
  [158827.880600] x17: 000000040044ffff x16: 045000f2b5503510 x15: 0720072007200720
  [158827.880606] x14: 0720072007200720 x13: 0000000000401000 x12: 0000000000400000
  [158827.880611] x11: ffff800083c239d0 x10: ffff000141e4fd88 x9 : 0000000000000000
  [158827.880615] x8 : 0000000000000000 x7 : 0000000000000000 x6 : 0000000000400000
  [158827.880620] x5 : ffff00013f8a0000 x4 : 0000000000000000 x3 : 0000000000000001
  [158827.880625] x2 : 0000000000001000 x1 : 0000000000400000 x0 : ffff00010f778138
  [158827.880630] Call trace:
  [158827.880632]  0x0 (P)
  [158827.880635]  _RNvXs6_NtCs257m05FHVbX_3tyr2vmNtB5_9GpuVmDataNtNtNtCsgmSOfgXi5CZ_6kernel3drm5gpuvm11DriverGpuVm13sm_step_unmap+0x3c/0x120 [tyr]
  [158827.891166]  _RNvMs4_NtNtNtCsgmSOfgXi5CZ_6kernel3drm5gpuvm6sm_opsINtB7_5GpuVmNtNtCs257m05FHVbX_3tyr2vm9GpuVmDataE13sm_step_unmapB13_+0x18/0x34 [tyr]
  [158827.891187]  op_unmap_cb+0x78/0xb0
  [158827.891196]  __drm_gpuvm_sm_unmap+0x18c/0x1b4
  [158827.891204]  drm_gpuvm_sm_unmap+0x38/0x4c
  [158827.891209]  _RNvMs5_NtCs257m05FHVbX_3tyr2vmNtB5_2Vm7exec_op+0x1cc/0x254 [tyr]
  [158827.894085]  _RNvMs5_NtCs257m05FHVbX_3tyr2vmNtB5_2Vm11unmap_range+0x124/0x188 [tyr]
  [158827.894105]  _RINvNtCs5hGKnPbRUFW_4core3ptr13drop_in_placeNtNtCs257m05FHVbX_3tyr3gem8KernelBoEBK_+0x44/0xd8 [tyr]
  [158827.894125]  _RINvNtCs5hGKnPbRUFW_4core3ptr13drop_in_placeINtNtNtCsgmSOfgXi5CZ_6kernel5alloc4kvec3VecNtNtCs257m05FHVbX_3tyr2fw7SectionNtNtBL_9allocator7KmallocEEB1r_+0x3c/0x100 [tyr]
  [158827.894147]  _RINvNtCs5hGKnPbRUFW_4core3ptr13drop_in_placeINtNtNtCsgmSOfgXi5CZ_6kernel4sync3arc3ArcNtNtCs257m05FHVbX_3tyr2fw8FirmwareEEB1p_+0x94/0x190 [tyr]
  [158827.894167]  _RNvMs4_NtNtCsgmSOfgXi5CZ_6kernel3drm6deviceINtB5_6DeviceNtNtCs257m05FHVbX_3tyr6driver12TyrDrmDriverE7releaseBW_+0x30/0x98 [tyr]
  [158827.899550]  drm_dev_put.part.0+0x88/0xc0
  [158827.899557]  drm_minor_release+0x18/0x28
  [158827.899562]  drm_release+0x144/0x170
  [158827.899567]  __fput+0xe4/0x30c
  [158827.899573]  ____fput+0x14/0x20
  [158827.899579]  task_work_run+0x7c/0xe8
  [158827.899586]  do_exit+0x2a8/0xac4
  [158827.899590]  do_group_exit+0x34/0x90
  [158827.899594]  get_signal+0xaac/0xabc
  [158827.899599]  arch_do_signal_or_restart+0x90/0x3e8
  [158827.899606]  exit_to_user_mode_loop+0x140/0x1d0
  [158827.899613]  el0_svc+0x2f4/0x2f8
  [158827.899620]  el0t_64_sync_handler+0xa0/0xe4
  [158827.899627]  el0t_64_sync+0x198/0x19c
  [158827.899632] ---[ end trace 0000000000000000 ]---
```

The root cause: `fops.owner` was `NULL` in Rust DRM drivers, so the kernel
never blocked module unloading while file descriptors were open. This leads to
a use-after-free when `drm_release` (or any other fops callback) runs after the
module's code has been freed.

The series moves `THIS_MODULE` into the `ModuleMetadata` trait as a const,
threads it through `#[vtable]` to set `fops.owner` in DRM/miscdevice, and updates
configfs, rnull and binder to use `this_module::<LocalModule>()`.

Assisted-by: opencode:glm-5.2
Signed-off-by: Alvin Sun <alvin.sun@linux.dev>
---
Changes in v6:
- Update MAINTAINERS to cover the new `rust/kernel/module.rs`.
- Link to v5: https://lore.kernel.org/r/20260624-fix-fops-owner-v5-0-aa1cba242f05@linux.dev

Changes in v5:
- Add `#[inline]` to the `this_module()` helper.
- Fix configfs doc comment to reference `crate::LocalModule` instead of
  bare `LocalModule`.
- Link to v4: https://lore.kernel.org/r/20260623-fix-fops-owner-v4-0-0daf5f077d5c@linux.dev

Changes in v4:
- Move module-related types into a new `rust/kernel/module.rs`.
- Migrate binder from the `module!`-generated `THIS_MODULE` static to
  `this_module::<LocalModule>()`.
- Reorganise the series so that every commit builds independently, and
  drop the legacy `THIS_MODULE` static once all users are migrated.
- Link to v3: https://lore.kernel.org/r/20260622-fix-fops-owner-v3-0-49d45cb37032@linux.dev

Changes in v3:
- Renamed vtable associated type `ThisModule` to `OwnerModule`
- Added `this_module()` helper for ergonomic `THIS_MODULE` access
- Refined vtable macro implementation: one-liner detection and single `defined_items` set
- Reordered commits to place doctest fallback before vtable auto-insert
- Link to v2: https://lore.kernel.org/r/20260521-fix-fops-owner-v2-0-fd99079c5a04@linux.dev

Changes in v2:
- Merged old `static THIS_MODULE` and v1's `MODULE_PTR` into a single
  `ModuleMetadata::THIS_MODULE` const
- `#[vtable]` macro now auto-inserts `type ThisModule`, removing all per-driver
  manual patches from v1
- Added configfs & rnull usage site updates and doctest `LocalModule` fallback
- Link to v1: https://lore.kernel.org/r/20260519-fix-fops-owner-v1-0-2ded9830da14@linux.dev

---
Alvin Sun (10):
      rust: module: move module types into `module.rs`
      rust: module: add `THIS_MODULE` const to `ModuleMetadata` trait
      rust: doctest: add LocalModule fallback for #[vtable] ThisModule
      rust: macros: auto-insert OwnerModule in #[vtable]
      rust: drm: set fops.owner from driver module pointer
      rust: miscdevice: set fops.owner from driver module pointer
      rust: configfs: use `LocalModule` for `THIS_MODULE`
      rust: binder: use `LocalModule` for `THIS_MODULE`
      rust: macros: remove `THIS_MODULE` static from `module!`
      rust: module: update MAINTAINERS to cover module.rs

 MAINTAINERS                                |  2 +-
 drivers/android/binder/rust_binder_main.rs |  3 +-
 drivers/block/rnull/configfs.rs            |  6 +--
 rust/kernel/auxiliary.rs                   |  2 +-
 rust/kernel/configfs.rs                    |  8 +--
 rust/kernel/drm/device.rs                  |  3 +-
 rust/kernel/drm/gem/mod.rs                 |  4 +-
 rust/kernel/i2c.rs                         |  2 +-
 rust/kernel/lib.rs                         | 75 +++-------------------------
 rust/kernel/miscdevice.rs                  |  4 +-
 rust/kernel/module.rs                      | 80 ++++++++++++++++++++++++++++++
 rust/kernel/net/phy.rs                     |  6 ++-
 rust/kernel/pci.rs                         |  2 +-
 rust/kernel/platform.rs                    |  2 +-
 rust/kernel/usb.rs                         |  2 +-
 rust/macros/lib.rs                         |  6 +++
 rust/macros/module.rs                      | 34 ++++++-------
 rust/macros/vtable.rs                      | 41 +++++++++++++--
 scripts/rustdoc_test_gen.rs                | 16 ++++++
 19 files changed, 189 insertions(+), 109 deletions(-)
---
base-commit: b7e5ac83cb16f7ffd11dc23736f84276602100ed
change-id: 20260519-fix-fops-owner-e3a77bb27c6c
prerequisite-change-id: 20260519-miscdev-use-format-9ab7e83b1c11:v3
prerequisite-patch-id: 405b334ff0d48ad350014f05a2321bdbaa025400
prerequisite-patch-id: 604b631c81d5423f4ebb2e12ba2d22e9ce371bfc
prerequisite-patch-id: cb550d94cefe01920e0d3ced2b2bcbecd76f3907
prerequisite-patch-id: 3bc830839742591460cb86d9472c04f4686dc600
prerequisite-patch-id: 571058244bc8c7088638d2e3225713011246c7e9
prerequisite-patch-id: 347c5a3c6dbef9832bfce8419fc23e6e08ba477f
prerequisite-patch-id: 3e202d988b56b88446f7535e90d3f00cf5f15701

Best regards,
-- 
Alvin Sun <alvin.sun@linux.dev>



^ permalink raw reply

* [PATCH v6 02/10] rust: module: add `THIS_MODULE` const to `ModuleMetadata` trait
From: Alvin Sun @ 2026-06-24 15:00 UTC (permalink / raw)
  To: Miguel Ojeda, Boqun Feng, Gary Guo, Björn Roy Baron,
	Benno Lossin, Andreas Hindborg, Alice Ryhl, Trevor Gross,
	Danilo Krummrich, Luis Chamberlain, Petr Pavlu, Daniel Gomez,
	Sami Tolvanen, Aaron Tomlin, Greg Kroah-Hartman,
	Rafael J. Wysocki, David Airlie, Simona Vetter, Daniel Almeida,
	Arnd Bergmann, Brendan Higgins, David Gow, Rae Moar, Breno Leitao,
	Jens Axboe, Dave Ertman, Leon Romanovsky, Igor Korotin,
	FUJITA Tomonori, Bjorn Helgaas, Krzysztof Wilczyński,
	Arve Hjønnevåg, Todd Kjos, Christian Brauner,
	Carlos Llamas
  Cc: rust-for-linux, linux-modules, driver-core, dri-devel, nova-gpu,
	linux-kselftest, kunit-dev, linux-block, linux-kernel, netdev,
	linux-pci, Alvin Sun
In-Reply-To: <20260624-fix-fops-owner-v6-0-5295e333cb3e@linux.dev>

Since `const_refs_to_static` is stable as of the MSRV bump, a `ThisModule`
pointer can now be used in const contexts.

Add a `THIS_MODULE` const to the `ModuleMetadata` trait so that modules
can provide their `ThisModule` pointer in const contexts such as static
`file_operations`.

Add a `this_module()` helper to retrieve the `THIS_MODULE` pointer of a
given module type, and update `__init` to use it instead of the
`THIS_MODULE` static generated by the `module!` macro.

The `static THIS_MODULE` generated by the `module!` macro is retained
for backwards compatibility with existing users; it is removed in a later
patch once all references have been migrated.

Assisted-by: opencode:glm-5.2
Reviewed-by: Andreas Hindborg <a.hindborg@kernel.org>
Reviewed-by: Gary Guo <gary@garyguo.net>
Acked-by: Danilo Krummrich <dakr@kernel.org>
Signed-off-by: Alvin Sun <alvin.sun@linux.dev>
---
 rust/kernel/module.rs |  9 +++++++++
 rust/macros/module.rs | 18 +++++++++++++++++-
 2 files changed, 26 insertions(+), 1 deletion(-)

diff --git a/rust/kernel/module.rs b/rust/kernel/module.rs
index be242a82e86d2..d713705984477 100644
--- a/rust/kernel/module.rs
+++ b/rust/kernel/module.rs
@@ -42,6 +42,15 @@ fn init(module: &'static ThisModule) -> impl pin_init::PinInit<Self, crate::erro
 pub trait ModuleMetadata {
     /// The name of the module as specified in the `module!` macro.
     const NAME: &'static crate::str::CStr;
+
+    /// The module's `THIS_MODULE` pointer.
+    const THIS_MODULE: ThisModule;
+}
+
+/// Returns a reference to the `THIS_MODULE` of the given module type.
+#[inline]
+pub const fn this_module<M: ModuleMetadata>() -> &'static ThisModule {
+    &M::THIS_MODULE
 }
 
 /// Equivalent to `THIS_MODULE` in the C API.
diff --git a/rust/macros/module.rs b/rust/macros/module.rs
index 06c18e2075083..aa9a618d5d19e 100644
--- a/rust/macros/module.rs
+++ b/rust/macros/module.rs
@@ -519,6 +519,22 @@ pub(crate) fn module(info: ModuleInfo) -> Result<TokenStream> {
 
         impl ::kernel::ModuleMetadata for #type_ {
             const NAME: &'static ::kernel::str::CStr = #name_cstr;
+
+            #[cfg(MODULE)]
+            const THIS_MODULE: ::kernel::ThisModule = {
+                extern "C" {
+                    static __this_module: ::kernel::types::Opaque<::kernel::bindings::module>;
+                }
+
+                // SAFETY: `__this_module` is constructed by the kernel at load time
+                // and lives until the module is unloaded.
+                unsafe { ::kernel::ThisModule::from_ptr(__this_module.get()) }
+            };
+
+            #[cfg(not(MODULE))]
+            const THIS_MODULE: ::kernel::ThisModule = unsafe {
+                ::kernel::ThisModule::from_ptr(::core::ptr::null_mut())
+            };
         }
 
         // Double nested modules, since then nobody can access the public items inside.
@@ -616,7 +632,7 @@ pub extern "C" fn #ident_exit() {
                 /// This function must only be called once.
                 unsafe fn __init() -> ::kernel::ffi::c_int {
                     let initer = <super::super::LocalModule as ::kernel::InPlaceModule>::init(
-                        &super::super::THIS_MODULE
+                        ::kernel::module::this_module::<super::super::LocalModule>()
                     );
                     // SAFETY: No data race, since `__MOD` can only be accessed by this module
                     // and there only `__init` and `__exit` access it. These functions are only

-- 
2.43.0



^ permalink raw reply related

* [PATCH v6 03/10] rust: doctest: add LocalModule fallback for #[vtable] ThisModule
From: Alvin Sun @ 2026-06-24 15:00 UTC (permalink / raw)
  To: Miguel Ojeda, Boqun Feng, Gary Guo, Björn Roy Baron,
	Benno Lossin, Andreas Hindborg, Alice Ryhl, Trevor Gross,
	Danilo Krummrich, Luis Chamberlain, Petr Pavlu, Daniel Gomez,
	Sami Tolvanen, Aaron Tomlin, Greg Kroah-Hartman,
	Rafael J. Wysocki, David Airlie, Simona Vetter, Daniel Almeida,
	Arnd Bergmann, Brendan Higgins, David Gow, Rae Moar, Breno Leitao,
	Jens Axboe, Dave Ertman, Leon Romanovsky, Igor Korotin,
	FUJITA Tomonori, Bjorn Helgaas, Krzysztof Wilczyński,
	Arve Hjønnevåg, Todd Kjos, Christian Brauner,
	Carlos Llamas
  Cc: rust-for-linux, linux-modules, driver-core, dri-devel, nova-gpu,
	linux-kselftest, kunit-dev, linux-block, linux-kernel, netdev,
	linux-pci, Alvin Sun
In-Reply-To: <20260624-fix-fops-owner-v6-0-5295e333cb3e@linux.dev>

Add a `LocalModule` struct with a null-pointer `ModuleMetadata` impl
in the doctest harness, so that `crate::LocalModule` (auto-inserted
by `#[vtable]`) resolves correctly when there is no `module!` macro.

Assisted-by: opencode:glm-5.2
Reviewed-by: Andreas Hindborg <a.hindborg@kernel.org>
Reviewed-by: Gary Guo <gary@garyguo.net>
Acked-by: Danilo Krummrich <dakr@kernel.org>
Signed-off-by: Alvin Sun <alvin.sun@linux.dev>
---
 scripts/rustdoc_test_gen.rs | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/scripts/rustdoc_test_gen.rs b/scripts/rustdoc_test_gen.rs
index ee76e96b41eea..198af4e446c8c 100644
--- a/scripts/rustdoc_test_gen.rs
+++ b/scripts/rustdoc_test_gen.rs
@@ -239,6 +239,22 @@ macro_rules! assert_eq {{
 
 const __LOG_PREFIX: &[u8] = b"rust_doctests_kernel\0";
 
+/// Dummy module type for doctest context.
+struct LocalModule;
+
+use kernel::{{
+    str::CStr,
+    ModuleMetadata,
+    ThisModule, //
+}};
+use core::ptr::null_mut;
+
+impl ModuleMetadata for LocalModule {{
+    const NAME: &'static CStr = c"rust_doctests_kernel";
+    // SAFETY: `try_module_get`/`module_put` handle null module pointers gracefully.
+    const THIS_MODULE: ThisModule = unsafe {{ ThisModule::from_ptr(null_mut()) }};
+}}
+
 {rust_tests}
 "#
     )

-- 
2.43.0



^ permalink raw reply related

* [PATCH v6 04/10] rust: macros: auto-insert OwnerModule in #[vtable]
From: Alvin Sun @ 2026-06-24 15:00 UTC (permalink / raw)
  To: Miguel Ojeda, Boqun Feng, Gary Guo, Björn Roy Baron,
	Benno Lossin, Andreas Hindborg, Alice Ryhl, Trevor Gross,
	Danilo Krummrich, Luis Chamberlain, Petr Pavlu, Daniel Gomez,
	Sami Tolvanen, Aaron Tomlin, Greg Kroah-Hartman,
	Rafael J. Wysocki, David Airlie, Simona Vetter, Daniel Almeida,
	Arnd Bergmann, Brendan Higgins, David Gow, Rae Moar, Breno Leitao,
	Jens Axboe, Dave Ertman, Leon Romanovsky, Igor Korotin,
	FUJITA Tomonori, Bjorn Helgaas, Krzysztof Wilczyński,
	Arve Hjønnevåg, Todd Kjos, Christian Brauner,
	Carlos Llamas
  Cc: rust-for-linux, linux-modules, driver-core, dri-devel, nova-gpu,
	linux-kselftest, kunit-dev, linux-block, linux-kernel, netdev,
	linux-pci, Alvin Sun
In-Reply-To: <20260624-fix-fops-owner-v6-0-5295e333cb3e@linux.dev>

Auto-add `type OwnerModule: ::kernel::ModuleMetadata;` as a required
associated type on the trait side if not already defined, and
auto-insert `type OwnerModule = crate::LocalModule;` on the impl side
if not explicitly provided, eliminating the need to manually declare
and implement `OwnerModule` in every vtable trait and impl.

Assisted-by: opencode:glm-5.2
Reviewed-by: Andreas Hindborg <a.hindborg@kernel.org>
Suggested-by: Gary Guo <gary@garyguo.net>
Link: https://lore.kernel.org/all/DIMMWHUOLPSH.13JFRHDKDQJGO@garyguo.net
Reviewed-by: Gary Guo <gary@garyguo.net>
Acked-by: Danilo Krummrich <dakr@kernel.org>
Signed-off-by: Alvin Sun <alvin.sun@linux.dev>
---
 rust/macros/lib.rs    |  6 ++++++
 rust/macros/vtable.rs | 41 ++++++++++++++++++++++++++++++++++++-----
 2 files changed, 42 insertions(+), 5 deletions(-)

diff --git a/rust/macros/lib.rs b/rust/macros/lib.rs
index 2cfd59e0f9e7c..bc7ded353c5ca 100644
--- a/rust/macros/lib.rs
+++ b/rust/macros/lib.rs
@@ -176,6 +176,12 @@ pub fn module(input: TokenStream) -> TokenStream {
 ///
 /// This macro should not be used when all functions are required.
 ///
+/// Additionally, this macro automatically handles the `OwnerModule`
+/// associated type: on the trait side, `type OwnerModule: ModuleMetadata;`
+/// is added as a required associated type if not already defined; on the
+/// impl side, `type OwnerModule = crate::LocalModule;` is automatically inserted
+/// if not explicitly defined.
+///
 /// # Examples
 ///
 /// ```
diff --git a/rust/macros/vtable.rs b/rust/macros/vtable.rs
index c6510b0c4ea1d..be9a5ed8abe5e 100644
--- a/rust/macros/vtable.rs
+++ b/rust/macros/vtable.rs
@@ -30,6 +30,22 @@ fn handle_trait(mut item: ItemTrait) -> Result<ItemTrait> {
          const USE_VTABLE_ATTR: ();
     });
 
+    // Add `type OwnerModule: ModuleMetadata` as a required associated type if
+    // the trait does not already define it.
+    if !item
+        .items
+        .iter()
+        .any(|i| matches!(i, TraitItem::Type(t) if t.ident == "OwnerModule"))
+    {
+        gen_items.push(parse_quote! {
+            /// The module implementing this vtable trait.
+            ///
+            /// Automatically set to `crate::LocalModule` by the `#[vtable]`
+            /// impl macro.
+            type OwnerModule: ::kernel::ModuleMetadata;
+        });
+    }
+
     for item in &item.items {
         if let TraitItem::Fn(fn_item) = item {
             let name = &fn_item.sig.ident;
@@ -57,12 +73,18 @@ fn handle_trait(mut item: ItemTrait) -> Result<ItemTrait> {
 
 fn handle_impl(mut item: ItemImpl) -> Result<ItemImpl> {
     let mut gen_items = Vec::new();
-    let mut defined_consts = HashSet::new();
+    let mut defined_items = HashSet::new();
 
-    // Iterate over all user-defined constants to gather any possible explicit overrides.
+    // Iterate over all user-defined items to gather any possible explicit overrides.
     for item in &item.items {
-        if let ImplItem::Const(const_item) = item {
-            defined_consts.insert(const_item.ident.clone());
+        match item {
+            ImplItem::Const(const_item) => {
+                defined_items.insert(const_item.ident.clone());
+            }
+            ImplItem::Type(type_item) => {
+                defined_items.insert(type_item.ident.clone());
+            }
+            _ => {}
         }
     }
 
@@ -70,6 +92,15 @@ fn handle_impl(mut item: ItemImpl) -> Result<ItemImpl> {
         const USE_VTABLE_ATTR: () = ();
     });
 
+    // Auto-insert `type OwnerModule = crate::LocalModule` if not explicitly defined.
+    // `crate::LocalModule` resolves to the real module type (via `module!`) or a
+    // dummy fallback in non-module contexts (e.g., doctests).
+    if !defined_items.contains(&parse_quote!(OwnerModule)) {
+        gen_items.push(parse_quote! {
+            type OwnerModule = crate::LocalModule;
+        });
+    }
+
     for item in &item.items {
         if let ImplItem::Fn(fn_item) = item {
             let name = &fn_item.sig.ident;
@@ -78,7 +109,7 @@ fn handle_impl(mut item: ItemImpl) -> Result<ItemImpl> {
                 name.span(),
             );
             // Skip if it's declared already -- this allows user override.
-            if defined_consts.contains(&gen_const_name) {
+            if defined_items.contains(&gen_const_name) {
                 continue;
             }
             let cfg_attrs = crate::helpers::gather_cfg_attrs(&fn_item.attrs);

-- 
2.43.0



^ permalink raw reply related

* [PATCH v6 05/10] rust: drm: set fops.owner from driver module pointer
From: Alvin Sun @ 2026-06-24 15:00 UTC (permalink / raw)
  To: Miguel Ojeda, Boqun Feng, Gary Guo, Björn Roy Baron,
	Benno Lossin, Andreas Hindborg, Alice Ryhl, Trevor Gross,
	Danilo Krummrich, Luis Chamberlain, Petr Pavlu, Daniel Gomez,
	Sami Tolvanen, Aaron Tomlin, Greg Kroah-Hartman,
	Rafael J. Wysocki, David Airlie, Simona Vetter, Daniel Almeida,
	Arnd Bergmann, Brendan Higgins, David Gow, Rae Moar, Breno Leitao,
	Jens Axboe, Dave Ertman, Leon Romanovsky, Igor Korotin,
	FUJITA Tomonori, Bjorn Helgaas, Krzysztof Wilczyński,
	Arve Hjønnevåg, Todd Kjos, Christian Brauner,
	Carlos Llamas
  Cc: rust-for-linux, linux-modules, driver-core, dri-devel, nova-gpu,
	linux-kselftest, kunit-dev, linux-block, linux-kernel, netdev,
	linux-pci, Alvin Sun
In-Reply-To: <20260624-fix-fops-owner-v6-0-5295e333cb3e@linux.dev>

Change `create_fops()` to accept an owner module pointer instead of
hardcoding `null_mut()`, ensuring the kernel correctly tracks the
module owning the DRM device's file operations.

Assisted-by: opencode:glm-5.2
Reviewed-by: Andreas Hindborg <a.hindborg@kernel.org>
Reviewed-by: Gary Guo <gary@garyguo.net>
Acked-by: Danilo Krummrich <dakr@kernel.org>
Signed-off-by: Alvin Sun <alvin.sun@linux.dev>
---
 rust/kernel/drm/device.rs  | 3 ++-
 rust/kernel/drm/gem/mod.rs | 4 ++--
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/rust/kernel/drm/device.rs b/rust/kernel/drm/device.rs
index 403fc35353c74..d92cacb665366 100644
--- a/rust/kernel/drm/device.rs
+++ b/rust/kernel/drm/device.rs
@@ -111,7 +111,8 @@ impl<T: drm::Driver> Device<T> {
         fops: &Self::GEM_FOPS,
     };
 
-    const GEM_FOPS: bindings::file_operations = drm::gem::create_fops();
+    const GEM_FOPS: bindings::file_operations =
+        drm::gem::create_fops(crate::module::this_module::<T::OwnerModule>().as_ptr());
 
     /// Create a new `drm::Device` for a `drm::Driver`.
     pub fn new(dev: &device::Device, data: impl PinInit<T::Data, Error>) -> Result<ARef<Self>> {
diff --git a/rust/kernel/drm/gem/mod.rs b/rust/kernel/drm/gem/mod.rs
index 01b5bd47a3332..9a203efc59116 100644
--- a/rust/kernel/drm/gem/mod.rs
+++ b/rust/kernel/drm/gem/mod.rs
@@ -357,10 +357,10 @@ impl<T: DriverObject> AllocImpl for Object<T> {
     };
 }
 
-pub(super) const fn create_fops() -> bindings::file_operations {
+pub(super) const fn create_fops(owner: *mut bindings::module) -> bindings::file_operations {
     let mut fops: bindings::file_operations = pin_init::zeroed();
 
-    fops.owner = core::ptr::null_mut();
+    fops.owner = owner;
     fops.open = Some(bindings::drm_open);
     fops.release = Some(bindings::drm_release);
     fops.unlocked_ioctl = Some(bindings::drm_ioctl);

-- 
2.43.0



^ permalink raw reply related

* [PATCH v6 07/10] rust: configfs: use `LocalModule` for `THIS_MODULE`
From: Alvin Sun @ 2026-06-24 15:00 UTC (permalink / raw)
  To: Miguel Ojeda, Boqun Feng, Gary Guo, Björn Roy Baron,
	Benno Lossin, Andreas Hindborg, Alice Ryhl, Trevor Gross,
	Danilo Krummrich, Luis Chamberlain, Petr Pavlu, Daniel Gomez,
	Sami Tolvanen, Aaron Tomlin, Greg Kroah-Hartman,
	Rafael J. Wysocki, David Airlie, Simona Vetter, Daniel Almeida,
	Arnd Bergmann, Brendan Higgins, David Gow, Rae Moar, Breno Leitao,
	Jens Axboe, Dave Ertman, Leon Romanovsky, Igor Korotin,
	FUJITA Tomonori, Bjorn Helgaas, Krzysztof Wilczyński,
	Arve Hjønnevåg, Todd Kjos, Christian Brauner,
	Carlos Llamas
  Cc: rust-for-linux, linux-modules, driver-core, dri-devel, nova-gpu,
	linux-kselftest, kunit-dev, linux-block, linux-kernel, netdev,
	linux-pci, Alvin Sun
In-Reply-To: <20260624-fix-fops-owner-v6-0-5295e333cb3e@linux.dev>

Replace the `THIS_MODULE` static reference in the `configfs_attrs!`
macro with `this_module::<LocalModule>()`, and update
rnull to import `LocalModule` instead of `THIS_MODULE`, consistent
with the move of `THIS_MODULE` into the `ModuleMetadata` trait.

Assisted-by: opencode:glm-5.2
Reviewed-by: Andreas Hindborg <a.hindborg@kernel.org>
Acked-by: Danilo Krummrich <dakr@kernel.org>
Signed-off-by: Alvin Sun <alvin.sun@linux.dev>
---
 drivers/block/rnull/configfs.rs | 6 ++----
 rust/kernel/configfs.rs         | 8 +++++---
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/drivers/block/rnull/configfs.rs b/drivers/block/rnull/configfs.rs
index c10a55fc58948..b2547ad1e5ddd 100644
--- a/drivers/block/rnull/configfs.rs
+++ b/drivers/block/rnull/configfs.rs
@@ -1,9 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 
-use super::{
-    NullBlkDevice,
-    THIS_MODULE, //
-};
+use super::NullBlkDevice;
+use crate::LocalModule;
 use kernel::{
     block::mq::gen_disk::{
         GenDisk,
diff --git a/rust/kernel/configfs.rs b/rust/kernel/configfs.rs
index 2339c6467325d..c31d7882e216d 100644
--- a/rust/kernel/configfs.rs
+++ b/rust/kernel/configfs.rs
@@ -875,7 +875,7 @@ fn as_ptr(&self) -> *const bindings::config_item_type {
 ///                 configfs::Subsystem<Configuration>,
 ///                 Configuration
 ///                 >::new_with_child_ctor::<N,Child>(
-///             &THIS_MODULE,
+///             ::kernel::module::this_module::<crate::LocalModule>(),
 ///             &CONFIGURATION_ATTRS
 ///         );
 ///
@@ -1021,7 +1021,8 @@ macro_rules! configfs_attrs {
 
                     static [< $data:upper _TPE >] : $crate::configfs::ItemType<$container, $data>  =
                         $crate::configfs::ItemType::<$container, $data>::new::<N>(
-                            &THIS_MODULE, &[<$ data:upper _ATTRS >]
+                            $crate::module::this_module::<LocalModule>(),
+                            &[<$ data:upper _ATTRS >]
                         );
                 )?
 
@@ -1030,7 +1031,8 @@ macro_rules! configfs_attrs {
                         $crate::configfs::ItemType<$container, $data>  =
                             $crate::configfs::ItemType::<$container, $data>::
                             new_with_child_ctor::<N, $child>(
-                                &THIS_MODULE, &[<$ data:upper _ATTRS >]
+                                $crate::module::this_module::<LocalModule>(),
+                                &[<$ data:upper _ATTRS >]
                             );
                 )?
 

-- 
2.43.0



^ permalink raw reply related

* [PATCH v6 08/10] rust: binder: use `LocalModule` for `THIS_MODULE`
From: Alvin Sun @ 2026-06-24 15:00 UTC (permalink / raw)
  To: Miguel Ojeda, Boqun Feng, Gary Guo, Björn Roy Baron,
	Benno Lossin, Andreas Hindborg, Alice Ryhl, Trevor Gross,
	Danilo Krummrich, Luis Chamberlain, Petr Pavlu, Daniel Gomez,
	Sami Tolvanen, Aaron Tomlin, Greg Kroah-Hartman,
	Rafael J. Wysocki, David Airlie, Simona Vetter, Daniel Almeida,
	Arnd Bergmann, Brendan Higgins, David Gow, Rae Moar, Breno Leitao,
	Jens Axboe, Dave Ertman, Leon Romanovsky, Igor Korotin,
	FUJITA Tomonori, Bjorn Helgaas, Krzysztof Wilczyński,
	Arve Hjønnevåg, Todd Kjos, Christian Brauner,
	Carlos Llamas
  Cc: rust-for-linux, linux-modules, driver-core, dri-devel, nova-gpu,
	linux-kselftest, kunit-dev, linux-block, linux-kernel, netdev,
	linux-pci, Alvin Sun
In-Reply-To: <20260624-fix-fops-owner-v6-0-5295e333cb3e@linux.dev>

Replace the `THIS_MODULE` static reference in the binder fops with
`this_module::<LocalModule>()`, consistent with the move of
`THIS_MODULE` into the `ModuleMetadata` trait.

Assisted-by: opencode:glm-5.2
Reviewed-by: Gary Guo <gary@garyguo.net>
Acked-by: Danilo Krummrich <dakr@kernel.org>
Signed-off-by: Alvin Sun <alvin.sun@linux.dev>
---
 drivers/android/binder/rust_binder_main.rs | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/android/binder/rust_binder_main.rs b/drivers/android/binder/rust_binder_main.rs
index dc1941cd2407b..d6ceebbd5f94e 100644
--- a/drivers/android/binder/rust_binder_main.rs
+++ b/drivers/android/binder/rust_binder_main.rs
@@ -17,6 +17,7 @@
     bindings::{self, seq_file},
     fs::File,
     list::{ListArc, ListArcSafe, ListLinksSelfPtr, TryNewListArc},
+    module::this_module,
     prelude::*,
     seq_file::SeqFile,
     seq_print,
@@ -318,7 +319,7 @@ unsafe impl<T> Sync for AssertSync<T> {}
     let zeroed_ops = unsafe { core::mem::MaybeUninit::zeroed().assume_init() };
 
     let ops = kernel::bindings::file_operations {
-        owner: THIS_MODULE.as_ptr(),
+        owner: this_module::<LocalModule>().as_ptr(),
         poll: Some(rust_binder_poll),
         unlocked_ioctl: Some(rust_binder_ioctl),
         compat_ioctl: bindings::compat_ptr_ioctl,

-- 
2.43.0



^ permalink raw reply related

* [PATCH v6 06/10] rust: miscdevice: set fops.owner from driver module pointer
From: Alvin Sun @ 2026-06-24 15:00 UTC (permalink / raw)
  To: Miguel Ojeda, Boqun Feng, Gary Guo, Björn Roy Baron,
	Benno Lossin, Andreas Hindborg, Alice Ryhl, Trevor Gross,
	Danilo Krummrich, Luis Chamberlain, Petr Pavlu, Daniel Gomez,
	Sami Tolvanen, Aaron Tomlin, Greg Kroah-Hartman,
	Rafael J. Wysocki, David Airlie, Simona Vetter, Daniel Almeida,
	Arnd Bergmann, Brendan Higgins, David Gow, Rae Moar, Breno Leitao,
	Jens Axboe, Dave Ertman, Leon Romanovsky, Igor Korotin,
	FUJITA Tomonori, Bjorn Helgaas, Krzysztof Wilczyński,
	Arve Hjønnevåg, Todd Kjos, Christian Brauner,
	Carlos Llamas
  Cc: rust-for-linux, linux-modules, driver-core, dri-devel, nova-gpu,
	linux-kselftest, kunit-dev, linux-block, linux-kernel, netdev,
	linux-pci, Alvin Sun
In-Reply-To: <20260624-fix-fops-owner-v6-0-5295e333cb3e@linux.dev>

Set the miscdevice fops owner field from the driver module pointer
via the `this_module::<T::OwnerModule>()` helper, instead of
defaulting to null.

Assisted-by: opencode:glm-5.2
Reviewed-by: Andreas Hindborg <a.hindborg@kernel.org>
Reviewed-by: Gary Guo <gary@garyguo.net>
Acked-by: Danilo Krummrich <dakr@kernel.org>
Signed-off-by: Alvin Sun <alvin.sun@linux.dev>
---
 rust/kernel/miscdevice.rs | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/rust/kernel/miscdevice.rs b/rust/kernel/miscdevice.rs
index 83ce50def5ac9..2a4329f98614e 100644
--- a/rust/kernel/miscdevice.rs
+++ b/rust/kernel/miscdevice.rs
@@ -24,12 +24,13 @@
         IovIterSource, //
     },
     mm::virt::VmaNew,
+    module::this_module,
     prelude::*,
     seq_file::SeqFile,
     types::{
         ForeignOwnable,
         Opaque, //
-    },
+    }, //
 };
 use core::marker::PhantomData;
 
@@ -430,6 +431,7 @@ impl<T: MiscDevice> MiscdeviceVTable<T> {
         } else {
             None
         },
+        owner: this_module::<T::OwnerModule>().as_ptr(),
         ..pin_init::zeroed()
     };
 

-- 
2.43.0



^ permalink raw reply related

* [PATCH v6 09/10] rust: macros: remove `THIS_MODULE` static from `module!`
From: Alvin Sun @ 2026-06-24 15:00 UTC (permalink / raw)
  To: Miguel Ojeda, Boqun Feng, Gary Guo, Björn Roy Baron,
	Benno Lossin, Andreas Hindborg, Alice Ryhl, Trevor Gross,
	Danilo Krummrich, Luis Chamberlain, Petr Pavlu, Daniel Gomez,
	Sami Tolvanen, Aaron Tomlin, Greg Kroah-Hartman,
	Rafael J. Wysocki, David Airlie, Simona Vetter, Daniel Almeida,
	Arnd Bergmann, Brendan Higgins, David Gow, Rae Moar, Breno Leitao,
	Jens Axboe, Dave Ertman, Leon Romanovsky, Igor Korotin,
	FUJITA Tomonori, Bjorn Helgaas, Krzysztof Wilczyński,
	Arve Hjønnevåg, Todd Kjos, Christian Brauner,
	Carlos Llamas
  Cc: rust-for-linux, linux-modules, driver-core, dri-devel, nova-gpu,
	linux-kselftest, kunit-dev, linux-block, linux-kernel, netdev,
	linux-pci, Alvin Sun
In-Reply-To: <20260624-fix-fops-owner-v6-0-5295e333cb3e@linux.dev>

All users have been migrated to `ModuleMetadata::THIS_MODULE` const or
`this_module::<LocalModule>()` helper. The `static THIS_MODULE`
generated by the `module!` macro is no longer referenced anywhere,
so remove it to avoid having two sources of the same `ThisModule`
pointer.

Assisted-by: opencode:glm-5.2
Reviewed-by: Andreas Hindborg <a.hindborg@kernel.org>
Reviewed-by: Gary Guo <gary@garyguo.net>
Acked-by: Danilo Krummrich <dakr@kernel.org>
Signed-off-by: Alvin Sun <alvin.sun@linux.dev>
---
 rust/macros/module.rs | 16 ----------------
 1 file changed, 16 deletions(-)

diff --git a/rust/macros/module.rs b/rust/macros/module.rs
index aa9a618d5d19e..23b6a1b456b80 100644
--- a/rust/macros/module.rs
+++ b/rust/macros/module.rs
@@ -497,22 +497,6 @@ pub(crate) fn module(info: ModuleInfo) -> Result<TokenStream> {
         /// Used by the printing macros, e.g. [`info!`].
         const __LOG_PREFIX: &[u8] = #name_cstr.to_bytes_with_nul();
 
-        // SAFETY: `__this_module` is constructed by the kernel at load time and will not be
-        // freed until the module is unloaded.
-        #[cfg(MODULE)]
-        static THIS_MODULE: ::kernel::ThisModule = unsafe {
-            extern "C" {
-                static __this_module: ::kernel::types::Opaque<::kernel::bindings::module>;
-            };
-
-            ::kernel::ThisModule::from_ptr(__this_module.get())
-        };
-
-        #[cfg(not(MODULE))]
-        static THIS_MODULE: ::kernel::ThisModule = unsafe {
-            ::kernel::ThisModule::from_ptr(::core::ptr::null_mut())
-        };
-
         /// The `LocalModule` type is the type of the module created by `module!`,
         /// `module_pci_driver!`, `module_platform_driver!`, etc.
         type LocalModule = #type_;

-- 
2.43.0



^ permalink raw reply related

* [PATCH v6 10/10] rust: module: update MAINTAINERS to cover module.rs
From: Alvin Sun @ 2026-06-24 15:00 UTC (permalink / raw)
  To: Miguel Ojeda, Boqun Feng, Gary Guo, Björn Roy Baron,
	Benno Lossin, Andreas Hindborg, Alice Ryhl, Trevor Gross,
	Danilo Krummrich, Luis Chamberlain, Petr Pavlu, Daniel Gomez,
	Sami Tolvanen, Aaron Tomlin, Greg Kroah-Hartman,
	Rafael J. Wysocki, David Airlie, Simona Vetter, Daniel Almeida,
	Arnd Bergmann, Brendan Higgins, David Gow, Rae Moar, Breno Leitao,
	Jens Axboe, Dave Ertman, Leon Romanovsky, Igor Korotin,
	FUJITA Tomonori, Bjorn Helgaas, Krzysztof Wilczyński,
	Arve Hjønnevåg, Todd Kjos, Christian Brauner,
	Carlos Llamas
  Cc: rust-for-linux, linux-modules, driver-core, dri-devel, nova-gpu,
	linux-kselftest, kunit-dev, linux-block, linux-kernel, netdev,
	linux-pci, Alvin Sun
In-Reply-To: <20260624-fix-fops-owner-v6-0-5295e333cb3e@linux.dev>

Module types now live in `rust/kernel/module.rs` alongside
`rust/kernel/module_param.rs`. Update the MODULE SUPPORT file pattern
from `rust/kernel/module_param.rs` to `rust/kernel/module*.rs` so both
files are covered.

Assisted-by: opencode:glm-5.2
Link: https://lore.kernel.org/rust-for-linux/8ea21b29-9baf-4926-a16f-7d21c5a1a1b8@suse.com
Signed-off-by: Alvin Sun <alvin.sun@linux.dev>
---
 MAINTAINERS | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index e035a3be797c4..74733de3e41ee 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -17984,7 +17984,7 @@ F:	include/linux/module*.h
 F:	kernel/module/
 F:	lib/test_kmod.c
 F:	lib/tests/module/
-F:	rust/kernel/module_param.rs
+F:	rust/kernel/module*.rs
 F:	rust/macros/module.rs
 F:	scripts/module*
 F:	tools/testing/selftests/kmod/

-- 
2.43.0



^ permalink raw reply related

* Re: [PATCH v2] block: serialize elevator changes for the same queue using a writer lock
From: Ming Lei @ 2026-06-24 15:09 UTC (permalink / raw)
  To: Shin'ichiro Kawasaki; +Cc: linux-block, Jens Axboe, Nilay Shroff
In-Reply-To: <ajvCBkxdTCNt9JWI@shinmob>

On Wed, Jun 24, 2026 at 08:48:16PM +0900, Shin'ichiro Kawasaki wrote:
> On Jun 24, 2026 / 04:44, Ming Lei wrote:
> > On Tue, Jun 23, 2026 at 10:32:38AM +0900, Shin'ichiro Kawasaki wrote:
> [...]
> > > Please refer to [1] for details of the failure. Also, I created a
> > > blktests test case that recreates the hang [2], which I used to test the
> > > fix.
> > > 
> > > * Changes from RFC v1
> > > - Instead of adding a new mutex to struct request_queue, replace the
> > >   reader lock on update_nr_hwq_lock with the writer lock in
> > >   elv_iosched_store().
> > > 
> > > [1] https://lore.kernel.org/linux-block/20260611074200.474676-1-shinichiro.kawasaki@wdc.com/
> > > [2] https://github.com/kawasaki/blktests/commit/8e80b3ccc0bbbe3f209d00eacd138d020de97fc6
> > > 
> > >  block/elevator.c | 4 ++--
> > >  1 file changed, 2 insertions(+), 2 deletions(-)
> [...]
> > I feel this is still abuse of the above lock, which serves writer vs
> > reader wrt. updating hw queue.
> > 
> > How about the following fix?
> 
> (snip)
> 
> Thank you for the idea. I applied the suggested fix on top of the v7.1 kernel,
> and ran the test case that does the concurrent write to the sysfs sched file
> [2]. Unfortunately, the test case hung. Before the hang, the kernel reported
> WARNs in sysfs_create_dir_ns() [3]. KASAN slab-use-after-free was observed also.
> I also noticed that another WARN was observed during boot [4].

It looks like this change isn't enough. It is a bit hard to deal with the
two-stage switch by re-locking, and it may require refactoring the sched
debugfs & elevator queue reg/unreg code.

Let's fix with your simpler way first.


Thanks,
Ming

^ permalink raw reply

* Re: [PATCH v2] block: serialize elevator changes for the same queue using a writer lock
From: Ming Lei @ 2026-06-24 15:12 UTC (permalink / raw)
  To: Shin'ichiro Kawasaki; +Cc: linux-block, Jens Axboe, Nilay Shroff
In-Reply-To: <20260623013238.642052-1-shinichiro.kawasaki@wdc.com>

On Tue, Jun 23, 2026 at 10:32:38AM +0900, Shin'ichiro Kawasaki wrote:
> When elevator_change() is called concurrently for the same queue, the
> elevator_change_done() function runs concurrently as well. This function
> adds or deletes kobjects for the debugfs entry of the queue. Then the
> concurrent calls cause memory corruption of the kobjects and result in a
> process hang. The core part of the elevator switch is protected by queue
> freeze and q->elevator_lock. However, since the commit 559dc11143eb
> ("block: move elv_register[unregister]_queue out of elevator_lock"), the
> elevator_change_done() is not serialized. Hence the memory corruption
> and the hang.
> 
> The failures are observed when udev-worker writes to a sysfs
> queue/scheduler attribute file while the blktests test case block/005
> writes to the same attribute file. The failure also can be recreated by
> running two processes that write to the same queue/scheduler file
> concurrently. The failure is observed since another commit 370ac285f23a
> ("block: avoid cpu_hotplug_lock depedency on freeze_lock"). This commit
> changed the behavior of queue freeze and it unveiled the failure.
> 
> Fix the failure by changing elv_iosched_store() to acquire
> update_nr_hwq_lock as the writer lock instead of the reader lock. This
> serializes the whole elevator switch steps, including the
> elevator_change_done() call.
> 
> Fixes: 559dc11143eb ("block: move elv_register[unregister]_queue out of elevator_lock")
> Signed-off-by: Shin'ichiro Kawasaki <shinichiro.kawasaki@wdc.com>
> ---
> I observed that the blktests test case block/005 hung on a specific
> server hardware using a specific HDD as a block device. During the test
> case run, the kernel reported KASAN null-ptr-deref and slab-use-after-
> free errors. The failure happened when a sysfs queue/scheduler attribute
> file is written concurrently. I reported the failure and shared a
> candidate fix patch as RFC [1]. Based on the comments and discussion on
> the RFC patch, I propose this v2 patch that avoids introducing a new
> lock. My thanks go to Ming and Nilay for the discussion.
> 
> Please refer to [1] for details of the failure. Also, I created a
> blktests test case that recreates the hang [2], which I used to test the
> fix.
> 
> * Changes from RFC v1
> - Instead of adding a new mutex to struct request_queue, replace the
>   reader lock on update_nr_hwq_lock with the writer lock in
>   elv_iosched_store().
> 
> [1] https://lore.kernel.org/linux-block/20260611074200.474676-1-shinichiro.kawasaki@wdc.com/
> [2] https://github.com/kawasaki/blktests/commit/8e80b3ccc0bbbe3f209d00eacd138d020de97fc6
> 
>  block/elevator.c | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/block/elevator.c b/block/elevator.c
> index 3bcd37c2aa34..b03185a217ff 100644
> --- a/block/elevator.c
> +++ b/block/elevator.c
> @@ -813,7 +813,7 @@ ssize_t elv_iosched_store(struct gendisk *disk, const char *buf,
>  	 *   update_nr_hwq_lock -> kn->active (via del_gendisk -> kobject_del)
>  	 *   kn->active -> update_nr_hwq_lock (via this sysfs write path)
>  	 */
> -	if (!down_read_trylock(&set->update_nr_hwq_lock)) {
> +	if (!down_write_trylock(&set->update_nr_hwq_lock)) {

I'd suggest documenting why write_trylock is used above (e.g. to serialize
the 2-stage elevator switch). Anyway, this patch looks good as a bug fix:

Reviewed-by: Ming Lei <tom.leiming@gmail.com>


Thanks,
Ming

^ permalink raw reply

* Re: [PATCH 2/2] block: handle REQ_OP_ZONE_APPEND in __bio_integrity_action
From: Caleb Sander Mateos @ 2026-06-24 15:29 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: Jens Axboe, Martin K. Petersen, linux-block
In-Reply-To: <20260624080014.1998650-3-hch@lst.de>

On Wed, Jun 24, 2026 at 1:00 AM Christoph Hellwig <hch@lst.de> wrote:
>
> Otherwise zone append commands will miss their integrity data.  While
> this works "fine" for auto-PI, it breaks file system PI and non-PI
> metadata.
>
> With this, XFS on a ZNS namespace works with non-PI metadata and with
> 512 byte sector PI formats, while 4k sector PI formats work only when Caleb's
> "block: fix integrity offset/length conversions" is applied as well.
>
> Note that unlike regular writes, zone append does not need remapping as
> partitions are not supported on zoned block devices.

I take it 4-KB integrity intervals don't work due to the lack of
remapping for REQ_OP_ZONE_APPEND? Sounds like we should come back to
the discussion about cleaning up the ref tag seed and remapping, then.
I never got a reply from Martin on that thread. I guess remapping is
necessary at least for partitioned block devices, but we could skip it
for non-partitioned block devices if we initialized the ref tag seed
correctly.

Best,
Caleb


>
> Fixes: df3c485e0e60 ("block: switch on bio operation in bio_integrity_prep")
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
>  block/bio-integrity.c | 1 +
>  1 file changed, 1 insertion(+)
>
> diff --git a/block/bio-integrity.c b/block/bio-integrity.c
> index a53b38cf8a1a..b23e2434d80c 100644
> --- a/block/bio-integrity.c
> +++ b/block/bio-integrity.c
> @@ -38,6 +38,7 @@ unsigned int __bio_integrity_action(struct bio *bio)
>                 }
>                 return BI_ACT_BUFFER | BI_ACT_CHECK;
>         case REQ_OP_WRITE:
> +       case REQ_OP_ZONE_APPEND:
>                 /*
>                  * Flush masquerading as write?
>                  */
> --
> 2.53.0
>

^ permalink raw reply

* Re: [PATCH 2/2] block: handle REQ_OP_ZONE_APPEND in __bio_integrity_action
From: Christoph Hellwig @ 2026-06-24 15:38 UTC (permalink / raw)
  To: Caleb Sander Mateos
  Cc: Christoph Hellwig, Jens Axboe, Martin K. Petersen, linux-block
In-Reply-To: <CADUfDZo4hysS6qj=Z=dEzVk=DQe6D7-zTFODLk8RGTJ13RY5uQ@mail.gmail.com>

On Wed, Jun 24, 2026 at 08:29:07AM -0700, Caleb Sander Mateos wrote:
> I take it 4-KB integrity intervals don't work due to the lack of
> remapping for REQ_OP_ZONE_APPEND? Sounds like we should come back to
> the discussion about cleaning up the ref tag seed and remapping, then.
> I never got a reply from Martin on that thread. I guess remapping is
> necessary at least for partitioned block devices, but we could skip it
> for non-partitioned block devices if we initialized the ref tag seed
> correctly.

We don't actually need the partition remapping because there can't
be partitions.  But I see on-the-wire reftag values that are 8 times
what they should be, so there is some kind of unit mismatch that your
series fixes.


^ permalink raw reply

* Re: [PATCH 2/2] block: handle REQ_OP_ZONE_APPEND in __bio_integrity_action
From: Caleb Sander Mateos @ 2026-06-24 15:42 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: Jens Axboe, Martin K. Petersen, linux-block
In-Reply-To: <20260624153856.GA13186@lst.de>

On Wed, Jun 24, 2026 at 8:38 AM Christoph Hellwig <hch@lst.de> wrote:
>
> On Wed, Jun 24, 2026 at 08:29:07AM -0700, Caleb Sander Mateos wrote:
> > I take it 4-KB integrity intervals don't work due to the lack of
> > remapping for REQ_OP_ZONE_APPEND? Sounds like we should come back to
> > the discussion about cleaning up the ref tag seed and remapping, then.
> > I never got a reply from Martin on that thread. I guess remapping is
> > necessary at least for partitioned block devices, but we could skip it
> > for non-partitioned block devices if we initialized the ref tag seed
> > correctly.
>
> We don't actually need the partition remapping because there can't
> be partitions.  But I see on-the-wire reftag value that are 8 times
> what they should be, so there is some kind of unit mismatch that your
> series fixes.

Right, I don't mean partitions of zoned devices, but block devices in
general. I was just trying to understand why the remapping
infrastructure exists in the first place. Seems like we can't remove
it entirely, but we can at least ensure the ref tag seeds are correct
if it's skipped for a non-partitioned device.

^ permalink raw reply

* Re: [PATCH] blkcg: update iocost_coef_gen.py to use io_uring
From: Tejun Heo @ 2026-06-24 16:48 UTC (permalink / raw)
  To: Jeff Layton; +Cc: Jens Axboe, linux-kernel, linux-block
In-Reply-To: <20260624-iocost-v1-1-2d53f3c026a2@kernel.org>

On Wed, Jun 24, 2026 at 10:50:34AM -0400, Jeff Layton wrote:
> Recently I found myself having to benchmark some rather fast disks for
> iocost, but the old iocost_coef_gen.py script couldn't generate enough
> throughput to saturate it. Make it use io_uring instead.
> 
> Cc: Tejun Heo <tj@kernel.org>
> Cc: linux-block@vger.kernel.org
> Signed-off-by: Jeff Layton <jlayton@kernel.org>

Acked-by: Tejun Heo <tj@kernel.org>

Thanks.

-- 
tejun

^ permalink raw reply

* [PATCH v3 0/5] block: validate direct I/O memory alignment
From: Keith Busch @ 2026-06-24 17:09 UTC (permalink / raw)
  To: linux-block, linux-fsdevel
  Cc: dm-devel, hch, axboe, brauner, djwong, viro, Keith Busch

From: Keith Busch <kbusch@kernel.org>

This addresses the misaligned direct-io problem behind various threads:

 https://lore.kernel.org/linux-xfs/20260610145218.141369-1-cem@kernel.org/
 https://lore.kernel.org/all/CAC_j7i1R7oy+nRhxEjCTba=DUgn02w9X+p94DCu0aHv5+5tKnQ@mail.gmail.com/
 https://lore.kernel.org/linux-block/ai7rnH20IYeSmY8s@gallifrey/
 https://lore.kernel.org/linux-block/20260616154009.2123183-1-kbusch@meta.com/

The previously tested fixes are correct as far as they go, but they
treat the symptom: they only matter because an invalid bio reaches those
drivers in the first place.

The reason it reaches them is an assumption I made when I removed
direct-io alignment checks in 5ff3f74e145a ("block: simplify direct io
validity check") and 7eac331869575 ("iomap: simplify direct io validity
check"): every bio is eventually split to the device limits, and the
upper layers cope with resulting errors once the bio has formed. Both
were optimistic assumptions. Drivers with their own ->submit_bio may
never pass through blk_mq_submit_bio()'s split, so the check never runs
for them, and as numerous threads showed, the consumers don't uniformly
handle this condition.

This series stops the invalid bio at the source instead. It validates
the buffer's alignment against the alignment limits when the bio is
built from the iov_iter. The check is folded into the bvec extraction
that already walks the vectors, so it adds only a comparison on a path
that is pinning direct-io pages anyway. Misalignment is now uniformly
rejected with EINVAL before submission for every direct-io path.

v2->v3:

- Dropped the bio_endio_errno helper and open-coded its two users.
- Documented the ITER_BVEC alignment expectation in uio.h and reworded
  the bvec check comment; the exhaustive per-segment validation stays
  behind CONFIG_DEBUG_KERNEL as a contract assertion.
- Reworked zloop_get_block_size() to mirror loop's structure.
- loop/zloop only ever tighten dma_alignment beyond the default.  I
  think these could use more relaxed alignments, but I'm just being
  extra conservative against introducing new changes here.

Previous version:

  https://lore.kernel.org/linux-block/20260622174241.2299563-1-kbusch@meta.com/

Keith Busch (5):
  block: use blkdev_iov_iter_get_pages status for errors
  block: fix dio leak on metadata mapping error
  loop: set dma_alignment from the backing file for direct I/O
  zloop: set dma_alignment from the backing files for direct I/O
  block: validate user space vectors during extraction

 block/bio.c           | 56 ++++++++++++++++++++++++++++++++++++++++---
 block/blk-map.c       |  2 +-
 block/fops.c          | 10 ++++----
 drivers/block/loop.c  | 46 ++++++++++++++++++++++++++++-------
 drivers/block/zloop.c | 35 +++++++++++++++++++--------
 fs/iomap/direct-io.c  |  1 +
 include/linux/bio.h   |  2 +-
 include/linux/uio.h   | 10 +++++++-
 lib/iov_iter.c        |  9 ++++++-
 9 files changed, 142 insertions(+), 29 deletions(-)


base-commit: 5c7804e3279c9bdc36e5eac743b4000633b25f65
-- 
2.53.0-Meta


^ permalink raw reply

* [PATCH v3 4/5] zloop: set dma_alignment from the backing files for direct I/O
From: Keith Busch @ 2026-06-24 17:09 UTC (permalink / raw)
  To: linux-block, linux-fsdevel
  Cc: dm-devel, hch, axboe, brauner, djwong, viro, Keith Busch
In-Reply-To: <20260624170905.3972095-1-kbusch@meta.com>

From: Keith Busch <kbusch@kernel.org>

Direct I/O user pages are forwarded to the backing files unchanged, so
the backing's DMA alignment requirement applies to them. Track the
backing file's dio_mem_align and advertise it as the zloop device's
dma_alignment if it is larger than the default so we advertise proper
limits and misaligned I/O is rejected early instead of being dispatched
to the backend.

Signed-off-by: Keith Busch <kbusch@kernel.org>
---
 drivers/block/zloop.c | 35 +++++++++++++++++++++++++----------
 1 file changed, 25 insertions(+), 10 deletions(-)

diff --git a/drivers/block/zloop.c b/drivers/block/zloop.c
index 55eeb6aac0ea3..f97a20cfdb7ce 100644
--- a/drivers/block/zloop.c
+++ b/drivers/block/zloop.c
@@ -144,6 +144,7 @@ struct zloop_device {
 	unsigned int		nr_conv_zones;
 	unsigned int		max_open_zones;
 	unsigned int		block_size;
+	unsigned int		dio_mem_align;
 
 	spinlock_t		open_zones_lock;
 	struct list_head	open_zones_lru_list;
@@ -1037,20 +1038,30 @@ static int zloop_get_block_size(struct zloop_device *zlo,
 	struct kstat st;
 
 	/*
-	 * If the FS block size is lower than or equal to 4K, use that as the
-	 * device block size. Otherwise, fallback to the FS direct IO alignment
-	 * constraint if that is provided, and to the FS underlying device
-	 * physical block size if the direct IO alignment is unknown.
+	 * Use the dio alignment of the file system if provided.  The incoming
+	 * request's bio_vec is forwarded to the backing file unchanged, so its
+	 * required memory alignment becomes the device's dma_alignment when
+	 * used for direct-io.
 	 */
-	if (file_inode(zone->file)->i_sb->s_blocksize <= SZ_4K)
-		zlo->block_size = file_inode(zone->file)->i_sb->s_blocksize;
-	else if (!vfs_getattr(&zone->file->f_path, &st, STATX_DIOALIGN, 0) &&
-		 (st.result_mask & STATX_DIOALIGN))
+	if (!vfs_getattr(&zone->file->f_path, &st, STATX_DIOALIGN, 0) &&
+	    (st.result_mask & STATX_DIOALIGN)) {
 		zlo->block_size = st.dio_offset_align;
-	else if (sb_bdev)
+		zlo->dio_mem_align = st.dio_mem_align - 1;
+	} else if (sb_bdev) {
 		zlo->block_size = bdev_physical_block_size(sb_bdev);
-	else
+		zlo->dio_mem_align = bdev_dma_alignment(sb_bdev);
+	} else {
 		zlo->block_size = SECTOR_SIZE;
+		zlo->dio_mem_align = SECTOR_SIZE - 1;
+	}
+
+	/*
+	 * Prefer the FS block size for the device block size when it is no
+	 * larger than 4K; otherwise keep the direct I/O / physical block size
+	 * selected above.
+	 */
+	if (file_inode(zone->file)->i_sb->s_blocksize <= SZ_4K)
+		zlo->block_size = file_inode(zone->file)->i_sb->s_blocksize;
 
 	if (zlo->zone_capacity & ((zlo->block_size >> SECTOR_SHIFT) - 1)) {
 		pr_err("Zone capacity is not aligned to block size %u\n",
@@ -1279,6 +1290,10 @@ static int zloop_ctl_add(struct zloop_options *opts)
 
 	lim.physical_block_size = zlo->block_size;
 	lim.logical_block_size = zlo->block_size;
+	/* Direct I/O forwards the request pages to the backing files as-is. */
+	if (!opts->buffered_io)
+		lim.dma_alignment = max_t(unsigned int, zlo->dio_mem_align,
+					  SECTOR_SIZE - 1);
 	if (zlo->zone_append)
 		lim.max_hw_zone_append_sectors = lim.max_hw_sectors;
 	lim.max_open_zones = zlo->max_open_zones;
-- 
2.53.0-Meta


^ permalink raw reply related

* [PATCH v3 5/5] block: validate user space vectors during extraction
From: Keith Busch @ 2026-06-24 17:09 UTC (permalink / raw)
  To: linux-block, linux-fsdevel
  Cc: dm-devel, hch, axboe, brauner, djwong, viro, Keith Busch, stable
In-Reply-To: <20260624170905.3972095-1-kbusch@meta.com>

From: Keith Busch <kbusch@kernel.org>

The bio-based drivers don't necessarily check the alignment split, and
stacking block drivers don't always handle a misalignment detected after
submitting the bio. Validate user vectors against the device's
dma_alignment as the bio is built from the iov_iter, rejecting
misaligned vectors early with -EINVAL.

Cc: stable@vger.kernel.org
Fixes: 5ff3f74e145a ("block: simplify direct io validity check")
Fixes: 7eac33186957 ("iomap: simplify direct io validity check")
Signed-off-by: Keith Busch <kbusch@kernel.org>
---
 block/bio.c          | 56 +++++++++++++++++++++++++++++++++++++++++---
 block/blk-map.c      |  2 +-
 block/fops.c         |  2 +-
 fs/iomap/direct-io.c |  1 +
 include/linux/bio.h  |  2 +-
 include/linux/uio.h  | 10 +++++++-
 lib/iov_iter.c       |  9 ++++++-
 7 files changed, 74 insertions(+), 8 deletions(-)

diff --git a/block/bio.c b/block/bio.c
index f2a5f4d0a9672..faad41a72ac77 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -1220,10 +1220,45 @@ static int bio_iov_iter_align_down(struct bio *bio, struct iov_iter *iter,
 	return 0;
 }
 
+#ifdef CONFIG_DEBUG_KERNEL
+static inline bool bio_iov_bvec_aligned(const struct bio *bio,
+					unsigned mem_align_mask)
+{
+	struct bvec_iter iter;
+	struct bio_vec bv;
+
+	/*
+	 * Correct callers never break the alignment requirements, so this
+	 * exhaustive check is only paid for in debug builds.
+	 */
+	for_each_mp_bvec(bv, bio->bi_io_vec, iter, bio->bi_iter)
+		if ((bv.bv_offset | bv.bv_len) & mem_align_mask)
+			return false;
+	return true;
+}
+#else
+static inline bool bio_iov_bvec_aligned(const struct bio *bio,
+					unsigned mem_align_mask)
+{
+	/*
+	 * We forward the bio_vec as-is, so ITER_BVEC callers must provide
+	 * segments already aligned to the device's DMA alignment. The only
+	 * unchecked user-controllable offset that reaches here is an io_uring
+	 * registered buffer where just the first segment can be unaligned
+	 * (the rest is virtually contiguous), so checking only that one is
+	 * sufficient to know if the entire vector is valid.
+	 */
+	return !(mp_bvec_iter_offset(bio->bi_io_vec, bio->bi_iter) &
+							mem_align_mask);
+}
+#endif
+
 /**
  * bio_iov_iter_get_pages - add user or kernel pages to a bio
  * @bio: bio to add pages to
  * @iter: iov iterator describing the region to be added
+ * @mem_align_mask: the mask the source address and length must be aligned to,
+ *	0 for no requirement
  * @len_align_mask: the mask to align the total size to, 0 for any length
  *
  * This takes either an iterator pointing to user memory, or one pointing to
@@ -1242,7 +1277,7 @@ static int bio_iov_iter_align_down(struct bio *bio, struct iov_iter *iter,
  * is returned only if 0 pages could be pinned.
  */
 int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter,
-			   unsigned len_align_mask)
+			   unsigned mem_align_mask, unsigned len_align_mask)
 {
 	iov_iter_extraction_t flags = 0;
 
@@ -1251,6 +1286,10 @@ int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter,
 
 	if (iov_iter_is_bvec(iter)) {
 		bio_iov_bvec_set(bio, iter);
+
+		if (!bio_iov_bvec_aligned(bio, mem_align_mask))
+			return -EINVAL;
+
 		iov_iter_advance(iter, bio->bi_iter.bi_size);
 		return 0;
 	}
@@ -1265,8 +1304,19 @@ int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter,
 
 		ret = iov_iter_extract_bvecs(iter, bio->bi_io_vec,
 				BIO_MAX_SIZE - bio->bi_iter.bi_size,
-				&bio->bi_vcnt, bio->bi_max_vecs, flags);
+				&bio->bi_vcnt, bio->bi_max_vecs,
+				mem_align_mask, flags);
 		if (ret <= 0) {
+			/*
+			 * A misaligned vector fails the whole I/O.  Release any
+			 * pages pinned by earlier iterations before returning
+			 * since this bio won't be submitted to release them.
+			 */
+			if (ret == -EINVAL) {
+				bio_release_pages(bio, false);
+				bio_clear_flag(bio, BIO_PAGE_PINNED);
+				bio->bi_vcnt = 0;
+			}
 			if (!bio->bi_vcnt)
 				return ret;
 			break;
@@ -1377,7 +1427,7 @@ static int bio_iov_iter_bounce_read(struct bio *bio, struct iov_iter *iter,
 		ssize_t ret;
 
 		ret = iov_iter_extract_bvecs(iter, bio->bi_io_vec + 1, len,
-				&bio->bi_vcnt, bio->bi_max_vecs - 1, 0);
+				&bio->bi_vcnt, bio->bi_max_vecs - 1, 0, 0);
 		if (ret <= 0) {
 			if (!bio->bi_vcnt) {
 				folio_put(folio);
diff --git a/block/blk-map.c b/block/blk-map.c
index 768549f19f97e..c9535efe1a913 100644
--- a/block/blk-map.c
+++ b/block/blk-map.c
@@ -274,7 +274,7 @@ static int bio_map_user_iov(struct request *rq, struct iov_iter *iter,
 	 * No alignment requirements on our part to support arbitrary
 	 * passthrough commands.
 	 */
-	ret = bio_iov_iter_get_pages(bio, iter, 0);
+	ret = bio_iov_iter_get_pages(bio, iter, 0, 0);
 	if (ret)
 		goto out_put;
 	ret = blk_rq_append_bio(rq, bio);
diff --git a/block/fops.c b/block/fops.c
index 0098a90a956e1..e519d7f43b310 100644
--- a/block/fops.c
+++ b/block/fops.c
@@ -46,7 +46,7 @@ static bool blkdev_dio_invalid(struct block_device *bdev, struct kiocb *iocb,
 static inline int blkdev_iov_iter_get_pages(struct bio *bio,
 		struct iov_iter *iter, struct block_device *bdev)
 {
-	return bio_iov_iter_get_pages(bio, iter,
+	return bio_iov_iter_get_pages(bio, iter, bdev_dma_alignment(bdev),
 			bdev_logical_block_size(bdev) - 1);
 }
 
diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c
index b485e3b191daf..ff458aa12ae29 100644
--- a/fs/iomap/direct-io.c
+++ b/fs/iomap/direct-io.c
@@ -358,6 +358,7 @@ static ssize_t iomap_dio_bio_iter_one(struct iomap_iter *iter,
 				iomap_max_bio_size(&iter->iomap), alignment);
 	else
 		ret = bio_iov_iter_get_pages(bio, dio->submit.iter,
+					     bdev_dma_alignment(bio->bi_bdev),
 					     alignment - 1);
 	if (unlikely(ret))
 		goto out_put_bio;
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 8f33f717b14f5..ce34ea49ef358 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -477,7 +477,7 @@ int bdev_rw_virt(struct block_device *bdev, sector_t sector, void *data,
 		size_t len, enum req_op op);
 
 int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter,
-		unsigned len_align_mask);
+		unsigned mem_align_mask, unsigned len_align_mask);
 
 void bio_iov_bvec_set(struct bio *bio, const struct iov_iter *iter);
 void __bio_release_pages(struct bio *bio, bool mark_dirty);
diff --git a/include/linux/uio.h b/include/linux/uio.h
index a9bc5b3067e32..fe2e985d74d24 100644
--- a/include/linux/uio.h
+++ b/include/linux/uio.h
@@ -389,9 +389,17 @@ ssize_t iov_iter_extract_pages(struct iov_iter *i, struct page ***pages,
 			       size_t maxsize, unsigned int maxpages,
 			       iov_iter_extraction_t extraction_flags,
 			       size_t *offset0);
+/*
+ * Block-layer consumers (e.g. bio_iov_iter_get_pages()) require that the
+ * segments of an ITER_BVEC iterator are already aligned to the target device's
+ * DMA alignment, and forward them as-is.  In-kernel users that build their own
+ * bvecs must not create sub-aligned segments; iov_iter_extract_bvecs() enforces
+ * the same for the segments it extracts via @mem_align_mask.
+ */
 ssize_t iov_iter_extract_bvecs(struct iov_iter *iter, struct bio_vec *bv,
 		size_t max_size, unsigned short *nr_vecs,
-		unsigned short max_vecs, iov_iter_extraction_t extraction_flags);
+		unsigned short max_vecs, unsigned mem_align_mask,
+		iov_iter_extraction_t extraction_flags);
 
 /**
  * iov_iter_extract_will_pin - Indicate how pages from the iterator will be retained
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index 273919b161617..c343075951ded 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -1886,6 +1886,8 @@ static unsigned int get_contig_folio_len(struct page **pages,
  * @max_size:	maximum size to extract from @iter
  * @nr_vecs:	number of vectors in @bv (on in and output)
  * @max_vecs:	maximum vectors in @bv, including those filled before calling
+ * @mem_align_mask:	reject with -EINVAL if the source address or
+ *		length is not aligned to this mask
  * @extraction_flags: flags to qualify request
  *
  * Like iov_iter_extract_pages(), but returns physically contiguous ranges
@@ -1897,14 +1899,19 @@ static unsigned int get_contig_folio_len(struct page **pages,
  */
 ssize_t iov_iter_extract_bvecs(struct iov_iter *iter, struct bio_vec *bv,
 		size_t max_size, unsigned short *nr_vecs,
-		unsigned short max_vecs, iov_iter_extraction_t extraction_flags)
+		unsigned short max_vecs, unsigned mem_align_mask,
+		iov_iter_extraction_t extraction_flags)
 {
+	unsigned long start = (unsigned long)iter_iov_addr(iter);
 	unsigned short entries_left = max_vecs - *nr_vecs;
 	unsigned short nr_pages, i = 0;
 	size_t left, offset, len;
 	struct page **pages;
 	ssize_t size;
 
+	if ((start | iter_iov_len(iter)) & mem_align_mask)
+		return -EINVAL;
+
 	/*
 	 * Move page array up in the allocated memory for the bio vecs as far as
 	 * possible so that we can start filling biovecs from the beginning
-- 
2.53.0-Meta


^ permalink raw reply related

* [PATCH v3 2/5] block: fix dio leak on metadata mapping error
From: Keith Busch @ 2026-06-24 17:09 UTC (permalink / raw)
  To: linux-block, linux-fsdevel
  Cc: dm-devel, hch, axboe, brauner, djwong, viro, Keith Busch
In-Reply-To: <20260624170905.3972095-1-kbusch@meta.com>

From: Keith Busch <kbusch@kernel.org>

A failed integrity mapping holds a dio reference, so we need to go
through the full bio ending in case there were previously submitted
bios in the sequence.

Fixes: 2729a60bbfb92 ("block: don't silently ignore metadata for sync read/write")
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Keith Busch <kbusch@kernel.org>
---
 block/fops.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/block/fops.c b/block/fops.c
index 0827bb884d473..0098a90a956e1 100644
--- a/block/fops.c
+++ b/block/fops.c
@@ -238,8 +238,10 @@ static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
 		}
 		if (iocb->ki_flags & IOCB_HAS_METADATA) {
 			ret = bio_integrity_map_iter(bio, iocb->private);
-			if (unlikely(ret))
-				goto fail;
+			if (unlikely(ret)) {
+				bio_endio_status(bio, errno_to_blk_status(ret));
+				break;
+			}
 		}
 
 		if (is_read) {
-- 
2.53.0-Meta


^ permalink raw reply related

* [PATCH v3 1/5] block: use blkdev_iov_iter_get_pages status for errors
From: Keith Busch @ 2026-06-24 17:09 UTC (permalink / raw)
  To: linux-block, linux-fsdevel
  Cc: dm-devel, hch, axboe, brauner, djwong, viro, Keith Busch
In-Reply-To: <20260624170905.3972095-1-kbusch@meta.com>

From: Keith Busch <kbusch@kernel.org>

blkdev_iov_iter_get_pages() can return various error values, including
EIO, EFAULT, and ENOMEM. Set the actual reported status so user space
can know a little more about why an operation failed.

Signed-off-by: Keith Busch <kbusch@kernel.org>
---
 block/fops.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/block/fops.c b/block/fops.c
index 15783a6180dec..0827bb884d473 100644
--- a/block/fops.c
+++ b/block/fops.c
@@ -218,7 +218,7 @@ static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
 
 		ret = blkdev_iov_iter_get_pages(bio, iter, bdev);
 		if (unlikely(ret)) {
-			bio_endio_status(bio, BLK_STS_IOERR);
+			bio_endio_status(bio, errno_to_blk_status(ret));
 			break;
 		}
 		if (iocb->ki_flags & IOCB_NOWAIT) {
-- 
2.53.0-Meta


^ permalink raw reply related

* [PATCH v3 3/5] loop: set dma_alignment from the backing file for direct I/O
From: Keith Busch @ 2026-06-24 17:09 UTC (permalink / raw)
  To: linux-block, linux-fsdevel
  Cc: dm-devel, hch, axboe, brauner, djwong, viro, Keith Busch
In-Reply-To: <20260624170905.3972095-1-kbusch@meta.com>

From: Keith Busch <kbusch@kernel.org>

Direct I/O user pages are forwarded to the backing file unchanged, so
the backing's DMA alignment requirement applies to them. Track the
backing file's dio_mem_align and advertise it as the loop device's
dma_alignment if it is larger than the default so we advertise proper
limits and misaligned I/O is rejected early instead of being dispatched
to the backend.

Signed-off-by: Keith Busch <kbusch@kernel.org>
---
 drivers/block/loop.c | 46 ++++++++++++++++++++++++++++++++++++--------
 1 file changed, 38 insertions(+), 8 deletions(-)

diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 310de0463beb1..5fe61d542f8b7 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -54,6 +54,7 @@ struct loop_device {
 
 	struct file	*lo_backing_file;
 	unsigned int	lo_min_dio_size;
+	unsigned int	lo_dio_mem_align;
 	struct block_device *lo_device;
 
 	gfp_t		old_gfp_mask;
@@ -447,26 +448,37 @@ static void loop_reread_partitions(struct loop_device *lo)
 			__func__, lo->lo_number, lo->lo_file_name, rc);
 }
 
-static unsigned int loop_query_min_dio_size(struct loop_device *lo)
+static void loop_update_dio_alignment(struct loop_device *lo)
 {
 	struct file *file = lo->lo_backing_file;
 	struct block_device *sb_bdev = file->f_mapping->host->i_sb->s_bdev;
 	struct kstat st;
 
 	/*
-	 * Use the minimal dio alignment of the file system if provided.
+	 * Use the dio alignment of the file system if provided.  The incoming
+	 * request's bio_vec is forwarded to the backing file unchanged, so its
+	 * required memory alignment becomes the device's dma_alignment when
+	 * used for direct-io.
 	 */
 	if (!vfs_getattr(&file->f_path, &st, STATX_DIOALIGN, 0) &&
-	    (st.result_mask & STATX_DIOALIGN))
-		return st.dio_offset_align;
+	    (st.result_mask & STATX_DIOALIGN)) {
+		lo->lo_min_dio_size = st.dio_offset_align;
+		lo->lo_dio_mem_align = st.dio_mem_align - 1;
+		return;
+	}
 
 	/*
 	 * In a perfect world this wouldn't be needed, but as of Linux 6.13 only
 	 * a handful of file systems support the STATX_DIOALIGN flag.
 	 */
-	if (sb_bdev)
-		return bdev_logical_block_size(sb_bdev);
-	return SECTOR_SIZE;
+	if (sb_bdev) {
+		lo->lo_min_dio_size = bdev_logical_block_size(sb_bdev);
+		lo->lo_dio_mem_align = bdev_dma_alignment(sb_bdev);
+		return;
+	}
+
+	lo->lo_min_dio_size = SECTOR_SIZE;
+	lo->lo_dio_mem_align = SECTOR_SIZE - 1;
 }
 
 static inline int is_loop_device(struct file *file)
@@ -509,7 +521,7 @@ static void loop_assign_backing_file(struct loop_device *lo, struct file *file)
 			lo->old_gfp_mask & ~(__GFP_IO | __GFP_FS));
 	if (lo->lo_backing_file->f_flags & O_DIRECT)
 		lo->lo_flags |= LO_FLAGS_DIRECT_IO;
-	lo->lo_min_dio_size = loop_query_min_dio_size(lo);
+	loop_update_dio_alignment(lo);
 }
 
 static int loop_check_backing_file(struct file *file)
@@ -940,6 +952,19 @@ static unsigned int loop_default_blocksize(struct loop_device *lo)
 	return SECTOR_SIZE;
 }
 
+static void loop_set_dma_limit(struct loop_device *lo, struct queue_limits *lim)
+{
+	/*
+	 * Direct I/O forwards the user pages to the backing file unchanged, so
+	 * track the backing's DMA alignment requirement as the mode is toggled.
+	 */
+	if (lo->lo_flags & LO_FLAGS_DIRECT_IO)
+		lim->dma_alignment = max_t(unsigned int, lo->lo_dio_mem_align,
+					   SECTOR_SIZE - 1);
+	else
+		lim->dma_alignment = SECTOR_SIZE - 1;
+}
+
 static void loop_update_limits(struct loop_device *lo, struct queue_limits *lim,
 		unsigned int bsize)
 {
@@ -961,6 +986,7 @@ static void loop_update_limits(struct loop_device *lo, struct queue_limits *lim,
 	lim->logical_block_size = bsize;
 	lim->physical_block_size = bsize;
 	lim->io_min = bsize;
+	loop_set_dma_limit(lo, lim);
 	lim->features &= ~(BLK_FEAT_WRITE_CACHE | BLK_FEAT_ROTATIONAL);
 	if (file->f_op->fsync && !(lo->lo_flags & LO_FLAGS_READ_ONLY))
 		lim->features |= BLK_FEAT_WRITE_CACHE;
@@ -1416,6 +1442,7 @@ static int loop_set_dio(struct loop_device *lo, unsigned long arg)
 {
 	bool use_dio = !!arg;
 	unsigned int memflags;
+	struct queue_limits lim;
 
 	if (lo->lo_state != Lo_bound)
 		return -ENXIO;
@@ -1434,6 +1461,9 @@ static int loop_set_dio(struct loop_device *lo, unsigned long arg)
 		lo->lo_flags |= LO_FLAGS_DIRECT_IO;
 	else
 		lo->lo_flags &= ~LO_FLAGS_DIRECT_IO;
+	lim = queue_limits_start_update(lo->lo_queue);
+	loop_set_dma_limit(lo, &lim);
+	queue_limits_commit_update(lo->lo_queue, &lim);
 	blk_mq_unfreeze_queue(lo->lo_queue, memflags);
 	return 0;
 }
-- 
2.53.0-Meta


^ permalink raw reply related

* [PATCH] iomap: Remove FGP_NOFS from iomap_get_folio()
From: Matthew Wilcox (Oracle) @ 2026-06-24 17:42 UTC (permalink / raw)
  To: Christian Brauner
  Cc: Matthew Wilcox (Oracle), Darrick J. Wong, Jens Axboe, Namjae Jeon,
	Sungjong Seo, Yuezhang Mo, Miklos Szeredi, Andreas Gruenbacher,
	Hyunchul Lee, Konstantin Komarov, Carlos Maiolino, Damien Le Moal,
	Naohiro Aota, Johannes Thumshirn, linux-xfs, linux-fsdevel,
	linux-block, fuse-devel, gfs2, ntfs3

FGP_NOFS is legacy; filesystems should be using memalloc_nofs_save/restore
instead.  We have it here in iomap because it was buried in
grab_cache_page_write_begin() and we didn't want to change this behaviour
as part of the folio transition.

I have tested this with XFS and see no issues.  Other filesystems (cc'd)
may need to make adjustments.  Please test with lockdep enabled.

Cc: "Darrick J. Wong" <djwong@kernel.org> (iomap)
Cc: Jens Axboe <axboe@kernel.dk> (block)
Cc: Namjae Jeon <linkinjeon@kernel.org> (exfat, ntfs)
Cc: Sungjong Seo <sj1557.seo@samsung.com> (exfat)
Cc: Yuezhang Mo <yuezhang.mo@sony.com> (exfat)
Cc: Miklos Szeredi <miklos@szeredi.hu> (fuse)
Cc: Andreas Gruenbacher <agruenba@redhat.com> (gfs2)
Cc: Hyunchul Lee <hyc.lee@gmail.com> (ntfs)
Cc: Konstantin Komarov <almaz.alexandrovich@paragon-software.com> (ntfs3)
Cc: Carlos Maiolino <cem@kernel.org> (xfs)
Cc: Damien Le Moal <dlemoal@kernel.org> (zonefs)
Cc: Naohiro Aota <naohiro.aota@wdc.com> (zonefs)
Cc: Johannes Thumshirn <jth@kernel.org> (zonefs)
Cc: linux-xfs@vger.kernel.org
Cc: linux-fsdevel@vger.kernel.org
Cc: linux-block@vger.kernel.org
Cc: fuse-devel@lists.linux.dev
Cc: gfs2@lists.linux.dev
Cc: ntfs3@lists.linux.dev
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
 fs/iomap/buffered-io.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index 8d4806dc46d4..27bc2455a98d 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -768,7 +768,7 @@ EXPORT_SYMBOL_GPL(iomap_is_partially_uptodate);
  */
 struct folio *iomap_get_folio(struct iomap_iter *iter, loff_t pos, size_t len)
 {
-	fgf_t fgp = FGP_WRITEBEGIN | FGP_NOFS;
+	fgf_t fgp = FGP_WRITEBEGIN;
 
 	if (iter->flags & IOMAP_NOWAIT)
 		fgp |= FGP_NOWAIT;
-- 
2.47.3


^ permalink raw reply related

* Re: [PATCH RFC v2 17/18] fs: look up the superblock via the device table in user_get_super()
From: Darrick J. Wong @ 2026-06-24 17:54 UTC (permalink / raw)
  To: Christian Brauner
  Cc: Jan Kara, Christoph Hellwig, Jens Axboe, Alexander Viro,
	linux-block, linux-kernel, linux-fsdevel, Carlos Maiolino,
	linux-xfs, Chris Mason, David Sterba, linux-btrfs,
	Theodore Ts'o, linux-ext4, Gao Xiang, linux-erofs
In-Reply-To: <20260616-work-super-bdev_holder_global-v2-17-7df6b864028e@kernel.org>

On Tue, Jun 16, 2026 at 04:08:33PM +0200, Christian Brauner wrote:
> user_get_super() still finds the superblock for a device number by
> walking the global super_blocks list under sb_lock. Every superblock is
> registered in the device table under its s_dev since sget_fc() inserts
> it there, including superblocks on anonymous devices, so use the table
> instead.
> 
> The refcount-pinning cursor helpers super_dev_{get,first,next}() only
> touch table state and do not depend on CONFIG_BLOCK, so drop the
> CONFIG_BLOCK guard around them: their new caller serves anonymous
> devices as well (ustat() on e.g. tmpfs) and is built without
> CONFIG_BLOCK. The guard falls in this patch rather than separately
> since without this caller the helpers would be unused without
> CONFIG_BLOCK.
> 
> The pinned entry holds a passive reference on the superblock so
> super_lock() can be called directly; once the superblock is locked grab
> a passive reference for the caller before dropping the pin.
> 
> The device table contains more than the old walk could find: a
> superblock is also registered for every additional device it claims
> (the xfs log and realtime devices, btrfs member devices, the ext4
> external journal, erofs blob devices). Don't filter those out:
> specifying any device a filesystem uses now resolves to that
> filesystem, so ustat() and quotactl() work on e.g. the xfs log device
> or a btrfs member device (the latter used to fail outright as btrfs
> superblocks carry an anonymous s_dev that never matches a member
> device). When several superblocks share a device (erofs blob devices)
> the first live superblock wins.

Does erofs have a means to find the other superblocks that share a
device given a notification coming in on one of them?  As hch says, it
feels weird to have a lookup mechanism when there's also an upcall
mechanism.

<shrug> I've been on vacation for a while so maybe I missed that there's
another use for the bdev->sb lookup?  There are 1600 more emails for me
to go through... :P

--D

> 
> The cursor also keeps scanning past dying superblocks where the old
> walk gave up after the first s_dev match, so a mount racing with the
> unmount of the same device (or with the reuse of a recycled anonymous
> dev_t) finds the live superblock where the old walk could spuriously
> return NULL.
> 
> This removes the last s_dev-keyed walk of the super_blocks list and
> takes ustat() and quotactl()'s block device lookup off sb_lock
> entirely.
> 
> Signed-off-by: Christian Brauner (Amutable) <brauner@kernel.org>
> ---
>  fs/super.c | 28 ++++++++--------------------
>  1 file changed, 8 insertions(+), 20 deletions(-)
> 
> diff --git a/fs/super.c b/fs/super.c
> index 2d0a07861bfc..93f24aea75c4 100644
> --- a/fs/super.c
> +++ b/fs/super.c
> @@ -501,7 +501,6 @@ static int super_dev_register(struct super_block *sb)
>  	return err;
>  }
>  
> -#ifdef CONFIG_BLOCK
>  static struct super_dev *super_dev_get(struct rhlist_head *pos)
>  {
>  	struct super_dev *sb_dev;
> @@ -535,7 +534,6 @@ static struct super_dev *super_dev_next(struct super_dev *prev)
>  	super_dev_put(prev);
>  	return sb_dev;
>  }
> -#endif
>  
>  static void kill_super_notify(struct super_block *sb)
>  {
> @@ -1044,29 +1042,19 @@ EXPORT_SYMBOL(iterate_supers_type);
>  
>  struct super_block *user_get_super(dev_t dev, bool excl)
>  {
> -	struct super_block *sb;
> -
> -	spin_lock(&sb_lock);
> -	list_for_each_entry(sb, &super_blocks, s_list) {
> -		bool locked;
> +	struct super_dev *sb_dev;
>  
> -		if (sb->s_dev != dev)
> -			continue;
> +	for (sb_dev = super_dev_first(dev); sb_dev; sb_dev = super_dev_next(sb_dev)) {
> +		struct super_block *sb = sb_dev->sd_sb;
>  
> -		if (!refcount_inc_not_zero(&sb->s_passive))
> +		if (!super_lock(sb, excl))
>  			continue;
>  
> -		spin_unlock(&sb_lock);
> -
> -		locked = super_lock(sb, excl);
> -		if (locked)
> -			return sb;
> -
> -		put_super(sb);
> -		spin_lock(&sb_lock);
> -		break;
> +		/* The pinned entry holds a passive reference, take our own. */
> +		refcount_inc(&sb->s_passive);
> +		super_dev_put(sb_dev);
> +		return sb;
>  	}
> -	spin_unlock(&sb_lock);
>  	return NULL;
>  }
>  
> 
> -- 
> 2.47.3
> 
> 

^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox