Linux block layer
 help / color / mirror / Atom feed
* [PATCH v2 51/83] block: rust: add zoned block device support
From: Andreas Hindborg @ 2026-06-09 19:08 UTC (permalink / raw)
  To: Liam R. Howlett, Alice Ryhl, Anna-Maria Behnsen, Benno Lossin,
	Björn Roy Baron, Boqun Feng, Danilo Krummrich,
	FUJITA Tomonori, Frederic Weisbecker, Gary Guo, Jens Axboe,
	John Stultz, Lorenzo Stoakes, Lyude Paul, Miguel Ojeda,
	Stephen Boyd, Thomas Gleixner, Trevor Gross, Liam R. Howlett,
	Boqun Feng, Lorenzo Stoakes
  Cc: Andreas Hindborg, linux-block, linux-kernel, linux-mm,
	rust-for-linux
In-Reply-To: <20260609-rnull-v6-19-rc5-send-v2-0-82c7404542e2@kernel.org>

Add support for zoned block devices to the Rust block layer bindings.
This includes the `report_zones` callback in `Operations` and methods
in `GenDiskBuilder` to configure zoned device parameters.

Drivers can mark a disk as zoned and configure the zone size and
maximum zone append size. The `report_zones` callback is invoked by
the block layer to query zone information.

Signed-off-by: Andreas Hindborg <a.hindborg@kernel.org>
---
 rust/bindings/bindings_helper.h    |  1 +
 rust/kernel/block/mq/gen_disk.rs   | 95 +++++++++++++++++++++++++++++++++-----
 rust/kernel/block/mq/operations.rs | 61 +++++++++++++++++++++++-
 3 files changed, 145 insertions(+), 12 deletions(-)

diff --git a/rust/bindings/bindings_helper.h b/rust/bindings/bindings_helper.h
index eaf05d60dda9..2a69c17bf271 100644
--- a/rust/bindings/bindings_helper.h
+++ b/rust/bindings/bindings_helper.h
@@ -139,6 +139,7 @@ const blk_status_t RUST_CONST_HELPER_BLK_STS_ZONE_ACTIVE_RESOURCE = BLK_STS_ZONE
 const blk_status_t RUST_CONST_HELPER_BLK_STS_OFFLINE = BLK_STS_OFFLINE;
 const blk_status_t RUST_CONST_HELPER_BLK_STS_DURATION_LIMIT = BLK_STS_DURATION_LIMIT;
 const blk_status_t RUST_CONST_HELPER_BLK_STS_INVAL = BLK_STS_INVAL;
+const blk_features_t RUST_CONST_HELPER_BLK_FEAT_ZONED = BLK_FEAT_ZONED;
 const fop_flags_t RUST_CONST_HELPER_FOP_UNSIGNED_OFFSET = FOP_UNSIGNED_OFFSET;
 
 const xa_mark_t RUST_CONST_HELPER_XA_PRESENT = XA_PRESENT;
diff --git a/rust/kernel/block/mq/gen_disk.rs b/rust/kernel/block/mq/gen_disk.rs
index 79a67b545eca..eedba691e167 100644
--- a/rust/kernel/block/mq/gen_disk.rs
+++ b/rust/kernel/block/mq/gen_disk.rs
@@ -8,6 +8,7 @@
 use crate::{
     bindings,
     block::mq::{
+        operations::OperationsVTable,
         Operations,
         RequestQueue,
         TagSet, //
@@ -48,6 +49,12 @@ pub struct GenDiskBuilder<T> {
     physical_block_size: u32,
     capacity_sectors: u64,
     max_hw_discard_sectors: u32,
+    #[cfg(CONFIG_BLK_DEV_ZONED)]
+    zoned: bool,
+    #[cfg(CONFIG_BLK_DEV_ZONED)]
+    zone_size_sectors: u32,
+    #[cfg(CONFIG_BLK_DEV_ZONED)]
+    zone_append_max_sectors: u32,
     _p: PhantomData<T>,
 }
 
@@ -59,6 +66,12 @@ fn default() -> Self {
             physical_block_size: bindings::PAGE_SIZE as u32,
             capacity_sectors: 0,
             max_hw_discard_sectors: 0,
+            #[cfg(CONFIG_BLK_DEV_ZONED)]
+            zoned: false,
+            #[cfg(CONFIG_BLK_DEV_ZONED)]
+            zone_size_sectors: 0,
+            #[cfg(CONFIG_BLK_DEV_ZONED)]
+            zone_append_max_sectors: 0,
             _p: PhantomData,
         }
     }
@@ -130,6 +143,27 @@ pub fn max_hw_discard_sectors(mut self, max_hw_discard_sectors: u32) -> Self {
         self
     }
 
+    /// Mark this device as a zoned block device.
+    #[cfg(CONFIG_BLK_DEV_ZONED)]
+    pub fn zoned(mut self, enable: bool) -> Self {
+        self.zoned = enable;
+        self
+    }
+
+    /// Set the zone size of this block device.
+    #[cfg(CONFIG_BLK_DEV_ZONED)]
+    pub fn zone_size(mut self, sectors: u32) -> Self {
+        self.zone_size_sectors = sectors;
+        self
+    }
+
+    /// Set the max zone append size for this block device.
+    #[cfg(CONFIG_BLK_DEV_ZONED)]
+    pub fn zone_append_max(mut self, sectors: u32) -> Self {
+        self.zone_append_max_sectors = sectors;
+        self
+    }
+
     /// Build a new `GenDisk` and add it to the VFS.
     pub fn build(
         self,
@@ -149,7 +183,18 @@ pub fn build(
         lim.physical_block_size = self.physical_block_size;
         lim.max_hw_discard_sectors = self.max_hw_discard_sectors;
         if self.rotational {
-            lim.features = bindings::BLK_FEAT_ROTATIONAL;
+            lim.features |= bindings::BLK_FEAT_ROTATIONAL;
+        }
+
+        #[cfg(CONFIG_BLK_DEV_ZONED)]
+        if self.zoned {
+            if !T::HAS_REPORT_ZONES {
+                return Err(error::code::EINVAL);
+            }
+
+            lim.features |= bindings::BLK_FEAT_ZONED;
+            lim.chunk_sectors = self.zone_size_sectors;
+            lim.max_hw_zone_append_sectors = self.zone_append_max_sectors;
         }
 
         // SAFETY: `tagset.raw_tag_set()` points to a valid and initialized tag set
@@ -179,14 +224,6 @@ pub fn build(
         // operation, so we will not race.
         unsafe { bindings::set_capacity(gendisk, self.capacity_sectors) };
 
-        crate::error::to_result(
-            // SAFETY: `gendisk` points to a valid and initialized instance of
-            // `struct gendisk`.
-            unsafe {
-                bindings::device_add_disk(core::ptr::null_mut(), gendisk, core::ptr::null_mut())
-            },
-        )?;
-
         recover_data.dismiss();
 
         // INVARIANT: `gendisk` was initialized above.
@@ -214,7 +251,27 @@ pub fn build(
             GFP_KERNEL,
         )?;
 
-        Ok(disk.into())
+        let disk: Arc<_> = disk.into();
+
+        // SAFETY: `disk.gendisk` is valid for write as we initialized it above. We have exclusive
+        // access.
+        unsafe { (*disk.gendisk).private_data = Arc::as_ptr(&disk).cast_mut().cast() };
+
+        #[cfg(CONFIG_BLK_DEV_ZONED)]
+        if self.zoned {
+            // SAFETY: `disk.gendisk` is valid as we initialized it above. We have exclusive access.
+            unsafe { bindings::blk_revalidate_disk_zones(gendisk) };
+        }
+
+        crate::error::to_result(
+            // SAFETY: `gendisk` points to a valid and initialized instance of
+            // `struct gendisk`.
+            unsafe {
+                bindings::device_add_disk(core::ptr::null_mut(), gendisk, core::ptr::null_mut())
+            },
+        )?;
+
+        Ok(disk)
     }
 
     const VTABLE: bindings::block_device_operations = bindings::block_device_operations {
@@ -228,7 +285,11 @@ pub fn build(
         getgeo: None,
         set_read_only: None,
         swap_slot_free_notify: None,
-        report_zones: None,
+        report_zones: if T::HAS_REPORT_ZONES {
+            Some(OperationsVTable::<T>::report_zones_callback)
+        } else {
+            None
+        },
         devnode: None,
         alternative_gpt_sector: None,
         get_unique_id: None,
@@ -327,6 +388,18 @@ fn drop(&mut self) {
 /// `self.0` is valid for use as a reference.
 pub struct GenDiskRef<T: Operations>(NonNull<GenDisk<T>>);
 
+impl<T: Operations> GenDiskRef<T> {
+    /// Create a `GenDiskRef` from a pointer to a `GenDisk`.
+    ///
+    /// # Safety
+    ///
+    /// `ptr` must be valid for use as a `GenDisk` reference for the lifetime of the returned
+    /// `GenDiskRef`.
+    pub(crate) unsafe fn from_ptr(ptr: NonNull<GenDisk<T>>) -> GenDiskRef<T> {
+        Self(ptr)
+    }
+}
+
 // SAFETY: It is safe to transfer ownership of `GenDiskRef` across thread boundaries.
 unsafe impl<T: Operations> Send for GenDiskRef<T> {}
 
diff --git a/rust/kernel/block/mq/operations.rs b/rust/kernel/block/mq/operations.rs
index b9a2bf6592b3..71d4192d627f 100644
--- a/rust/kernel/block/mq/operations.rs
+++ b/rust/kernel/block/mq/operations.rs
@@ -9,6 +9,7 @@
     block::{
         error::BlkResult,
         mq::{
+            gen_disk::GenDiskRef,
             request::RequestDataWrapper,
             IdleRequest,
             Request, //
@@ -16,6 +17,7 @@
     },
     error::{
         from_result,
+        to_result,
         Result, //
     },
     prelude::*,
@@ -29,7 +31,10 @@
         Owned, //
     },
 };
-use core::marker::PhantomData;
+use core::{
+    marker::PhantomData,
+    ptr::NonNull, //
+};
 use pin_init::PinInit;
 
 type ForeignBorrowed<'a, T> = <T as ForeignOwnable>::Borrowed<'a>;
@@ -107,6 +112,20 @@ fn init_hctx(
     fn poll(_hw_data: ForeignBorrowed<'_, Self::HwData>) -> bool {
         build_error!(crate::error::VTABLE_DEFAULT_ERROR)
     }
+
+    /// Called by the kernel to get a zone report from the driver.
+    ///
+    /// The driver must call `callback` once for each zone on `disk` and populate the first argument
+    /// with a zone descriptor and the second argument with the zone index.
+    // TODO: We cannot gate this on CONFIG_BLK_DEV_ZONED due to limitations of the `vtable` macro.
+    fn report_zones(
+        _disk: &GenDiskRef<Self>,
+        _sector: u64,
+        _nr_zones: u32,
+        _callback: impl Fn(&bindings::blk_zone, u32) -> Result,
+    ) -> Result<u32> {
+        Err(ENOTSUPP)
+    }
 }
 
 /// A vtable for blk-mq to interact with a block device driver.
@@ -359,6 +378,46 @@ impl<T: Operations> OperationsVTable<T> {
         unsafe { core::ptr::drop_in_place(pdu) };
     }
 
+    /// This function is a callback hook for the C kernel. A pointer to this function is
+    /// installed in the `block_device_operations` vtable for the driver.
+    ///
+    /// # Safety
+    ///
+    /// - This function may only be called by the C block layer infrastructure.
+    /// - `disk_ptr` must be a pointer to a gendisk initialized by `GenDiskBuilder::build`.
+    pub(crate) unsafe extern "C" fn report_zones_callback(
+        disk_ptr: *mut bindings::gendisk,
+        sector: u64,
+        nr_zones: u32,
+        args: *mut bindings::blk_report_zones_args,
+    ) -> i32 {
+        // SAFETY: As `disk_ptr` is a gendisk initialized by `GenDiskBuilder::build`,
+        // `private_data` is not null.
+        let disk_ref_ptr = unsafe { NonNull::new_unchecked((*disk_ptr).private_data.cast()) };
+
+        // SAFETY: `disk_ptr.private_data` is a pointer to the `GenDisk` owner of `disk_ptr` that we
+        // installed when we initialized `disk_ptr`. It is valid for use as a reference for the
+        // duration of this call.
+        let disk = unsafe { GenDiskRef::from_ptr(disk_ref_ptr) };
+
+        from_result(|| {
+            T::report_zones(&disk, sector, nr_zones, |zone, idx| -> Result {
+                to_result(
+                    // SAFETY: `disk_ptr` is valid by function safety requirements.
+                    unsafe {
+                        bindings::disk_report_zone(
+                            disk_ptr,
+                            core::ptr::from_ref(zone).cast_mut(),
+                            idx,
+                            args,
+                        )
+                    },
+                )
+            })
+            .and_then(|v: u32| -> Result<_> { Ok(v.try_into()?) })
+        })
+    }
+
     const VTABLE: bindings::blk_mq_ops = bindings::blk_mq_ops {
         queue_rq: Some(Self::queue_rq_callback),
         queue_rqs: None,

-- 
2.51.2



^ permalink raw reply related

* [PATCH v2 48/83] block: rust: add an abstraction for `bindings::req_op`
From: Andreas Hindborg @ 2026-06-09 19:08 UTC (permalink / raw)
  To: Liam R. Howlett, Alice Ryhl, Anna-Maria Behnsen, Benno Lossin,
	Björn Roy Baron, Boqun Feng, Danilo Krummrich,
	FUJITA Tomonori, Frederic Weisbecker, Gary Guo, Jens Axboe,
	John Stultz, Lorenzo Stoakes, Lyude Paul, Miguel Ojeda,
	Stephen Boyd, Thomas Gleixner, Trevor Gross, Liam R. Howlett,
	Boqun Feng, Lorenzo Stoakes
  Cc: Andreas Hindborg, linux-block, linux-kernel, linux-mm,
	rust-for-linux
In-Reply-To: <20260609-rnull-v6-19-rc5-send-v2-0-82c7404542e2@kernel.org>

Add the `Command` enum as a Rust abstraction for block request operation
codes. The enum variants correspond to the C `REQ_OP_*` defines and
include read, write, flush, discard, and zone management operations.

Also add a `command()` method to `Request` to retrieve the operation
code.

Signed-off-by: Andreas Hindborg <a.hindborg@kernel.org>
---
 drivers/block/rnull/rnull.rs            |  6 +--
 rust/kernel/block/mq.rs                 |  1 +
 rust/kernel/block/mq/request.rs         | 18 +++++----
 rust/kernel/block/mq/request/command.rs | 65 +++++++++++++++++++++++++++++++++
 4 files changed, 79 insertions(+), 11 deletions(-)

diff --git a/drivers/block/rnull/rnull.rs b/drivers/block/rnull/rnull.rs
index 491979daa50e..5ec17a2674b7 100644
--- a/drivers/block/rnull/rnull.rs
+++ b/drivers/block/rnull/rnull.rs
@@ -547,10 +547,10 @@ fn transfer(
                 let length_sectors_allowed = segment_length_sectors.min(max_remaining_sectors);
                 segment.truncate(length_sectors_allowed << SECTOR_SHIFT);
                 match command {
-                    bindings::req_op_REQ_OP_WRITE => {
+                    mq::Command::Write => {
                         self.write(&mut tree_guard, &mut hw_data_guard, sector, segment)?
                     }
-                    bindings::req_op_REQ_OP_READ => {
+                    mq::Command::Read => {
                         self.read(&mut tree_guard, &mut hw_data_guard, sector, segment)?
                     }
                     _ => (),
@@ -743,7 +743,7 @@ fn queue_rq(
 
         if this.memory_backed {
             memalloc_scope!(let _noio: NoIo);
-            if rq.command() == bindings::req_op_REQ_OP_DISCARD {
+            if rq.command() == mq::Command::Discard {
                 this.discard(&hw_data, rq.sector(), sectors)?;
             } else {
                 this.transfer(&hw_data, &mut rq, sectors)?;
diff --git a/rust/kernel/block/mq.rs b/rust/kernel/block/mq.rs
index 503623267b19..5bf2cf2736a5 100644
--- a/rust/kernel/block/mq.rs
+++ b/rust/kernel/block/mq.rs
@@ -132,6 +132,7 @@
 
 pub use operations::Operations;
 pub use request::{
+    Command,
     IdleRequest,
     Request,
     RequestTimerHandle, //
diff --git a/rust/kernel/block/mq/request.rs b/rust/kernel/block/mq/request.rs
index a05df2351c2c..63e248970ab1 100644
--- a/rust/kernel/block/mq/request.rs
+++ b/rust/kernel/block/mq/request.rs
@@ -45,6 +45,9 @@
     BioIterator, //
 };
 
+mod command;
+pub use command::Command;
+
 /// A [`Request`] that a driver has not yet begun to process.
 ///
 /// A driver can convert an `IdleRequest` to a [`Request`] by calling [`IdleRequest::start`].
@@ -111,11 +114,17 @@ fn deref(&self) -> &Self::Target {
 
 impl<T: Operations> RequestInner<T> {
     /// Get the command identifier for the request
-    pub fn command(&self) -> u32 {
+    fn command_raw(&self) -> u32 {
         // SAFETY: By C API contract and type invariant, `cmd_flags` is valid for read
         unsafe { (*self.0.get()).cmd_flags & ((1 << bindings::REQ_OP_BITS) - 1) }
     }
 
+    /// Get the command of this request.
+    pub fn command(&self) -> Command {
+        // SAFETY: `command_raw` returns the `REQ_OP_BITS` field of `cmd_flags`, a `req_op` code.
+        unsafe { Command::from_raw(self.command_raw()) }
+    }
+
     /// Get the target sector for the request.
     #[inline(always)]
     pub fn sector(&self) -> u64 {
@@ -242,13 +251,6 @@ pub(crate) unsafe fn aref_from_raw(ptr: *mut bindings::request) -> ARef<Self> {
         unsafe { ARef::from_raw(NonNull::new_unchecked(ptr.cast())) }
     }
 
-    /// Get the command identifier for the request
-    pub fn command(&self) -> u32 {
-        use core::ops::BitAnd;
-        // SAFETY: By C API contract and type invariant, `cmd_flags` is valid for read
-        unsafe { (*self.0 .0.get()).cmd_flags }.bitand((1u32 << bindings::REQ_OP_BITS) - 1)
-    }
-
     /// Complete the request by scheduling `Operations::complete` for
     /// execution.
     ///
diff --git a/rust/kernel/block/mq/request/command.rs b/rust/kernel/block/mq/request/command.rs
new file mode 100644
index 000000000000..70a8d67fa35c
--- /dev/null
+++ b/rust/kernel/block/mq/request/command.rs
@@ -0,0 +1,65 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/// Block I/O operation codes.
+///
+/// This is the Rust abstraction for the C [`enum req_op`].
+///
+/// Operations common to the bio and request structures. The kernel uses 8 bits
+/// for encoding the operation, and the remaining 24 bits for flags.
+///
+/// The least significant bit of the operation number indicates the data
+/// transfer direction:
+///
+/// - If the least significant bit is set, transfers are TO the device.
+/// - If the least significant bit is not set, transfers are FROM the device.
+///
+/// If an operation does not transfer data, the least significant bit has no
+/// meaning.
+///
+/// [`enum req_op`]: srctree/include/linux/blk_types.h
+#[derive(Copy, Clone, Debug, PartialEq, Eq)]
+#[repr(u32)]
+pub enum Command {
+    /// Read sectors from the device.
+    Read = bindings::req_op_REQ_OP_READ,
+    /// Write sectors to the device.
+    Write = bindings::req_op_REQ_OP_WRITE,
+    /// Flush the volatile write cache.
+    Flush = bindings::req_op_REQ_OP_FLUSH,
+    /// Discard sectors.
+    Discard = bindings::req_op_REQ_OP_DISCARD,
+    /// Securely erase sectors.
+    SecureErase = bindings::req_op_REQ_OP_SECURE_ERASE,
+    /// Write data at the current zone write pointer.
+    ZoneAppend = bindings::req_op_REQ_OP_ZONE_APPEND,
+    /// Write zeroes. This allows implementing zeroing for devices that don't use either discard
+    /// with a predictable zero pattern or WRITE SAME of zeroes.
+    WriteZeroes = bindings::req_op_REQ_OP_WRITE_ZEROES,
+    /// Open a zone.
+    ZoneOpen = bindings::req_op_REQ_OP_ZONE_OPEN,
+    /// Close a zone.
+    ZoneClose = bindings::req_op_REQ_OP_ZONE_CLOSE,
+    /// Transition a zone to full.
+    ZoneFinish = bindings::req_op_REQ_OP_ZONE_FINISH,
+    /// Reset a zone write pointer.
+    ZoneReset = bindings::req_op_REQ_OP_ZONE_RESET,
+    /// Reset all the zones present on the device.
+    ZoneResetAll = bindings::req_op_REQ_OP_ZONE_RESET_ALL,
+    /// Driver private request for data transfer to the driver.
+    DriverIn = bindings::req_op_REQ_OP_DRV_IN,
+    /// Driver private request for data transfer from the driver.
+    DriverOut = bindings::req_op_REQ_OP_DRV_OUT,
+}
+
+impl Command {
+    /// Creates a [`Command`] from a raw `u32` value.
+    ///
+    /// # Safety
+    ///
+    /// The value must be a valid `req_op` operation code.
+    pub unsafe fn from_raw(value: u32) -> Self {
+        // SAFETY: The caller guarantees that the value is a valid operation
+        // code.
+        unsafe { core::mem::transmute(value) }
+    }
+}

-- 
2.51.2



^ permalink raw reply related

* [PATCH v2 18/83] block: rnull: add `use_per_node_hctx` config option
From: Andreas Hindborg @ 2026-06-09 19:07 UTC (permalink / raw)
  To: Liam R. Howlett, Alice Ryhl, Anna-Maria Behnsen, Benno Lossin,
	Björn Roy Baron, Boqun Feng, Danilo Krummrich,
	FUJITA Tomonori, Frederic Weisbecker, Gary Guo, Jens Axboe,
	John Stultz, Lorenzo Stoakes, Lyude Paul, Miguel Ojeda,
	Stephen Boyd, Thomas Gleixner, Trevor Gross, Liam R. Howlett,
	Boqun Feng, Lorenzo Stoakes
  Cc: Andreas Hindborg, linux-block, linux-kernel, linux-mm,
	rust-for-linux
In-Reply-To: <20260609-rnull-v6-19-rc5-send-v2-0-82c7404542e2@kernel.org>

Add a configfs attribute to enable per-NUMA-node hardware contexts.
When enabled, the driver creates one hardware queue per NUMA node
instead of the default configuration.

Signed-off-by: Andreas Hindborg <a.hindborg@kernel.org>
---
 drivers/block/rnull/configfs.rs | 24 ++++++++++++++++++++++--
 drivers/block/rnull/rnull.rs    | 28 ++++++++++++++++++++++------
 2 files changed, 44 insertions(+), 8 deletions(-)

diff --git a/drivers/block/rnull/configfs.rs b/drivers/block/rnull/configfs.rs
index 0dea92a9079b..71b38373be33 100644
--- a/drivers/block/rnull/configfs.rs
+++ b/drivers/block/rnull/configfs.rs
@@ -33,7 +33,8 @@
     configfs_simple_bool_field,
     configfs_simple_field,
     show_field,
-    store_number_with_power_check, //
+    store_number_with_power_check,
+    store_with_power_check, //
 };
 
 mod macros;
@@ -62,7 +63,7 @@ impl AttributeOperations<0> for Config {
         let mut writer = kernel::str::Formatter::new(page);
         writer.write_str(
             "blocksize,size,rotational,irqmode,completion_nsec,memory_backed,\
-             submit_queues\n",
+             submit_queues,use_per_node_hctx\n",
         )?;
         Ok(writer.bytes_written())
     }
@@ -89,6 +90,7 @@ fn make_group(
                 completion_nsec: 5,
                 memory_backed: 6,
                 submit_queues: 7,
+                use_per_node_hctx: 8,
             ],
         };
 
@@ -268,3 +270,21 @@ fn store(this: &DeviceConfig, page: &[u8]) -> Result {
         Ok(())
     }
 }
+
+configfs_attribute!(DeviceConfig, 8,
+    show: |this, page| show_field(
+        this.data.lock().submit_queues == kernel::numa::num_online_nodes(), page
+    ),
+    store: |this, page| store_with_power_check(this, page, |data, page| {
+        let value = core::str::from_utf8(page)?
+            .trim()
+            .parse::<u8>()
+            .map_err(|_| kernel::error::code::EINVAL)?
+            != 0;
+
+        if value {
+            data.submit_queues = kernel::numa::num_online_nodes();
+        }
+        Ok(())
+    })
+);
diff --git a/drivers/block/rnull/rnull.rs b/drivers/block/rnull/rnull.rs
index a7c35f33631a..30de022146ec 100644
--- a/drivers/block/rnull/rnull.rs
+++ b/drivers/block/rnull/rnull.rs
@@ -21,12 +21,18 @@
         },
     },
     error::Result,
-    memalloc_scope, new_mutex, new_xarray,
+    memalloc_scope,
+    new_mutex,
+    new_xarray,
     page::SafePage,
     pr_info,
     prelude::*,
     str::CString,
-    sync::{aref::ARef, Arc, Mutex},
+    sync::{
+        aref::ARef,
+        Arc,
+        Mutex, //
+    },
     time::{
         hrtimer::{
             HrTimerCallback,
@@ -40,7 +46,7 @@
         OwnableRefCounted,
         Owned, //
     },
-    xarray::XArray,
+    xarray::XArray, //
 };
 
 module! {
@@ -71,8 +77,9 @@
             description:  "IRQ completion handler. 0-none, 1-softirq, 2-timer",
         },
         completion_nsec: u64 {
-            default: 10_000,
-            description:  "Time in ns to complete a request in hardware. Default: 10,000ns",
+                default: 10_000,
+                description:
+            "Time in ns to complete a request in hardware. Default: 10,000ns",
         },
         memory_backed: bool {
             default: false,
@@ -82,6 +89,10 @@
             default: 1,
             description: "Number of submission queues",
         },
+        use_per_node_hctx: bool {
+            default: false,
+            description: "Use per-node allocation for hardware context queues.",
+        },
     },
 }
 
@@ -104,6 +115,11 @@ fn init(_module: &'static ThisModule) -> impl PinInit<Self, Error> {
             for i in 0..module_parameters::nr_devices.value() {
                 let name = CString::try_from_fmt(fmt!("rnullb{}", i))?;
 
+                let submit_queues = if module_parameters::use_per_node_hctx.value() {
+                    kernel::numa::num_online_nodes()
+                } else {
+                    module_parameters::submit_queues.value()
+                };
                 let disk = NullBlkDevice::new(NullBlkOptions {
                     name: &name,
                     block_size: module_parameters::bs.value(),
@@ -112,7 +128,7 @@ fn init(_module: &'static ThisModule) -> impl PinInit<Self, Error> {
                     irq_mode: module_parameters::irqmode.value().try_into()?,
                     completion_time: Delta::from_nanos(completion_time),
                     memory_backed: module_parameters::memory_backed.value(),
-                    submit_queues: module_parameters::submit_queues.value(),
+                    submit_queues,
                 })?;
                 disks.push(disk, GFP_KERNEL)?;
             }

-- 
2.51.2



^ permalink raw reply related

* [PATCH v2 04/83] block: rnull: add module parameters
From: Andreas Hindborg @ 2026-06-09 19:07 UTC (permalink / raw)
  To: Liam R. Howlett, Alice Ryhl, Anna-Maria Behnsen, Benno Lossin,
	Björn Roy Baron, Boqun Feng, Danilo Krummrich,
	FUJITA Tomonori, Frederic Weisbecker, Gary Guo, Jens Axboe,
	John Stultz, Lorenzo Stoakes, Lyude Paul, Miguel Ojeda,
	Stephen Boyd, Thomas Gleixner, Trevor Gross, Liam R. Howlett,
	Boqun Feng, Lorenzo Stoakes
  Cc: Andreas Hindborg, linux-block, linux-kernel, linux-mm,
	rust-for-linux
In-Reply-To: <20260609-rnull-v6-19-rc5-send-v2-0-82c7404542e2@kernel.org>

Module parameter support for Rust modules was merged a few releases back.
Add module parameter support to the rnull driver.

This allows the user to control the driver either via configfs or module
parameters, just like the C counterpart.

Please note that the rust module parameters do not support boolean values.
Flags that should have been booleans are parsed as integers and compared to
zero.

Signed-off-by: Andreas Hindborg <a.hindborg@kernel.org>
---
 drivers/block/rnull/rnull.rs | 50 +++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 49 insertions(+), 1 deletion(-)

diff --git a/drivers/block/rnull/rnull.rs b/drivers/block/rnull/rnull.rs
index d58d2c4c5f63..77ccc6850961 100644
--- a/drivers/block/rnull/rnull.rs
+++ b/drivers/block/rnull/rnull.rs
@@ -18,10 +18,14 @@
             TagSet, //
         },
     },
+    error::Result,
+    new_mutex, pr_info,
     prelude::*,
+    str::CString,
     sync::{
         aref::ARef,
-        Arc, //
+        Arc,
+        Mutex, //
     },
 };
 
@@ -31,20 +35,64 @@
     authors: ["Andreas Hindborg"],
     description: "Rust implementation of the C null block driver",
     license: "GPL v2",
+    params: {
+        gb: u64 {
+            default: 4,
+            description: "Device capacity in GiB",
+        },
+        rotational: bool {
+            default: false,
+            description: "Set the rotational feature for the device.",
+        },
+        bs: u32 {
+            default: 4096,
+            description: "Block size (in bytes)",
+        },
+        nr_devices: u64 {
+            default: 1,
+            description: "Number of devices to register",
+        },
+        irqmode: u8 {
+            default: 0,
+            description:  "IRQ completion handler. 0-none, 1-softirq",
+        },
+    },
 }
 
 #[pin_data]
 struct NullBlkModule {
     #[pin]
     configfs_subsystem: kernel::configfs::Subsystem<configfs::Config>,
+    #[pin]
+    param_disks: Mutex<KVec<GenDisk<NullBlkDevice>>>,
 }
 
 impl kernel::InPlaceModule for NullBlkModule {
     fn init(_module: &'static ThisModule) -> impl PinInit<Self, Error> {
         pr_info!("Rust null_blk loaded\n");
 
+        let mut disks = KVec::new();
+
+        let defer_init = move || -> Result<_, Error> {
+            for i in 0..module_parameters::nr_devices.value() {
+                let name = CString::try_from_fmt(fmt!("rnullb{}", i))?;
+
+                let disk = NullBlkDevice::new(
+                    &name,
+                    module_parameters::bs.value(),
+                    module_parameters::rotational.value(),
+                    module_parameters::gb.value() * 1024,
+                    module_parameters::irqmode.value().try_into()?,
+                )?;
+                disks.push(disk, GFP_KERNEL)?;
+            }
+
+            Ok(disks)
+        };
+
         try_pin_init!(Self {
             configfs_subsystem <- configfs::subsystem(),
+            param_disks <- new_mutex!(defer_init()?),
         })
     }
 }

-- 
2.51.2



^ permalink raw reply related

* [PATCH v2 56/83] block: rust: add polled completion support
From: Andreas Hindborg @ 2026-06-09 19:08 UTC (permalink / raw)
  To: Liam R. Howlett, Alice Ryhl, Anna-Maria Behnsen, Benno Lossin,
	Björn Roy Baron, Boqun Feng, Danilo Krummrich,
	FUJITA Tomonori, Frederic Weisbecker, Gary Guo, Jens Axboe,
	John Stultz, Lorenzo Stoakes, Lyude Paul, Miguel Ojeda,
	Stephen Boyd, Thomas Gleixner, Trevor Gross, Liam R. Howlett,
	Boqun Feng, Lorenzo Stoakes
  Cc: Andreas Hindborg, linux-block, linux-kernel, linux-mm,
	rust-for-linux
In-Reply-To: <20260609-rnull-v6-19-rc5-send-v2-0-82c7404542e2@kernel.org>

Add support for polled I/O completion to the Rust block layer bindings.
This includes the `poll` callback in `Operations` and the
`IoCompletionBatch` type for batched completions.

The `poll` callback is invoked by the block layer when polling for
completed requests on poll queues. Drivers can use `IoCompletionBatch`
to batch multiple completions efficiently.

Signed-off-by: Andreas Hindborg <a.hindborg@kernel.org>
---
 drivers/block/rnull/rnull.rs       |   1 +
 rust/helpers/blk.c                 |   7 +++
 rust/kernel/block/mq.rs            |   8 ++-
 rust/kernel/block/mq/operations.rs | 104 +++++++++++++++++++++++++++++++++++--
 rust/kernel/block/mq/request.rs    |   5 ++
 5 files changed, 120 insertions(+), 5 deletions(-)

diff --git a/drivers/block/rnull/rnull.rs b/drivers/block/rnull/rnull.rs
index 6fb307e33263..076493f92516 100644
--- a/drivers/block/rnull/rnull.rs
+++ b/drivers/block/rnull/rnull.rs
@@ -777,6 +777,7 @@ fn queue_rq(
         this: ArcBorrow<'_, Self>,
         rq: Owned<mq::IdleRequest<Self>>,
         _is_last: bool,
+        _is_poll: bool,
     ) -> BlkResult {
         if this.bandwidth_limit != 0 {
             if !this.bandwidth_timer.active() {
diff --git a/rust/helpers/blk.c b/rust/helpers/blk.c
index 6a70e1306a3a..500e3c6fd951 100644
--- a/rust/helpers/blk.c
+++ b/rust/helpers/blk.c
@@ -20,3 +20,10 @@ __rust_helper void rust_helper_bio_advance_iter_single(const struct bio *bio,
 {
 	bio_advance_iter_single(bio, iter, bytes);
 }
+
+bool rust_helper_blk_mq_add_to_batch(struct request *req,
+				     struct io_comp_batch *iob, bool is_error,
+				     void (*complete)(struct io_comp_batch *))
+{
+	return blk_mq_add_to_batch(req, iob, is_error, complete);
+}
diff --git a/rust/kernel/block/mq.rs b/rust/kernel/block/mq.rs
index e9bea19d684b..23bf95136bc1 100644
--- a/rust/kernel/block/mq.rs
+++ b/rust/kernel/block/mq.rs
@@ -89,7 +89,8 @@
 //!         _hw_data: (),
 //!         _queue_data: (),
 //!         rq: Owned<IdleRequest<Self>>,
-//!         _is_last: bool
+//!         _is_last: bool,
+//!         is_poll: bool
 //!     ) -> BlkResult {
 //!         rq.start().end_ok();
 //!         Ok(())
@@ -130,7 +131,10 @@
 mod request_queue;
 pub mod tag_set;
 
-pub use operations::Operations;
+pub use operations::{
+    IoCompletionBatch,
+    Operations, //
+};
 pub use request::{
     Command,
     IdleRequest,
diff --git a/rust/kernel/block/mq/operations.rs b/rust/kernel/block/mq/operations.rs
index 06faf5647aaa..1be4695ca944 100644
--- a/rust/kernel/block/mq/operations.rs
+++ b/rust/kernel/block/mq/operations.rs
@@ -91,6 +91,7 @@ fn queue_rq(
         queue_data: ForeignBorrowed<'_, Self::QueueData>,
         rq: Owned<IdleRequest<Self>>,
         is_last: bool,
+        is_poll: bool,
     ) -> BlkResult;
 
     /// Called by the kernel to indicate that queued requests should be submitted.
@@ -110,7 +111,15 @@ fn init_hctx(
 
     /// Called by the kernel to poll the device for completed requests. Only
     /// used for poll queues.
-    fn poll(_hw_data: ForeignBorrowed<'_, Self::HwData>) -> bool {
+    ///
+    /// Should return `Ok(true)` if any requests were completed during the call,
+    /// `Ok(false)` if no requests were completed, and `Err(e)` to signal an
+    /// error condition.
+    fn poll(
+        _hw_data: ForeignBorrowed<'_, Self::HwData>,
+        _queue_data: ForeignBorrowed<'_, Self::QueueData>,
+        _batch: &mut IoCompletionBatch<Self>,
+    ) -> Result<bool> {
         build_error!(crate::error::VTABLE_DEFAULT_ERROR)
     }
 
@@ -194,6 +203,11 @@ impl<T: Operations> OperationsVTable<T> {
         // `into_foreign` in `Self::init_hctx_callback`.
         let hw_data = unsafe { T::HwData::borrow((*hctx).driver_data) };
 
+        let is_poll = u32::from(
+            // SAFETY: `hctx` is valid as required by this function.
+            unsafe { (*hctx).type_ },
+        ) == bindings::hctx_type_HCTX_TYPE_POLL;
+
         // SAFETY: `hctx` is valid as required by this function.
         let queue_data = unsafe { (*(*hctx).queue).queuedata };
 
@@ -210,6 +224,7 @@ impl<T: Operations> OperationsVTable<T> {
             // SAFETY: `bd` is valid as required by the safety requirement for
             // this function.
             unsafe { (*bd).last },
+            is_poll,
         );
 
         if let Err(e) = ret {
@@ -268,13 +283,32 @@ impl<T: Operations> OperationsVTable<T> {
     /// previously initialized by a call to `init_hctx_callback`.
     unsafe extern "C" fn poll_callback(
         hctx: *mut bindings::blk_mq_hw_ctx,
-        _iob: *mut bindings::io_comp_batch,
+        iob: *mut bindings::io_comp_batch,
     ) -> crate::ffi::c_int {
         // SAFETY: By function safety requirement, `hctx` was initialized by
         // `init_hctx_callback` and thus `driver_data` came from a call to
         // `into_foreign`.
         let hw_data = unsafe { T::HwData::borrow((*hctx).driver_data) };
-        T::poll(hw_data).into()
+
+        // SAFETY: `hctx` is valid as required by this function.
+        let queue_data = unsafe { (*(*hctx).queue).queuedata };
+
+        // SAFETY: `queue.queuedata` was created by `GenDiskBuilder::build` with
+        // a call to `ForeignOwnable::into_foreign` to create `queuedata`.
+        // `ForeignOwnable::from_foreign` is only called when the tagset is
+        // dropped, which happens after we are dropped.
+        let queue_data = unsafe { T::QueueData::borrow(queue_data) };
+
+        let mut batch = IoCompletionBatch {
+            inner: iob,
+            _p: PhantomData,
+        };
+
+        let ret = T::poll(hw_data, queue_data, &mut batch);
+        match ret {
+            Ok(val) => val.into(),
+            Err(e) => e.to_errno(),
+        }
     }
 
     /// This function is called by the C kernel. A pointer to this function is
@@ -473,3 +507,67 @@ pub(crate) const fn build() -> &'static bindings::blk_mq_ops {
         &Self::VTABLE
     }
 }
+
+/// A batch of I/O completions for polled I/O.
+///
+/// This struct wraps the C `struct io_comp_batch` and is used to batch
+/// multiple request completions together for improved efficiency during polled
+/// I/O operations.
+///
+/// When the kernel polls for completed requests via [`Operations::poll`], it
+/// passes an `IoCompletionBatch` to collect completed requests. The driver can
+/// add completed requests to the batch using [`add_request`], allowing the
+/// kernel to process multiple completions together rather than one at a time.
+///
+/// # Invariants
+///
+/// - `inner` is either null (no batching) or points to a valid `io_comp_batch`.
+///
+/// [`add_request`]: IoCompletionBatch::add_request
+#[repr(transparent)]
+pub struct IoCompletionBatch<T> {
+    inner: *mut bindings::io_comp_batch,
+    _p: PhantomData<T>,
+}
+
+impl<T: Operations> IoCompletionBatch<T> {
+    /// Attempt to add a completed request to this batch.
+    ///
+    /// This method tries to add `rq` to the batch for deferred completion. If
+    /// the request is successfully added, ownership is transferred to the batch
+    /// and the request will be completed later when the batch is processed.
+    ///
+    /// # Arguments
+    ///
+    /// - `rq`: The completed request to add to the batch.
+    /// - `error`: Set to `true` if the request completed with an error.
+    ///
+    /// # Return
+    ///
+    /// When this method returns `Err`, the caller is responsible for completing
+    /// the request through other means, such as calling
+    /// [`Request::complete`](super::Request::complete).
+    pub fn add_request(
+        &mut self,
+        rq: Owned<Request<T>>,
+        error: bool,
+    ) -> Result<(), Owned<Request<T>>> {
+        // SAFETY: By type invariant, `self.inner` is null (call returns false) or valid.
+        let ret = unsafe {
+            bindings::blk_mq_add_to_batch(
+                rq.as_raw(),
+                self.inner,
+                error,
+                Some(bindings::blk_mq_end_request_batch),
+            )
+        };
+
+        match ret {
+            true => {
+                core::mem::forget(rq);
+                Ok(())
+            }
+            false => Err(rq),
+        }
+    }
+}
diff --git a/rust/kernel/block/mq/request.rs b/rust/kernel/block/mq/request.rs
index 66ef2493c448..dbe657a80324 100644
--- a/rust/kernel/block/mq/request.rs
+++ b/rust/kernel/block/mq/request.rs
@@ -156,6 +156,11 @@ pub fn queue(&self) -> &RequestQueue<T> {
         // SAFETY: By type invariant, self.0 is guaranteed to be valid.
         unsafe { RequestQueue::from_raw((*self.0.get()).q) }
     }
+
+    /// Return a raw pointer to the underlying C structure.
+    pub fn as_raw(&self) -> *mut bindings::request {
+        self.0.get()
+    }
 }
 
 /// A wrapper around a blk-mq [`struct request`]. This represents an IO request.

-- 
2.51.2



^ permalink raw reply related

* [PATCH v2 52/83] block: rust: add `TagSet::flags`
From: Andreas Hindborg @ 2026-06-09 19:08 UTC (permalink / raw)
  To: Liam R. Howlett, Alice Ryhl, Anna-Maria Behnsen, Benno Lossin,
	Björn Roy Baron, Boqun Feng, Danilo Krummrich,
	FUJITA Tomonori, Frederic Weisbecker, Gary Guo, Jens Axboe,
	John Stultz, Lorenzo Stoakes, Lyude Paul, Miguel Ojeda,
	Stephen Boyd, Thomas Gleixner, Trevor Gross, Liam R. Howlett,
	Boqun Feng, Lorenzo Stoakes
  Cc: Andreas Hindborg, linux-block, linux-kernel, linux-mm,
	rust-for-linux
In-Reply-To: <20260609-rnull-v6-19-rc5-send-v2-0-82c7404542e2@kernel.org>

Add a way for block device drivers to query the flags that a `TagSet`
was configured with. This is needed so drivers can inspect properties
such as whether the tag set uses blocking queues.

Signed-off-by: Andreas Hindborg <a.hindborg@kernel.org>
---
 rust/kernel/block/mq/tag_set.rs | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/rust/kernel/block/mq/tag_set.rs b/rust/kernel/block/mq/tag_set.rs
index 5359e60fb5a5..157c47f64334 100644
--- a/rust/kernel/block/mq/tag_set.rs
+++ b/rust/kernel/block/mq/tag_set.rs
@@ -107,6 +107,15 @@ pub fn new(
     pub(crate) fn raw_tag_set(&self) -> *mut bindings::blk_mq_tag_set {
         self.inner.get()
     }
+
+    /// Return the [`Flags`] that this tag set was configured with.
+    pub fn flags(&self) -> Flags {
+        let this = self.raw_tag_set();
+        // SAFETY: By type invariant, `this` points to a valid and initialized
+        // `blk_mq_tag_set`.
+        let flags_raw = unsafe { (*this).flags };
+        Flags::try_from(flags_raw).expect("Expected valid flags from C struct")
+    }
 }
 
 #[pinned_drop]

-- 
2.51.2



^ permalink raw reply related

* [PATCH v2 79/83] block: rnull: allow configuration of the maximum IO size
From: Andreas Hindborg @ 2026-06-09 19:08 UTC (permalink / raw)
  To: Liam R. Howlett, Alice Ryhl, Anna-Maria Behnsen, Benno Lossin,
	Björn Roy Baron, Boqun Feng, Danilo Krummrich,
	FUJITA Tomonori, Frederic Weisbecker, Gary Guo, Jens Axboe,
	John Stultz, Lorenzo Stoakes, Lyude Paul, Miguel Ojeda,
	Stephen Boyd, Thomas Gleixner, Trevor Gross, Liam R. Howlett,
	Boqun Feng, Lorenzo Stoakes
  Cc: Andreas Hindborg, linux-block, linux-kernel, linux-mm,
	rust-for-linux
In-Reply-To: <20260609-rnull-v6-19-rc5-send-v2-0-82c7404542e2@kernel.org>

Add module parameter and configfs option for controlling the maximum size
of an IO for the emulated block device.

Signed-off-by: Andreas Hindborg <a.hindborg@kernel.org>
---
 drivers/block/rnull/configfs.rs |  5 +++++
 drivers/block/rnull/rnull.rs    | 10 +++++++++-
 2 files changed, 14 insertions(+), 1 deletion(-)

diff --git a/drivers/block/rnull/configfs.rs b/drivers/block/rnull/configfs.rs
index eaa7617e5ffa..5ab217e43e2b 100644
--- a/drivers/block/rnull/configfs.rs
+++ b/drivers/block/rnull/configfs.rs
@@ -132,6 +132,7 @@ fn make_group(
                 zone_append_max_sectors: 26,
                 poll_queues: 27,
                 fua: 28,
+                max_sectors: 29,
             ],
         };
 
@@ -219,6 +220,7 @@ fn make_group(
                     requeue_inject,
                     #[cfg(CONFIG_BLK_DEV_RUST_NULL_FAULT_INJECTION)]
                     init_hctx_inject,
+                    max_sectors: 0,
                 }),
             }),
             default_groups,
@@ -312,6 +314,7 @@ struct DeviceConfigInner {
     requeue_inject: Arc<FaultConfig>,
     #[cfg(CONFIG_BLK_DEV_RUST_NULL_FAULT_INJECTION)]
     init_hctx_inject: Arc<FaultConfig>,
+    max_sectors: u32,
 }
 
 #[vtable]
@@ -384,6 +387,7 @@ fn store(this: &DeviceConfig, page: &[u8]) -> Result {
                 requeue_inject: guard.requeue_inject.clone(),
                 #[cfg(CONFIG_BLK_DEV_RUST_NULL_FAULT_INJECTION)]
                 timeout_inject: guard.timeout_inject.clone(),
+                max_sectors: guard.max_sectors,
             })?);
             guard.powered = true;
         } else if guard.powered && !power_op {
@@ -612,3 +616,4 @@ fn store(this: &DeviceConfig, page: &[u8]) -> Result {
     },
 }
 configfs_simple_bool_field!(DeviceConfig, 28, fua);
+configfs_simple_field!(DeviceConfig, 29, max_sectors, u32);
diff --git a/drivers/block/rnull/rnull.rs b/drivers/block/rnull/rnull.rs
index f909360ec70d..15b8c365b9fa 100644
--- a/drivers/block/rnull/rnull.rs
+++ b/drivers/block/rnull/rnull.rs
@@ -204,6 +204,10 @@
             default: true,
             description: "Enable/disable FUA support when cache_size is used.",
         },
+        max_sectors: u32 {
+            default: 0,
+            description: "Maximum size of a command (in 512B sectors)",
+        },
     },
 }
 
@@ -307,6 +311,7 @@ fn init(_module: &'static ThisModule) -> impl PinInit<Self, Error> {
                     requeue_inject: Arc::pin_init(FaultConfig::new(c"requeue_inject"), GFP_KERNEL)?,
                     #[cfg(CONFIG_BLK_DEV_RUST_NULL_FAULT_INJECTION)]
                     timeout_inject: Arc::pin_init(FaultConfig::new(c"timeout_inject"), GFP_KERNEL)?,
+                    max_sectors: module_parameters::max_sectors.value(),
                 })?;
                 disks.push(disk, GFP_KERNEL)?;
             }
@@ -352,6 +357,7 @@ struct NullBlkOptions<'a> {
     requeue_inject: Arc<FaultConfig>,
     #[cfg(CONFIG_BLK_DEV_RUST_NULL_FAULT_INJECTION)]
     timeout_inject: Arc<FaultConfig>,
+    max_sectors: u32,
 }
 
 #[pin_data]
@@ -487,6 +493,7 @@ fn new(options: NullBlkOptions<'_>) -> Result<Arc<GenDisk<Self>>> {
             requeue_inject,
             #[cfg(CONFIG_BLK_DEV_RUST_NULL_FAULT_INJECTION)]
             timeout_inject,
+            max_sectors,
         } = options;
 
         let memory_backed = tag_set.memory_backed;
@@ -548,7 +555,8 @@ fn new(options: NullBlkOptions<'_>) -> Result<Arc<GenDisk<Self>>> {
             .physical_block_size(block_size_bytes)?
             .rotational(rotational)
             .write_cache(storage.cache_enabled())
-            .forced_unit_access(forced_unit_access && storage.cache_enabled());
+            .forced_unit_access(forced_unit_access && storage.cache_enabled())
+            .max_sectors(max_sectors);
 
         #[cfg(CONFIG_BLK_DEV_ZONED)]
         {

-- 
2.51.2



^ permalink raw reply related

* [PATCH v2 75/83] block: rust: add `Request::requeue`
From: Andreas Hindborg @ 2026-06-09 19:08 UTC (permalink / raw)
  To: Liam R. Howlett, Alice Ryhl, Anna-Maria Behnsen, Benno Lossin,
	Björn Roy Baron, Boqun Feng, Danilo Krummrich,
	FUJITA Tomonori, Frederic Weisbecker, Gary Guo, Jens Axboe,
	John Stultz, Lorenzo Stoakes, Lyude Paul, Miguel Ojeda,
	Stephen Boyd, Thomas Gleixner, Trevor Gross, Liam R. Howlett,
	Boqun Feng, Lorenzo Stoakes
  Cc: Andreas Hindborg, linux-block, linux-kernel, linux-mm,
	rust-for-linux
In-Reply-To: <20260609-rnull-v6-19-rc5-send-v2-0-82c7404542e2@kernel.org>

Add a method on `Request` to requeue the request with the block layer.
Drivers can use this method to send a request back to the block layer
without processing the request.

Signed-off-by: Andreas Hindborg <a.hindborg@kernel.org>
---
 rust/kernel/block/mq/request.rs | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/rust/kernel/block/mq/request.rs b/rust/kernel/block/mq/request.rs
index 54b5202567f8..bf6d58139ab4 100644
--- a/rust/kernel/block/mq/request.rs
+++ b/rust/kernel/block/mq/request.rs
@@ -100,6 +100,18 @@ pub(crate) unsafe fn from_raw(ptr: *mut bindings::request) -> Owned<Self> {
         // SAFETY: By function safety requirements, `ptr` is valid for use as an `IdleRequest`.
         unsafe { Owned::from_raw(NonNull::<Self>::new_unchecked(ptr.cast())) }
     }
+
+    /// Requeue this request at the block layer.
+    ///
+    /// If `kick_requeue_list` is true, this method will schedule processing of
+    /// the requeue list on a workqueue.
+    pub fn requeue(self: Owned<Self>, kick_requeue_list: bool) {
+        let ptr = self.0 .0.get();
+        core::mem::forget(self);
+
+        // SAFETY: By type invariant, the wrapped request is valid.
+        unsafe { bindings::blk_mq_requeue_request(ptr, kick_requeue_list) };
+    }
 }
 
 impl<T: Operations> Ownable for IdleRequest<T> {

-- 
2.51.2



^ permalink raw reply related

* [PATCH v2 23/83] block: rnull: add discard support
From: Andreas Hindborg @ 2026-06-09 19:08 UTC (permalink / raw)
  To: Liam R. Howlett, Alice Ryhl, Anna-Maria Behnsen, Benno Lossin,
	Björn Roy Baron, Boqun Feng, Danilo Krummrich,
	FUJITA Tomonori, Frederic Weisbecker, Gary Guo, Jens Axboe,
	John Stultz, Lorenzo Stoakes, Lyude Paul, Miguel Ojeda,
	Stephen Boyd, Thomas Gleixner, Trevor Gross, Liam R. Howlett,
	Boqun Feng, Lorenzo Stoakes
  Cc: Andreas Hindborg, linux-block, linux-kernel, linux-mm,
	rust-for-linux
In-Reply-To: <20260609-rnull-v6-19-rc5-send-v2-0-82c7404542e2@kernel.org>

Add support for discard operations to the rnull block driver:
- Add discard module parameter and configfs attribute.
- Set max_hw_discard_sectors when discard is enabled.
- Add sector occupancy tracking.
- Add discard handling that frees sectors and removes empty pages.
- Discard operations require memory backing to function.

The discard feature uses a bitmap to track which sectors in each page are
occupied, allowing cleanup of pages when they are empty.

Signed-off-by: Andreas Hindborg <a.hindborg@kernel.org>
---
 drivers/block/rnull/configfs.rs |  15 +++++
 drivers/block/rnull/rnull.rs    | 120 +++++++++++++++++++++++++++++++++++-----
 2 files changed, 121 insertions(+), 14 deletions(-)

diff --git a/drivers/block/rnull/configfs.rs b/drivers/block/rnull/configfs.rs
index 2f3fa81ea121..e47399cd45a4 100644
--- a/drivers/block/rnull/configfs.rs
+++ b/drivers/block/rnull/configfs.rs
@@ -93,6 +93,7 @@ fn make_group(
                 submit_queues: 7,
                 use_per_node_hctx: 8,
                 home_node: 9,
+                discard: 10,
             ],
         };
 
@@ -113,6 +114,7 @@ fn make_group(
                     memory_backed: false,
                     submit_queues: 1,
                     home_node: bindings::NUMA_NO_NODE,
+                    discard: false,
                 }),
             }),
             core::iter::empty(),
@@ -180,6 +182,7 @@ struct DeviceConfigInner {
     memory_backed: bool,
     submit_queues: u32,
     home_node: i32,
+    discard: bool,
 }
 
 #[vtable]
@@ -213,6 +216,7 @@ fn store(this: &DeviceConfig, page: &[u8]) -> Result {
                 memory_backed: guard.memory_backed,
                 submit_queues: guard.submit_queues,
                 home_node: guard.home_node,
+                discard: guard.discard,
             })?);
             guard.powered = true;
         } else if guard.powered && !power_op {
@@ -307,3 +311,14 @@ fn store(this: &DeviceConfig, page: &[u8]) -> Result {
         }
     })
 );
+
+configfs_attribute!(DeviceConfig, 10,
+    show: |this, page| show_field(this.data.lock().discard, page),
+    store: |this, page| store_with_power_check(this, page, |data, page| {
+        if !data.memory_backed {
+            return Err(EINVAL);
+        }
+        data.discard = kstrtobool_bytes(page)?;
+        Ok(())
+    })
+);
diff --git a/drivers/block/rnull/rnull.rs b/drivers/block/rnull/rnull.rs
index 1d0faf524f5c..bdc05b3f6072 100644
--- a/drivers/block/rnull/rnull.rs
+++ b/drivers/block/rnull/rnull.rs
@@ -19,15 +19,20 @@
             Operations,
             TagSet, //
         },
+        PAGE_SECTOR_MASK, SECTOR_SHIFT,
     },
     error::{
         code,
         Result, //
     },
+    ffi,
     memalloc_scope,
     new_mutex,
     new_xarray,
-    page::SafePage,
+    page::{
+        SafePage, //
+        PAGE_SIZE,
+    },
     pr_info,
     prelude::*,
     str::CString,
@@ -100,6 +105,11 @@
             default: -1,
             description: "Home node for the device. Default: -1 (no node)",
         },
+        discard: bool {
+            default: false,
+            description:
+                "Support discard operations (requires memory-backed null_blk device).",
+        },
     },
 }
 
@@ -137,6 +147,7 @@ fn init(_module: &'static ThisModule) -> impl PinInit<Self, Error> {
                     memory_backed: module_parameters::memory_backed.value(),
                     submit_queues,
                     home_node: module_parameters::home_node.value(),
+                    discard: module_parameters::discard.value(),
                 })?;
                 disks.push(disk, GFP_KERNEL)?;
             }
@@ -161,6 +172,7 @@ struct NullBlkOptions<'a> {
     memory_backed: bool,
     submit_queues: u32,
     home_node: i32,
+    discard: bool,
 }
 struct NullBlkDevice;
 
@@ -176,6 +188,7 @@ fn new(options: NullBlkOptions<'_>) -> Result<GenDisk<Self>> {
             memory_backed,
             submit_queues,
             home_node,
+            discard,
         } = options;
 
         let flags = if memory_backed {
@@ -205,22 +218,30 @@ fn new(options: NullBlkOptions<'_>) -> Result<GenDisk<Self>> {
                 irq_mode,
                 completion_time,
                 memory_backed,
+                block_size: block_size as usize,
             }),
             GFP_KERNEL,
         )?;
 
-        gen_disk::GenDiskBuilder::new()
+        let mut builder = gen_disk::GenDiskBuilder::new()
             .capacity_sectors(capacity_mib << (20 - block::SECTOR_SHIFT))
             .logical_block_size(block_size)?
             .physical_block_size(block_size)?
-            .rotational(rotational)
-            .build(fmt!("{}", name.to_str()?), tagset, queue_data)
+            .rotational(rotational);
+
+        if memory_backed && discard {
+            builder = builder
+                // Max IO size is u32::MAX bytes
+                .max_hw_discard_sectors(ffi::c_uint::MAX >> block::SECTOR_SHIFT);
+        }
+
+        builder.build(fmt!("{}", name.to_str()?), tagset, queue_data)
     }
 
     #[inline(always)]
     fn write(tree: &XArray<TreeNode>, mut sector: usize, mut segment: Segment<'_>) -> Result {
         while !segment.is_empty() {
-            let page = SafePage::alloc_page(GFP_KERNEL)?;
+            let page = NullBlockPage::new()?;
             let mut tree = tree.lock();
 
             let page_idx = sector >> block::PAGE_SECTORS_SHIFT;
@@ -232,8 +253,10 @@ fn write(tree: &XArray<TreeNode>, mut sector: usize, mut segment: Segment<'_>) -
                 tree.get_mut(page_idx).unwrap()
             };
 
+            page.set_occupied(sector);
             let page_offset = (sector & block::PAGE_SECTOR_MASK as usize) << block::SECTOR_SHIFT;
-            sector += segment.copy_to_page(page, page_offset) >> block::SECTOR_SHIFT;
+            sector +=
+                segment.copy_to_page(page.page.as_pin_mut(), page_offset) >> block::SECTOR_SHIFT;
         }
         Ok(())
     }
@@ -248,7 +271,7 @@ fn read(tree: &XArray<TreeNode>, mut sector: usize, mut segment: Segment<'_>) ->
             if let Some(page) = tree.get(idx) {
                 let page_offset =
                     (sector & block::PAGE_SECTOR_MASK as usize) << block::SECTOR_SHIFT;
-                sector += segment.copy_from_page(page, page_offset) >> block::SECTOR_SHIFT;
+                sector += segment.copy_from_page(&page.page, page_offset) >> block::SECTOR_SHIFT;
             } else {
                 sector += segment.zero_page() >> block::SECTOR_SHIFT;
             }
@@ -257,6 +280,37 @@ fn read(tree: &XArray<TreeNode>, mut sector: usize, mut segment: Segment<'_>) ->
         Ok(())
     }
 
+    fn discard(
+        tree: &XArray<TreeNode>,
+        mut sector: usize,
+        sectors: usize,
+        block_size: usize,
+    ) -> Result {
+        let mut remaining_bytes = sectors << SECTOR_SHIFT;
+        let mut tree = tree.lock();
+
+        while remaining_bytes > 0 {
+            let page_idx = sector >> block::PAGE_SECTORS_SHIFT;
+            let mut remove = false;
+            if let Some(page) = tree.get_mut(page_idx) {
+                page.set_free(sector);
+                if page.is_empty() {
+                    remove = true;
+                }
+            }
+
+            if remove {
+                drop(tree.remove(page_idx))
+            }
+
+            let processed = remaining_bytes.min(block_size);
+            sector += processed >> SECTOR_SHIFT;
+            remaining_bytes -= processed;
+        }
+
+        Ok(())
+    }
+
     #[inline(never)]
     fn transfer(
         command: bindings::req_op,
@@ -273,7 +327,40 @@ fn transfer(
     }
 }
 
-type TreeNode = Owned<SafePage>;
+static_assert!((PAGE_SIZE >> SECTOR_SHIFT) <= 64);
+
+struct NullBlockPage {
+    page: Owned<SafePage>,
+    status: u64,
+}
+
+impl NullBlockPage {
+    fn new() -> Result<KBox<Self>> {
+        Ok(KBox::new(
+            Self {
+                page: SafePage::alloc_page(GFP_KERNEL | __GFP_ZERO)?,
+                status: 0,
+            },
+            GFP_KERNEL,
+        )?)
+    }
+
+    fn set_occupied(&mut self, sector: usize) {
+        let idx = sector & PAGE_SECTOR_MASK as usize;
+        self.status |= 1 << idx;
+    }
+
+    fn set_free(&mut self, sector: usize) {
+        let idx = sector & PAGE_SECTOR_MASK as usize;
+        self.status &= !(1 << idx);
+    }
+
+    fn is_empty(&self) -> bool {
+        self.status == 0
+    }
+}
+
+type TreeNode = KBox<NullBlockPage>;
 
 #[pin_data]
 struct QueueData {
@@ -282,6 +369,7 @@ struct QueueData {
     irq_mode: IRQMode,
     completion_time: Delta,
     memory_backed: bool,
+    block_size: usize,
 }
 
 #[pin_data]
@@ -332,12 +420,16 @@ fn queue_rq(
             let command = rq.command();
             let mut sector = rq.sector();
 
-            for bio in rq.bio_iter_mut() {
-                let segment_iter = bio.segment_iter();
-                for segment in segment_iter {
-                    let length = segment.len();
-                    Self::transfer(command, tree, sector, segment)?;
-                    sector += length as usize >> block::SECTOR_SHIFT;
+            if command == bindings::req_op_REQ_OP_DISCARD {
+                Self::discard(tree, sector, rq.sectors(), queue_data.block_size)?;
+            } else {
+                for bio in rq.bio_iter_mut() {
+                    let segment_iter = bio.segment_iter();
+                    for segment in segment_iter {
+                        let length = segment.len();
+                        Self::transfer(command, tree, sector, segment)?;
+                        sector += length as usize >> block::SECTOR_SHIFT;
+                    }
                 }
             }
         }

-- 
2.51.2



^ permalink raw reply related

* [PATCH v2 83/83] block: rnull: add zone offline and readonly configfs files
From: Andreas Hindborg @ 2026-06-09 19:09 UTC (permalink / raw)
  To: Liam R. Howlett, Alice Ryhl, Anna-Maria Behnsen, Benno Lossin,
	Björn Roy Baron, Boqun Feng, Danilo Krummrich,
	FUJITA Tomonori, Frederic Weisbecker, Gary Guo, Jens Axboe,
	John Stultz, Lorenzo Stoakes, Lyude Paul, Miguel Ojeda,
	Stephen Boyd, Thomas Gleixner, Trevor Gross, Liam R. Howlett,
	Boqun Feng, Lorenzo Stoakes
  Cc: Andreas Hindborg, linux-block, linux-kernel, linux-mm,
	rust-for-linux
In-Reply-To: <20260609-rnull-v6-19-rc5-send-v2-0-82c7404542e2@kernel.org>

Add configfs attributes for managing zone states in the rnull zoned
block device emulation. The `zone_offline` and `zone_readonly`
attributes allow setting specific zones to offline or read-only states,
which is useful for testing how applications handle degraded zones.

Signed-off-by: Andreas Hindborg <a.hindborg@kernel.org>
---
 drivers/block/rnull/configfs.rs     | 76 +++++++++++++++++++++++++++++++++
 drivers/block/rnull/disk_storage.rs | 59 +++++++++++++-------------
 drivers/block/rnull/rnull.rs        |  2 +-
 drivers/block/rnull/zoned.rs        | 83 ++++++++++++++++++++++++++-----------
 4 files changed, 164 insertions(+), 56 deletions(-)

diff --git a/drivers/block/rnull/configfs.rs b/drivers/block/rnull/configfs.rs
index 1bab38c55698..43d01b419579 100644
--- a/drivers/block/rnull/configfs.rs
+++ b/drivers/block/rnull/configfs.rs
@@ -135,6 +135,8 @@ fn make_group(
                 max_sectors: 29,
                 virt_boundary: 30,
                 shared_tag_bitmap: 31,
+                zone_offline: 32,
+                zone_readonly: 33,
             ],
         };
 
@@ -627,3 +629,77 @@ fn store(this: &DeviceConfig, page: &[u8]) -> Result {
 configfs_simple_field!(DeviceConfig, 29, max_sectors, u32);
 configfs_simple_bool_field!(DeviceConfig, 30, virt_boundary);
 configfs_simple_bool_field!(DeviceConfig, 31, shared_tag_bitmap);
+
+#[cfg(CONFIG_BLK_DEV_ZONED)]
+fn set_zone_condition(
+    this: &DeviceConfig,
+    sector: u64,
+    cb: impl FnOnce(
+        &crate::zoned::ZoneOptions,
+        &DiskStorage,
+        &mut crate::zoned::ZoneDescriptor,
+    ) -> Result,
+) -> Result {
+    use crate::zoned::ZoneType;
+    let data_guard = this.data.lock();
+    let null_disk = data_guard.disk.as_ref().ok_or(EBUSY)?.queue_data();
+    let storage = &null_disk.storage;
+    let zone_options = &null_disk.zoned;
+    zone_options.enabled.then_some(()).ok_or(EINVAL)?;
+    let mut zone = zone_options.zone(sector)?.lock();
+
+    if zone.kind == ZoneType::Conventional {
+        return Err(EINVAL);
+    }
+
+    cb(zone_options, storage, &mut zone)
+}
+
+#[cfg(CONFIG_BLK_DEV_ZONED)]
+configfs_attribute!(
+    DeviceConfig,
+    32,
+    show: |_this, _page| Ok(0),
+    store: |this,page| {
+        let text = core::str::from_utf8(page)?.trim();
+        let sector = text.parse().map_err(|_| EINVAL)?;
+
+        set_zone_condition(this, sector, |zone_options, storage, zone| {
+            zone_options.offline_zone(storage, zone)
+        })?;
+        Ok(())
+    },
+);
+
+#[cfg(CONFIG_BLK_DEV_ZONED)]
+configfs_attribute!(
+    DeviceConfig,
+    33,
+    show: |_this, _page| Ok(0),
+    store: |this,page| {
+        let text = core::str::from_utf8(page)?.trim();
+        let sector = text.parse().map_err(|_| EINVAL)?;
+
+        set_zone_condition(this, sector, |zone_options, storage, zone| {
+            zone_options.read_only_zone(storage, zone)
+        })?;
+
+        Ok(())
+    },
+);
+
+#[cfg(not(CONFIG_BLK_DEV_ZONED))]
+configfs_attribute!(
+    DeviceConfig,
+    32,
+    show: |_this, _page| Ok(0),
+    store: |_this, _page| Err(EOPNOTSUPP),
+);
+
+#[cfg(not(CONFIG_BLK_DEV_ZONED))]
+configfs_attribute!(
+    DeviceConfig,
+    33,
+    show: |_this, _page| Ok(0),
+    store: |_this, _page| Err(EOPNOTSUPP),
+);
diff --git a/drivers/block/rnull/disk_storage.rs b/drivers/block/rnull/disk_storage.rs
index 6797b7996da3..879dd5d96e65 100644
--- a/drivers/block/rnull/disk_storage.rs
+++ b/drivers/block/rnull/disk_storage.rs
@@ -65,27 +65,45 @@ pub(crate) fn lock(&self) -> SpinLockGuard<'_, Pin<KBox<TreeContainer>>> {
         self.trees.lock()
     }
 
-    pub(crate) fn discard(
-        &self,
-        hw_data: &Pin<&SpinLock<HwQueueContext>>,
-        mut sector: u64,
-        sectors: u32,
-    ) {
-        let mut tree_guard = self.lock();
-        let mut hw_data_guard = hw_data.lock();
-
-        let mut access = self.access(&mut tree_guard, &mut hw_data_guard, None);
+    pub(crate) fn discard(&self, mut sector: u64, sectors: u32) {
+        let tree_guard = self.lock();
+        let mut cache_guard = tree_guard.cache_tree.lock();
+        let mut disk_guard = tree_guard.disk_tree.lock();
 
         let mut remaining_bytes = sectors_to_bytes(sectors);
 
         while remaining_bytes > 0 {
-            access.free_sector(sector);
+            self.free_sector(&mut cache_guard, &mut disk_guard, sector);
             let processed = remaining_bytes.min(self.block_size);
             sector += Into::<u64>::into(bytes_to_sectors(processed));
             remaining_bytes -= processed;
         }
     }
 
+    fn free_sector_tree(tree_access: &mut xarray::Guard<'_, TreeNode>, sector: u64) {
+        let index = DiskStorageAccess::to_index(sector);
+        if let Some(page) = tree_access.get_mut(index) {
+            page.set_free(sector);
+
+            if page.is_empty() {
+                tree_access.remove(index);
+            }
+        }
+    }
+
+    pub(crate) fn free_sector<'a>(
+        &self,
+        cache_guard: &mut xarray::Guard<'a, TreeNode>,
+        disk_guard: &mut xarray::Guard<'a, TreeNode>,
+        sector: u64,
+    ) {
+        if self.cache_size > 0 {
+            Self::free_sector_tree(cache_guard, sector);
+        }
+
+        Self::free_sector_tree(disk_guard, sector);
+    }
+
     pub(crate) fn flush(&self, hw_data: &Pin<&SpinLock<HwQueueContext>>) {
         let mut tree_guard = self.lock();
         let mut hw_data_guard = hw_data.lock();
@@ -286,25 +304,6 @@ pub(crate) fn get_read_page(&self, sector: u64) -> Option<&NullBlockPage> {
             self.disk_guard.get(index)
         }
     }
-
-    fn free_sector_tree(tree_access: &mut xarray::Guard<'_, TreeNode>, sector: u64) {
-        let index = Self::to_index(sector);
-        if let Some(page) = tree_access.get_mut(index) {
-            page.set_free(sector);
-
-            if page.is_empty() {
-                tree_access.remove(index);
-            }
-        }
-    }
-
-    pub(crate) fn free_sector(&mut self, sector: u64) {
-        if self.disk_storage.cache_size > 0 {
-            Self::free_sector_tree(&mut self.cache_guard, sector);
-        }
-
-        Self::free_sector_tree(&mut self.disk_guard, sector);
-    }
 }
 
 type TreeNode = KBox<NullBlockPage>;
diff --git a/drivers/block/rnull/rnull.rs b/drivers/block/rnull/rnull.rs
index 81f9e2d03f31..b6371fe4ebeb 100644
--- a/drivers/block/rnull/rnull.rs
+++ b/drivers/block/rnull/rnull.rs
@@ -798,7 +798,7 @@ fn handle_regular_command(
         if self.memory_backed {
             memalloc_scope!(let _noio: NoIo);
             if rq.command() == mq::Command::Discard {
-                self.storage.discard(hw_data, rq.sector(), sectors);
+                self.storage.discard(rq.sector(), sectors);
             } else {
                 self.transfer(hw_data, rq, rq.command(), sectors)?;
             }
diff --git a/drivers/block/rnull/zoned.rs b/drivers/block/rnull/zoned.rs
index 808449cc49e1..cf0eb5d31840 100644
--- a/drivers/block/rnull/zoned.rs
+++ b/drivers/block/rnull/zoned.rs
@@ -179,7 +179,7 @@ pub(crate) fn handle_zoned_command(
         match rq.command() {
             ZoneAppend | Write => self.zoned_write(hw_data, rq)?,
             ZoneReset | ZoneResetAll | ZoneOpen | ZoneClose | ZoneFinish => {
-                self.zone_management(hw_data, rq)?
+                self.zone_management(rq)?
             }
             _ => self.zoned_read(hw_data, rq)?,
         }
@@ -187,18 +187,14 @@ pub(crate) fn handle_zoned_command(
         Ok(())
     }
 
-    fn zone_management(
-        &self,
-        hw_data: &Pin<&SpinLock<HwQueueContext>>,
-        rq: &mut Owned<mq::Request<Self>>,
-    ) -> Result {
+    fn zone_management(&self, rq: &mut Owned<mq::Request<Self>>) -> Result {
         if rq.command() == mq::Command::ZoneResetAll {
             for zone in self.zoned.zones_iter() {
                 let mut zone = zone.lock();
                 use ZoneCondition::*;
                 match zone.condition {
                     Empty | ReadOnly | Offline => continue,
-                    _ => self.zoned.reset_zone(&self.storage, hw_data, &mut zone)?,
+                    _ => self.zoned.reset_zone(&self.storage, &mut zone)?,
                 }
             }
 
@@ -214,10 +210,10 @@ fn zone_management(
 
         use mq::Command::*;
         match rq.command() {
-            ZoneOpen => self.zoned.open_zone(&mut zone, rq.sector()),
+            ZoneOpen => self.zoned.open_zone(&mut zone),
             ZoneClose => self.zoned.close_zone(&mut zone),
-            ZoneReset => self.zoned.reset_zone(&self.storage, hw_data, &mut zone),
-            ZoneFinish => self.zoned.finish_zone(&mut zone, rq.sector()),
+            ZoneReset => self.zoned.reset_zone(&self.storage, &mut zone),
+            ZoneFinish => self.zoned.finish_zone(&mut zone),
             _ => Err(EIO),
         }
     }
@@ -283,7 +279,7 @@ fn zoned_write(
             if self.zoned.use_accounting() {
                 let mut accounting = self.zoned.accounting.lock();
                 self.zoned
-                    .check_zone_resources(&mut accounting, &mut zone, rq.sector())?;
+                    .check_zone_resources(&mut accounting, &mut zone)?;
 
                 if zone.condition == ZoneCondition::Closed {
                     accounting.closed -= 1;
@@ -367,7 +363,7 @@ fn zone_no(&self, sector: u64) -> usize {
         (sector >> self.size_sectors.ilog2()) as usize
     }
 
-    fn zone(&self, sector: u64) -> Result<&Mutex<ZoneDescriptor>> {
+    pub(crate) fn zone(&self, sector: u64) -> Result<&Mutex<ZoneDescriptor>> {
         self.zones.get(self.zone_no(sector)).ok_or(EINVAL)
     }
 
@@ -420,7 +416,7 @@ fn try_close_implicit_open_zone(&self, accounting: &mut ZoneAccounting, sector:
         Err(EINVAL)
     }
 
-    fn open_zone(&self, zone: &mut ZoneDescriptor, sector: u64) -> Result {
+    fn open_zone(&self, zone: &mut ZoneDescriptor) -> Result {
         if zone.kind == ZoneType::Conventional {
             return Err(EINVAL);
         }
@@ -436,13 +432,13 @@ fn open_zone(&self, zone: &mut ZoneDescriptor, sector: u64) -> Result {
             let mut accounting = self.accounting.lock();
             match zone.condition {
                 Empty => {
-                    self.check_zone_resources(&mut accounting, zone, sector)?;
+                    self.check_zone_resources(&mut accounting, zone)?;
                 }
                 ImplicitOpen => {
                     accounting.implicit_open -= 1;
                 }
                 Closed => {
-                    self.check_zone_resources(&mut accounting, zone, sector)?;
+                    self.check_zone_resources(&mut accounting, zone)?;
                     accounting.closed -= 1;
                 }
                 _ => (),
@@ -459,14 +455,13 @@ fn check_zone_resources(
         &self,
         accounting: &mut ZoneAccounting,
         zone: &mut ZoneDescriptor,
-        sector: u64,
     ) -> Result {
         match zone.condition {
             ZoneCondition::Empty => {
                 self.check_active_zones(accounting)?;
-                self.check_open_zones(accounting, sector)
+                self.check_open_zones(accounting, zone.start_sector)
             }
-            ZoneCondition::Closed => self.check_open_zones(accounting, sector),
+            ZoneCondition::Closed => self.check_open_zones(accounting, zone.start_sector),
             _ => Err(EIO),
         }
     }
@@ -535,7 +530,7 @@ fn close_zone(&self, zone: &mut ZoneDescriptor) -> Result {
         Ok(())
     }
 
-    fn finish_zone(&self, zone: &mut ZoneDescriptor, sector: u64) -> Result {
+    fn finish_zone(&self, zone: &mut ZoneDescriptor) -> Result {
         if zone.kind == ZoneType::Conventional {
             return Err(EINVAL);
         }
@@ -547,12 +542,12 @@ fn finish_zone(&self, zone: &mut ZoneDescriptor, sector: u64) -> Result {
             match zone.condition {
                 Full => return Ok(()),
                 Empty => {
-                    self.check_zone_resources(&mut accounting, zone, sector)?;
+                    self.check_zone_resources(&mut accounting, zone)?;
                 }
                 ImplicitOpen => accounting.implicit_open -= 1,
                 ExplicitOpen => accounting.explicit_open -= 1,
                 Closed => {
-                    self.check_zone_resources(&mut accounting, zone, sector)?;
+                    self.check_zone_resources(&mut accounting, zone)?;
                     accounting.closed -= 1;
                 }
                 _ => return Err(EIO),
@@ -568,7 +563,6 @@ fn finish_zone(&self, zone: &mut ZoneDescriptor, sector: u64) -> Result {
     fn reset_zone(
         &self,
         storage: &crate::disk_storage::DiskStorage,
-        hw_data: &Pin<&SpinLock<HwQueueContext>>,
         zone: &mut ZoneDescriptor,
     ) -> Result {
         if zone.kind == ZoneType::Conventional {
@@ -591,16 +585,55 @@ fn reset_zone(
         zone.condition = ZoneCondition::Empty;
         zone.write_pointer = zone.start_sector;
 
-        storage.discard(hw_data, zone.start_sector, zone.size_sectors);
+        storage.discard(zone.start_sector, zone.size_sectors);
+
+        Ok(())
+    }
+
+    fn set_zone_condition(
+        &self,
+        storage: &crate::disk_storage::DiskStorage,
+        zone: &mut ZoneDescriptor,
+        condition: ZoneCondition,
+    ) -> Result {
+        if zone.condition == condition {
+            zone.condition = ZoneCondition::Empty;
+            zone.write_pointer = zone.start_sector;
+            storage.discard(zone.start_sector, zone.size_sectors);
+        } else {
+            if !matches!(
+                zone.condition,
+                ZoneCondition::ReadOnly | ZoneCondition::Offline
+            ) {
+                self.finish_zone(zone)?;
+            }
 
+            zone.condition = condition;
+            zone.write_pointer = u64::MAX;
+        }
         Ok(())
     }
+    pub(crate) fn offline_zone(
+        &self,
+        storage: &crate::disk_storage::DiskStorage,
+        zone: &mut ZoneDescriptor,
+    ) -> Result {
+        self.set_zone_condition(storage, zone, ZoneCondition::Offline)
+    }
+
+    pub(crate) fn read_only_zone(
+        &self,
+        storage: &crate::disk_storage::DiskStorage,
+        zone: &mut ZoneDescriptor,
+    ) -> Result {
+        self.set_zone_condition(storage, zone, ZoneCondition::ReadOnly)
+    }
 }
 
 pub(crate) struct ZoneDescriptor {
     start_sector: u64,
     size_sectors: u32,
-    kind: ZoneType,
+    pub(crate) kind: ZoneType,
     capacity_sectors: u32,
     write_pointer: u64,
     condition: ZoneCondition,
@@ -628,7 +661,7 @@ fn check_bounds_read(&self, sector: u64, sectors: u32) -> Result {
 
 #[derive(Copy, Clone, PartialEq, Eq, Debug)]
 #[repr(u32)]
-enum ZoneType {
+pub(crate) enum ZoneType {
     Conventional = bindings::blk_zone_type_BLK_ZONE_TYPE_CONVENTIONAL,
     SequentialWriteRequired = bindings::blk_zone_type_BLK_ZONE_TYPE_SEQWRITE_REQ,
     #[expect(dead_code)]

-- 
2.51.2



^ permalink raw reply related

* [PATCH v2 46/83] block: rnull: add shared tags
From: Andreas Hindborg @ 2026-06-09 19:08 UTC (permalink / raw)
  To: Liam R. Howlett, Alice Ryhl, Anna-Maria Behnsen, Benno Lossin,
	Björn Roy Baron, Boqun Feng, Danilo Krummrich,
	FUJITA Tomonori, Frederic Weisbecker, Gary Guo, Jens Axboe,
	John Stultz, Lorenzo Stoakes, Lyude Paul, Miguel Ojeda,
	Stephen Boyd, Thomas Gleixner, Trevor Gross, Liam R. Howlett,
	Boqun Feng, Lorenzo Stoakes
  Cc: Andreas Hindborg, linux-block, linux-kernel, linux-mm,
	rust-for-linux
In-Reply-To: <20260609-rnull-v6-19-rc5-send-v2-0-82c7404542e2@kernel.org>

Add support for sharing tags between multiple rnull devices. When
enabled via the `shared_tags` configfs attribute, all devices in the
group share a single tag set, reducing memory usage.

This feature requires creating a shared `TagSet` that can be referenced
by multiple devices.

Signed-off-by: Andreas Hindborg <a.hindborg@kernel.org>
---
 drivers/block/rnull/configfs.rs |  44 +++++++++----
 drivers/block/rnull/rnull.rs    | 136 +++++++++++++++++++++++++---------------
 rust/kernel/block/mq/tag_set.rs |  18 ++++++
 3 files changed, 136 insertions(+), 62 deletions(-)

diff --git a/drivers/block/rnull/configfs.rs b/drivers/block/rnull/configfs.rs
index 5e6bcf9d31d8..a84854e7c358 100644
--- a/drivers/block/rnull/configfs.rs
+++ b/drivers/block/rnull/configfs.rs
@@ -10,9 +10,12 @@
     bindings,
     block::{
         badblocks::BadBlocks,
-        mq::gen_disk::{
-            GenDisk,
-            GenDiskBuilder, //
+        mq::{
+            gen_disk::{
+                GenDisk,
+                GenDiskBuilder, //
+            },
+            TagSet, //
         }, //
     },
     configfs::{
@@ -45,7 +48,9 @@
 
 mod macros;
 
-pub(crate) fn subsystem() -> impl PinInit<kernel::configfs::Subsystem<Config>, Error> {
+pub(crate) fn subsystem(
+    shared_tag_set: Arc<TagSet<NullBlkDevice>>,
+) -> impl PinInit<kernel::configfs::Subsystem<Config>, Error> {
     let item_type = configfs_attrs! {
         container: configfs::Subsystem<Config>,
         data: Config,
@@ -55,11 +60,17 @@ pub(crate) fn subsystem() -> impl PinInit<kernel::configfs::Subsystem<Config>, E
         ],
     };
 
-    kernel::configfs::Subsystem::new(c"rnull", item_type, try_pin_init!(Config {}))
+    kernel::configfs::Subsystem::new(
+        c"rnull",
+        item_type,
+        try_pin_init!(Config { shared_tag_set }),
+    )
 }
 
 #[pin_data]
-pub(crate) struct Config {}
+pub(crate) struct Config {
+    shared_tag_set: Arc<TagSet<NullBlkDevice>>,
+}
 
 #[vtable]
 impl AttributeOperations<0> for Config {
@@ -69,7 +80,7 @@ impl AttributeOperations<0> for Config {
         let mut writer = kernel::str::Formatter::new(page);
         writer.write_str(
             "blocksize,size,rotational,irqmode,completion_nsec,memory_backed,\
-             submit_queues,use_per_node_hctx,discard,blocking\n",
+             submit_queues,use_per_node_hctx,discard,blocking,shared_tags\n",
         )?;
         Ok(writer.bytes_written())
     }
@@ -106,6 +117,7 @@ fn make_group(
                 cache_size_mib: 15,
                 mbps: 16,
                 blocking: 17,
+                shared_tags: 18,
             ],
         };
 
@@ -139,6 +151,8 @@ fn make_group(
                     cache_size_mib: 0,
                     mbps: 0,
                     blocking: false,
+                    shared_tags: false,
+                    shared_tag_set: self.shared_tag_set.clone(),
                 }),
             }),
             core::iter::empty(),
@@ -215,6 +229,8 @@ struct DeviceConfigInner {
     disk_storage: Arc<DiskStorage>,
     mbps: u32,
     blocking: bool,
+    shared_tags: bool,
+    shared_tag_set: Arc<TagSet<NullBlkDevice>>,
 }
 
 #[vtable]
@@ -245,17 +261,20 @@ fn store(this: &DeviceConfig, page: &[u8]) -> Result {
                 capacity_mib: guard.capacity_mib,
                 irq_mode: guard.irq_mode,
                 completion_time: guard.completion_time,
-                memory_backed: guard.memory_backed,
-                submit_queues: guard.submit_queues,
-                home_node: guard.home_node,
                 discard: guard.discard,
-                no_sched: guard.no_sched,
                 bad_blocks: guard.bad_blocks.clone(),
                 bad_blocks_once: guard.bad_blocks_once,
                 bad_blocks_partial_io: guard.bad_blocks_partial_io,
                 storage: guard.disk_storage.clone(),
                 bandwidth_limit: u64::from(guard.mbps) * 2u64.pow(20),
-                blocking: guard.blocking,
+                shared_tag_set: guard.shared_tags.then(|| guard.shared_tag_set.clone()),
+                tag_set: crate::TagSetOptions {
+                    submit_queues: guard.submit_queues,
+                    home_node: guard.home_node,
+                    blocking: guard.blocking,
+                    memory_backed: guard.memory_backed,
+                    no_sched: guard.no_sched,
+                },
             })?);
             guard.powered = true;
         } else if guard.powered && !power_op {
@@ -427,3 +446,4 @@ fn store(this: &DeviceConfig, page: &[u8]) -> Result {
 
 configfs_simple_field!(DeviceConfig, 16, mbps, u32);
 configfs_simple_bool_field!(DeviceConfig, 17, blocking);
+configfs_simple_bool_field!(DeviceConfig, 18, shared_tags);
diff --git a/drivers/block/rnull/rnull.rs b/drivers/block/rnull/rnull.rs
index 181fce551a91..bcf6a85f1cbc 100644
--- a/drivers/block/rnull/rnull.rs
+++ b/drivers/block/rnull/rnull.rs
@@ -143,6 +143,10 @@
             default: false,
             description: "Register as a blocking blk-mq driver device",
         },
+        shared_tags: bool {
+            default: false,
+            description: "Share tag set between devices for blk-mq",
+        },
     },
 }
 
@@ -158,19 +162,30 @@ impl kernel::InPlaceModule for NullBlkModule {
     fn init(_module: &'static ThisModule) -> impl PinInit<Self, Error> {
         pr_info!("Rust null_blk loaded\n");
 
-        let mut disks = KVec::new();
+        pin_init::pin_init_scope(move || -> Result<_, Error> {
+            let submit_queues = if module_parameters::use_per_node_hctx.value() {
+                kernel::numa::num_online_nodes()
+            } else {
+                module_parameters::submit_queues.value()
+            };
+            let home_node = module_parameters::home_node.value();
+            let blocking = module_parameters::blocking.value();
+            let memory_backed = module_parameters::memory_backed.value();
+            let no_sched = module_parameters::no_sched.value();
+
+            let shared_tag_set = NullBlkDevice::build_tag_set(TagSetOptions {
+                submit_queues,
+                home_node,
+                blocking,
+                memory_backed,
+                no_sched,
+            })?;
 
-        let defer_init = move || -> Result<_, Error> {
+            let mut disks = KVec::new();
             let completion_time: i64 = module_parameters::completion_nsec.value().try_into()?;
             for i in 0..module_parameters::nr_devices.value() {
                 let name = CString::try_from_fmt(fmt!("rnullb{}", i))?;
 
-                let submit_queues = if module_parameters::use_per_node_hctx.value() {
-                    kernel::numa::num_online_nodes()
-                } else {
-                    module_parameters::submit_queues.value()
-                };
-
                 let block_size = module_parameters::bs.value();
                 let disk = NullBlkDevice::new(NullBlkOptions {
                     name: &name,
@@ -179,27 +194,30 @@ fn init(_module: &'static ThisModule) -> impl PinInit<Self, Error> {
                     capacity_mib: module_parameters::gb.value() * 1024,
                     irq_mode: module_parameters::irqmode.value().try_into()?,
                     completion_time: Delta::from_nanos(completion_time),
-                    memory_backed: module_parameters::memory_backed.value(),
-                    submit_queues,
-                    home_node: module_parameters::home_node.value(),
                     discard: module_parameters::discard.value(),
-                    no_sched: module_parameters::no_sched.value(),
                     bad_blocks: Arc::pin_init(BadBlocks::new(false), GFP_KERNEL)?,
                     bad_blocks_once: false,
                     bad_blocks_partial_io: false,
                     storage: Arc::pin_init(DiskStorage::new(0, block_size as usize), GFP_KERNEL)?,
                     bandwidth_limit: u64::from(module_parameters::mbps.value()) * 2u64.pow(20),
-                    blocking: module_parameters::blocking.value(),
+                    shared_tag_set: module_parameters::shared_tags
+                        .value()
+                        .then(|| shared_tag_set.clone()),
+                    tag_set: TagSetOptions {
+                        submit_queues,
+                        home_node,
+                        blocking,
+                        memory_backed,
+                        no_sched,
+                    },
                 })?;
                 disks.push(disk, GFP_KERNEL)?;
             }
 
-            Ok(disks)
-        };
-
-        try_pin_init!(Self {
-            configfs_subsystem <- configfs::subsystem(),
-            param_disks <- new_mutex!(defer_init()?),
+            Ok(try_pin_init!(Self {
+                configfs_subsystem <- configfs::subsystem(shared_tag_set),
+                param_disks <- new_mutex!(disks),
+            }))
         })
     }
 }
@@ -211,17 +229,14 @@ struct NullBlkOptions<'a> {
     capacity_mib: u64,
     irq_mode: IRQMode,
     completion_time: Delta,
-    memory_backed: bool,
-    submit_queues: u32,
-    home_node: i32,
     discard: bool,
-    no_sched: bool,
     bad_blocks: Arc<BadBlocks>,
     bad_blocks_once: bool,
     bad_blocks_partial_io: bool,
     storage: Arc<DiskStorage>,
     bandwidth_limit: u64,
-    blocking: bool,
+    shared_tag_set: Option<Arc<TagSet<NullBlkDevice>>>,
+    tag_set: TagSetOptions,
 }
 
 #[pin_data]
@@ -243,9 +258,50 @@ struct NullBlkDevice {
     disk: SetOnce<Arc<Revocable<GenDiskRef<Self>>>>,
 }
 
+struct TagSetOptions {
+    submit_queues: u32,
+    home_node: i32,
+    blocking: bool,
+    memory_backed: bool,
+    no_sched: bool,
+}
+
 impl NullBlkDevice {
     const BANDWIDTH_TIMER_INTERVAL: Delta = Delta::from_millis(20);
 
+    fn build_tag_set(options: TagSetOptions) -> Result<Arc<TagSet<Self>>> {
+        let TagSetOptions {
+            submit_queues,
+            home_node,
+            blocking,
+            memory_backed,
+            no_sched,
+        } = options;
+
+        if home_node > kernel::numa::num_online_nodes().try_into()? {
+            return Err(code::EINVAL);
+        }
+
+        let numa_node = if home_node == -1 {
+            kernel::alloc::NumaNode::NO_NODE
+        } else {
+            kernel::alloc::NumaNode::new(home_node)?
+        };
+
+        let mut flags = mq::tag_set::Flags::default();
+        if blocking || memory_backed {
+            flags |= mq::tag_set::Flag::Blocking;
+        }
+        if no_sched {
+            flags |= mq::tag_set::Flag::NoDefaultScheduler;
+        }
+
+        Arc::pin_init(
+            TagSet::new(submit_queues, (), 256, 1, numa_node, flags),
+            GFP_KERNEL,
+        )
+    }
+
     fn new(options: NullBlkOptions<'_>) -> Result<Arc<GenDisk<Self>>> {
         let NullBlkOptions {
             name,
@@ -254,37 +310,22 @@ fn new(options: NullBlkOptions<'_>) -> Result<Arc<GenDisk<Self>>> {
             capacity_mib,
             irq_mode,
             completion_time,
-            memory_backed,
-            submit_queues,
-            home_node,
             discard,
-            no_sched,
             bad_blocks,
             bad_blocks_once,
             bad_blocks_partial_io,
             storage,
             bandwidth_limit,
-            blocking,
+            shared_tag_set,
+            tag_set,
         } = options;
 
-        let mut flags = mq::tag_set::Flags::default();
+        let memory_backed = tag_set.memory_backed;
 
-        if blocking || memory_backed {
-            flags |= mq::tag_set::Flag::Blocking;
-        }
-
-        if no_sched {
-            flags |= mq::tag_set::Flag::NoDefaultScheduler;
-        }
-
-        if home_node > kernel::numa::num_online_nodes().try_into()? {
-            return Err(code::EINVAL);
-        }
-
-        let numa_node = if home_node == -1 {
-            kernel::alloc::NumaNode::NO_NODE
+        let tagset = if let Some(shared) = shared_tag_set {
+            shared
         } else {
-            kernel::alloc::NumaNode::new(home_node)?
+            Self::build_tag_set(tag_set)?
         };
 
         let capacity_sectors = capacity_mib << (20 - block::SECTOR_SHIFT);
@@ -294,11 +335,6 @@ fn new(options: NullBlkOptions<'_>) -> Result<Arc<GenDisk<Self>>> {
             return Err(code::EINVAL);
         }
 
-        let tagset = Arc::pin_init(
-            TagSet::new(submit_queues, (), 256, 1, numa_node, flags),
-            GFP_KERNEL,
-        )?;
-
         let queue_data = Arc::try_pin_init(
             try_pin_init!(Self {
                 storage,
diff --git a/rust/kernel/block/mq/tag_set.rs b/rust/kernel/block/mq/tag_set.rs
index bfb8f8af4ee1..5359e60fb5a5 100644
--- a/rust/kernel/block/mq/tag_set.rs
+++ b/rust/kernel/block/mq/tag_set.rs
@@ -124,3 +124,21 @@ fn drop(self: Pin<&mut Self>) {
         unsafe { T::TagSetData::from_foreign(tagset_data) };
     }
 }
+
+// SAFETY: It is safe to share references to `TagSet` across thread boundaries as long as
+// `TagSetData` is `Sync`.
+unsafe impl<T> Sync for TagSet<T>
+where
+    T: Operations,
+    T::TagSetData: Sync,
+{
+}
+
+// SAFETY: It is safe to transfer ownership of `TagSet` across thread boundaries if the associated
+// private data is `Send` (it will be dropped with the `TagSet`).
+unsafe impl<T> Send for TagSet<T>
+where
+    T: Operations,
+    T::TagSetData: Send,
+{
+}

-- 
2.51.2



^ permalink raw reply related

* [PATCH v2 69/83] block: rust: add `queue_rqs` vtable hook
From: Andreas Hindborg @ 2026-06-09 19:08 UTC (permalink / raw)
  To: Liam R. Howlett, Alice Ryhl, Anna-Maria Behnsen, Benno Lossin,
	Björn Roy Baron, Boqun Feng, Danilo Krummrich,
	FUJITA Tomonori, Frederic Weisbecker, Gary Guo, Jens Axboe,
	John Stultz, Lorenzo Stoakes, Lyude Paul, Miguel Ojeda,
	Stephen Boyd, Thomas Gleixner, Trevor Gross, Liam R. Howlett,
	Boqun Feng, Lorenzo Stoakes
  Cc: Andreas Hindborg, linux-block, linux-kernel, linux-mm,
	rust-for-linux
In-Reply-To: <20260609-rnull-v6-19-rc5-send-v2-0-82c7404542e2@kernel.org>

Add support for the `queue_rqs` callback to the Rust block layer
bindings. This callback allows drivers to receive multiple requests in
a single call, enabling batch processing optimizations.

The callback receives a `RequestList` containing the requests to be
processed. Drivers should remove successfully processed requests from
the list; any remaining requests will be requeued individually.

Signed-off-by: Andreas Hindborg <a.hindborg@kernel.org>
---
 rust/kernel/block/mq/operations.rs | 61 +++++++++++++++++++++++++++++++++++++-
 rust/kernel/block/mq/request.rs    | 26 ++++++++++++++++
 2 files changed, 86 insertions(+), 1 deletion(-)

diff --git a/rust/kernel/block/mq/operations.rs b/rust/kernel/block/mq/operations.rs
index 1be4695ca944..505e7d2b2253 100644
--- a/rust/kernel/block/mq/operations.rs
+++ b/rust/kernel/block/mq/operations.rs
@@ -38,6 +38,8 @@
 };
 use pin_init::PinInit;
 
+use super::request_list::RequestList;
+
 type ForeignBorrowed<'a, T> = <T as ForeignOwnable>::Borrowed<'a>;
 
 /// Implement this trait to interface blk-mq as block devices.
@@ -94,6 +96,15 @@ fn queue_rq(
         is_poll: bool,
     ) -> BlkResult;
 
+    /// Called by the kernel to queue a list of requests with the driver.
+    fn queue_rqs(
+        _hw_data: ForeignBorrowed<'_, Self::HwData>,
+        _queue_data: ForeignBorrowed<'_, Self::QueueData>,
+        _requests: &mut RequestList<Self>,
+    ) {
+        build_error!(crate::error::VTABLE_DEFAULT_ERROR)
+    }
+
     /// Called by the kernel to indicate that queued requests should be submitted.
     fn commit_rqs(
         hw_data: ForeignBorrowed<'_, Self::HwData>,
@@ -234,6 +245,50 @@ impl<T: Operations> OperationsVTable<T> {
         }
     }
 
+    /// This function is called by the C kernel to queue a list of new requests.
+    ///
+    /// Driver is guaranteed that each request belongs to the same queue. If the
+    /// driver doesn't empty the `rqlist` completely, then the rest will be
+    /// queued individually by the block layer upon return.
+    ///
+    /// # Safety
+    ///
+    /// - `requests` must satisfy the safety requirements of `RequestList<T>`
+    /// - All requests in `requests` must belong to the same hardware context.
+    unsafe extern "C" fn queue_rqs_callback(requests: *mut bindings::rq_list) {
+        // SAFETY:
+        // - By the safety requirements of this function, `requests` is valid for use as a
+        // `RequestList`.
+        // - We have exclusive access to `requests` for the duration of this function.
+        let requests = unsafe { RequestList::from_raw(requests) };
+
+        let rq_ptr = requests.peek_raw();
+
+        if rq_ptr.is_null() {
+            return;
+        }
+
+        // SAFETY: By function safety requirements, rq_ptr is pointing to a
+        // valid request.
+        let hctx = unsafe { (*rq_ptr).mq_hctx };
+
+        // SAFETY: The safety requirements for this function ensure that `hctx`
+        // is valid and that `driver_data` was produced by a call to
+        // `into_foreign` in `Self::init_hctx_callback`.
+        let hw_data = unsafe { T::HwData::borrow((*hctx).driver_data) };
+
+        // SAFETY: `hctx` is valid as required by this function.
+        let queue_data = unsafe { (*(*hctx).queue).queuedata };
+
+        // SAFETY: `queue.queuedata` was created by `GenDiskBuilder::build` with
+        // a call to `ForeignOwnable::into_foreign` to create `queuedata`.
+        // `ForeignOwnable::from_foreign` is only called when the tagset is
+        // dropped, which happens after we are dropped.
+        let queue_data = unsafe { T::QueueData::borrow(queue_data) };
+
+        T::queue_rqs(hw_data, queue_data, requests);
+    }
+
     /// This function is called by the C kernel. A pointer to this function is
     /// installed in the `blk_mq_ops` vtable for the driver.
     ///
@@ -475,7 +530,11 @@ impl<T: Operations> OperationsVTable<T> {
 
     const VTABLE: bindings::blk_mq_ops = bindings::blk_mq_ops {
         queue_rq: Some(Self::queue_rq_callback),
-        queue_rqs: None,
+        queue_rqs: if T::HAS_QUEUE_RQS {
+            Some(Self::queue_rqs_callback)
+        } else {
+            None
+        },
         commit_rqs: Some(Self::commit_rqs_callback),
         get_budget: None,
         put_budget: None,
diff --git a/rust/kernel/block/mq/request.rs b/rust/kernel/block/mq/request.rs
index 84f8b2c17f85..9c451583e75d 100644
--- a/rust/kernel/block/mq/request.rs
+++ b/rust/kernel/block/mq/request.rs
@@ -173,6 +173,32 @@ pub fn queue(&self) -> &RequestQueue<T> {
     pub fn as_raw(&self) -> *mut bindings::request {
         self.0.get()
     }
+
+    // Return a valid hctx pointer.
+    fn hctx_raw(&self) -> *mut bindings::blk_mq_hw_ctx {
+        // SAFETY: The request is guaranteed to be associated with a hardware
+        // context while we have access to it.
+        unsafe { (*self.0.get()).mq_hctx }
+    }
+
+    /// Get a reference to the [`T::HwData`] for the hardware context that this
+    /// request is associated with.
+    pub fn hw_data(&self) -> <T::HwData as ForeignOwnable>::Borrowed<'_> {
+        let hctx = self.hctx_raw();
+
+        // SAFETY: `hctx` is valid and `driver_data` was produced by a call to
+        // `into_foreign` in `OperationsVTable::init_hctx_callback`.
+        unsafe { T::HwData::borrow((*hctx).driver_data) }
+    }
+
+    pub fn is_poll(&self) -> bool {
+        let hctx = self.hctx_raw();
+
+        u32::from(
+            // SAFETY: `hctx_raw` returns a valid pointer.
+            unsafe { (*hctx).type_ },
+        ) == bindings::hctx_type_HCTX_TYPE_POLL
+    }
 }
 
 /// A wrapper around a blk-mq [`struct request`]. This represents an IO request.

-- 
2.51.2



^ permalink raw reply related

* [PATCH v2 01/83] block: rust: fix `Send` bound for `GenDisk`
From: Andreas Hindborg @ 2026-06-09 19:07 UTC (permalink / raw)
  To: Liam R. Howlett, Alice Ryhl, Anna-Maria Behnsen, Benno Lossin,
	Björn Roy Baron, Boqun Feng, Danilo Krummrich,
	FUJITA Tomonori, Frederic Weisbecker, Gary Guo, Jens Axboe,
	John Stultz, Lorenzo Stoakes, Lyude Paul, Miguel Ojeda,
	Stephen Boyd, Thomas Gleixner, Trevor Gross, Liam R. Howlett,
	Boqun Feng, Lorenzo Stoakes
  Cc: Andreas Hindborg, linux-block, linux-kernel, linux-mm,
	rust-for-linux, Yuan Tan
In-Reply-To: <20260609-rnull-v6-19-rc5-send-v2-0-82c7404542e2@kernel.org>

The `Send` implementation for `GenDisk<T>` was conditioned on `T: Send`.
This constrains the wrong type. `T` is the `Operations` implementation,
which is typically a zero-sized marker type that carries no data, so `T:
Send` says nothing about whether the data a `GenDisk` actually owns can be
moved to another thread.

A `GenDisk<T>` owns the queue data `T::QueueData` (stored as the
`gendisk`'s `queuedata` and dropped when the `GenDisk` is dropped) and an
`Arc<TagSet<T>>`. These are the values transferred when a `GenDisk` is sent
across a thread boundary, so the `Send` bound must constrain exactly them.
Bound `T::QueueData: Send` and `Arc<TagSet<T>>: Send` instead.

Fixes: 3253aba3408a ("rust: block: introduce `kernel::block::mq` module")
Suggested-by: Yuan Tan <ytan089@ucr.edu>
Signed-off-by: Andreas Hindborg <a.hindborg@kernel.org>
---

Please take patch from Yuan instead of this one, if they send a fixed
version [1].

[1] https://lore.kernel.org/r/8839ddc5ff54bf454d508cde91d27d00fc3e2dd8.1780633578.git.ytan089@ucr.edu
---
 rust/kernel/block/mq/gen_disk.rs | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/rust/kernel/block/mq/gen_disk.rs b/rust/kernel/block/mq/gen_disk.rs
index 912cb805caf5..b36d24382cc3 100644
--- a/rust/kernel/block/mq/gen_disk.rs
+++ b/rust/kernel/block/mq/gen_disk.rs
@@ -199,8 +199,14 @@ pub struct GenDisk<T: Operations> {
 }
 
 // SAFETY: `GenDisk` is an owned pointer to a `struct gendisk` and an `Arc` to a
-// `TagSet` It is safe to send this to other threads as long as T is Send.
-unsafe impl<T: Operations + Send> Send for GenDisk<T> {}
+// `TagSet`. It is safe to send this to other threads as long as these two are `Send`.
+unsafe impl<T> Send for GenDisk<T>
+where
+    T: Operations,
+    T::QueueData: Send,
+    Arc<TagSet<T>>: Send,
+{
+}
 
 impl<T: Operations> Drop for GenDisk<T> {
     fn drop(&mut self) {

-- 
2.51.2



^ permalink raw reply related

* [PATCH v2 28/83] block: rust: mq: add Request::end() method for custom status codes
From: Andreas Hindborg @ 2026-06-09 19:08 UTC (permalink / raw)
  To: Liam R. Howlett, Alice Ryhl, Anna-Maria Behnsen, Benno Lossin,
	Björn Roy Baron, Boqun Feng, Danilo Krummrich,
	FUJITA Tomonori, Frederic Weisbecker, Gary Guo, Jens Axboe,
	John Stultz, Lorenzo Stoakes, Lyude Paul, Miguel Ojeda,
	Stephen Boyd, Thomas Gleixner, Trevor Gross, Liam R. Howlett,
	Boqun Feng, Lorenzo Stoakes
  Cc: Andreas Hindborg, linux-block, linux-kernel, linux-mm,
	rust-for-linux
In-Reply-To: <20260609-rnull-v6-19-rc5-send-v2-0-82c7404542e2@kernel.org>

Add end() method to Request that accepts a custom status code parameter,
refactoring end_ok() to use it with BLK_STS_OK.

Reviewed-by: Alice Ryhl <aliceryhl@google.com>
Signed-off-by: Andreas Hindborg <a.hindborg@kernel.org>
---
 rust/kernel/block/mq/request.rs | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/rust/kernel/block/mq/request.rs b/rust/kernel/block/mq/request.rs
index 9e176f015ab8..c06907dfe5b5 100644
--- a/rust/kernel/block/mq/request.rs
+++ b/rust/kernel/block/mq/request.rs
@@ -336,13 +336,18 @@ pub(crate) unsafe fn start_unchecked(&mut self) {
 
     /// Notify the block layer that the request has been completed without errors.
     pub fn end_ok(self) {
+        self.end(bindings::BLK_STS_OK)
+    }
+
+    /// Notify the block layer that the request has been completed.
+    pub fn end(self, status: u8) {
         let request_ptr = self.0.get().cast();
         core::mem::forget(self);
         // SAFETY: By type invariant, `this.0` was a valid `struct request`. The
         // existence of `self` guarantees that there are no `ARef`s pointing to
         // this request. Therefore it is safe to hand it back to the block
         // layer.
-        unsafe { bindings::blk_mq_end_request(request_ptr, bindings::BLK_STS_OK) };
+        unsafe { bindings::blk_mq_end_request(request_ptr, status) };
     }
 }
 

-- 
2.51.2



^ permalink raw reply related

* [PATCH v2 07/83] block: rust: change `queue_rq` request type to `Owned`
From: Andreas Hindborg @ 2026-06-09 19:07 UTC (permalink / raw)
  To: Liam R. Howlett, Alice Ryhl, Anna-Maria Behnsen, Benno Lossin,
	Björn Roy Baron, Boqun Feng, Danilo Krummrich,
	FUJITA Tomonori, Frederic Weisbecker, Gary Guo, Jens Axboe,
	John Stultz, Lorenzo Stoakes, Lyude Paul, Miguel Ojeda,
	Stephen Boyd, Thomas Gleixner, Trevor Gross, Liam R. Howlett,
	Boqun Feng, Lorenzo Stoakes
  Cc: Andreas Hindborg, linux-block, linux-kernel, linux-mm,
	rust-for-linux
In-Reply-To: <20260609-rnull-v6-19-rc5-send-v2-0-82c7404542e2@kernel.org>

Simplify the reference counting scheme for `Request` from 4 states to 3
states. This is achieved by letting a refcount of zero represent both
"owned by the block layer" and "uniquely owned by the driver".

Implement `Ownable` for `Request` and deliver `Request` to drivers as
`Owned<Request>`. In this process:

 - Move uniqueness assertions out of `rnull` as these are now guaranteed by
   the `Owned` type.
 - Move `start_unchecked`, `try_set_end` and `end_ok` from `Request` to
   `Owned<Request>`, relying on type invariant for uniqueness.

Signed-off-by: Andreas Hindborg <a.hindborg@kernel.org>
---
 drivers/block/rnull/rnull.rs       |  26 ++---
 rust/kernel/block/mq.rs            |  10 +-
 rust/kernel/block/mq/operations.rs |  32 +++--
 rust/kernel/block/mq/request.rs    | 231 ++++++++++++++++++++++---------------
 4 files changed, 176 insertions(+), 123 deletions(-)

diff --git a/drivers/block/rnull/rnull.rs b/drivers/block/rnull/rnull.rs
index 77ccc6850961..69cf62475446 100644
--- a/drivers/block/rnull/rnull.rs
+++ b/drivers/block/rnull/rnull.rs
@@ -19,7 +19,8 @@
         },
     },
     error::Result,
-    new_mutex, pr_info,
+    new_mutex,
+    pr_info,
     prelude::*,
     str::CString,
     sync::{
@@ -27,6 +28,10 @@
         Arc,
         Mutex, //
     },
+    types::{
+        OwnableRefCounted,
+        Owned, //
+    }, //
 };
 
 module! {
@@ -129,15 +134,10 @@ impl Operations for NullBlkDevice {
     type QueueData = KBox<QueueData>;
 
     #[inline(always)]
-    fn queue_rq(queue_data: &QueueData, rq: ARef<mq::Request<Self>>, _is_last: bool) -> Result {
+    fn queue_rq(queue_data: &QueueData, rq: Owned<mq::Request<Self>>, _is_last: bool) -> Result {
         match queue_data.irq_mode {
-            IRQMode::None => mq::Request::end_ok(rq)
-                .map_err(|_e| kernel::error::code::EIO)
-                // We take no refcounts on the request, so we expect to be able to
-                // end the request. The request reference must be unique at this
-                // point, and so `end_ok` cannot fail.
-                .expect("Fatal error - expected to be able to end request"),
-            IRQMode::Soft => mq::Request::complete(rq),
+            IRQMode::None => rq.end_ok(),
+            IRQMode::Soft => mq::Request::complete(rq.into()),
         }
         Ok(())
     }
@@ -145,11 +145,9 @@ fn queue_rq(queue_data: &QueueData, rq: ARef<mq::Request<Self>>, _is_last: bool)
     fn commit_rqs(_queue_data: &QueueData) {}
 
     fn complete(rq: ARef<mq::Request<Self>>) {
-        mq::Request::end_ok(rq)
+        OwnableRefCounted::try_from_shared(rq)
             .map_err(|_e| kernel::error::code::EIO)
-            // We take no refcounts on the request, so we expect to be able to
-            // end the request. The request reference must be unique at this
-            // point, and so `end_ok` cannot fail.
-            .expect("Fatal error - expected to be able to end request");
+            .expect("Failed to complete request")
+            .end_ok();
     }
 }
diff --git a/rust/kernel/block/mq.rs b/rust/kernel/block/mq.rs
index 1fd0d54dd549..b8ecd69abe98 100644
--- a/rust/kernel/block/mq.rs
+++ b/rust/kernel/block/mq.rs
@@ -62,6 +62,7 @@
 //!     new_mutex,
 //!     prelude::*,
 //!     sync::{aref::ARef, Arc, Mutex},
+//!     types::{ForeignOwnable, OwnableRefCounted, Owned},
 //! };
 //!
 //! struct MyBlkDevice;
@@ -70,17 +71,18 @@
 //! impl Operations for MyBlkDevice {
 //!     type QueueData = ();
 //!
-//!     fn queue_rq(_queue_data: (), rq: ARef<Request<Self>>, _is_last: bool) -> Result {
-//!         Request::end_ok(rq);
+//!     fn queue_rq(_queue_data: (), rq: Owned<Request<Self>>, _is_last: bool) -> Result {
+//!         rq.end_ok();
 //!         Ok(())
 //!     }
 //!
 //!     fn commit_rqs(_queue_data: ()) {}
 //!
 //!     fn complete(rq: ARef<Request<Self>>) {
-//!         Request::end_ok(rq)
+//!         OwnableRefCounted::try_from_shared(rq)
 //!             .map_err(|_e| kernel::error::code::EIO)
-//!             .expect("Fatal error - expected to be able to end request");
+//!             .expect("Fatal error - expected to be able to end request")
+//!             .end_ok();
 //!     }
 //! }
 //!
diff --git a/rust/kernel/block/mq/operations.rs b/rust/kernel/block/mq/operations.rs
index 6b2fcd76372e..bb23a32f3983 100644
--- a/rust/kernel/block/mq/operations.rs
+++ b/rust/kernel/block/mq/operations.rs
@@ -17,11 +17,18 @@
     prelude::*,
     sync::{
         aref::ARef,
+        atomic::ordering,
         Refcount, //
     },
-    types::ForeignOwnable,
+    types::{
+        ForeignOwnable,
+        Owned, //
+    },
+};
+use core::{
+    marker::PhantomData,
+    ptr::NonNull, //
 };
-use core::marker::PhantomData;
 
 type ForeignBorrowed<'a, T> = <T as ForeignOwnable>::Borrowed<'a>;
 
@@ -45,7 +52,7 @@ pub trait Operations: Sized {
     /// `false`, the driver is allowed to defer committing the request.
     fn queue_rq(
         queue_data: ForeignBorrowed<'_, Self::QueueData>,
-        rq: ARef<Request<Self>>,
+        rq: Owned<Request<Self>>,
         is_last: bool,
     ) -> Result;
 
@@ -99,16 +106,23 @@ impl<T: Operations> OperationsVTable<T> {
         // this function.
         let request = unsafe { &*(*bd).rq.cast::<Request<T>>() };
 
-        // One refcount for the ARef, one for being in flight
-        request.wrapper_ref().refcount().set(2);
+        debug_assert!(
+            request
+                .wrapper_ref()
+                .refcount()
+                .as_atomic()
+                .load(ordering::Acquire)
+                == 0
+        );
 
         // SAFETY:
-        //  - We own a refcount that we took above. We pass that to `ARef`.
+        //  - By API contract, we own the request.
         //  - By the safety requirements of this function, `request` is a valid
         //    `struct request` and the private data is properly initialized.
         //  - `rq` will be alive until `blk_mq_end_request` is called and is
-        //    reference counted by `ARef` until then.
-        let rq = unsafe { Request::aref_from_raw((*bd).rq) };
+        //    tracked by `Owned` (or `ARef`s derived from it) until then.
+        let mut rq =
+            unsafe { Owned::from_raw(NonNull::<Request<T>>::new_unchecked((*bd).rq.cast())) };
 
         // SAFETY: `hctx` is valid as required by this function.
         let queue_data = unsafe { (*(*hctx).queue).queuedata };
@@ -120,7 +134,7 @@ impl<T: Operations> OperationsVTable<T> {
         let queue_data = unsafe { T::QueueData::borrow(queue_data) };
 
         // SAFETY: We have exclusive access and we just set the refcount above.
-        unsafe { Request::start_unchecked(&rq) };
+        unsafe { rq.start_unchecked() };
 
         let ret = T::queue_rq(
             queue_data,
diff --git a/rust/kernel/block/mq/request.rs b/rust/kernel/block/mq/request.rs
index cf013b9e2cac..7444de3c8522 100644
--- a/rust/kernel/block/mq/request.rs
+++ b/rust/kernel/block/mq/request.rs
@@ -7,39 +7,45 @@
 use crate::{
     bindings,
     block::mq::Operations,
-    error::Result,
     sync::{
-        aref::{ARef, AlwaysRefCounted, RefCounted},
-        atomic::Relaxed,
+        aref::{
+            ARef,
+            RefCounted, //
+        },
+        atomic::ordering,
         Refcount,
     },
-    types::Opaque,
+    types::{
+        Opaque,
+        Ownable,
+        OwnableRefCounted,
+        Owned, //
+    },
+};
+use core::{
+    marker::PhantomData,
+    ptr::NonNull, //
 };
-use core::{marker::PhantomData, ptr::NonNull};
 
 /// A wrapper around a blk-mq [`struct request`]. This represents an IO request.
 ///
 /// # Implementation details
 ///
-/// There are four states for a request that the Rust bindings care about:
-///
-/// 1. Request is owned by block layer (refcount 0).
-/// 2. Request is owned by driver but with zero [`ARef`]s in existence
-///    (refcount 1).
-/// 3. Request is owned by driver with exactly one [`ARef`] in existence
-///    (refcount 2).
-/// 4. Request is owned by driver with more than one [`ARef`] in existence
-///    (refcount > 2).
+/// There are three states for a request that the Rust bindings care about:
 ///
+/// - 0: The request is owned by C block layer or is uniquely referenced (by [`Owned<_>`]).
+/// - 1: The request is owned by Rust abstractions but is not referenced.
+/// - 2+: There is one or more [`ARef`] instances referencing the request.
 ///
-/// We need to track 1 and 2 to ensure we fail tag to request conversions for
-/// requests that are not owned by the driver.
+/// We need to tell refcount 0 apart from 1 to make sure that `tag_to_rq` does
+/// not issue any [`ARef`] to requests not owned by the driver, or to requests
+/// that have an [`Owned`] referencing them.
 ///
-/// We need to track 3 and 4 to ensure that it is safe to end the request and hand
-/// back ownership to the block layer.
+/// We need to tell refcount 2 apart from larger values to know when it is safe
+/// to convert an [`ARef`] to an [`Owned`].
 ///
 /// Note that the driver can still obtain new `ARef` even if there is no `ARef`s in existence by
-/// using `tag_to_rq`, hence the need to distinguish B and C.
+/// using `tag_to_rq`, hence the need to distinguish 1 and 2.
 ///
 /// The states are tracked through the private `refcount` field of
 /// `RequestDataWrapper`. This structure lives in the private data area of the C
@@ -66,6 +72,7 @@ impl<T: Operations> Request<T> {
     ///
     /// * The caller must own a refcount on `ptr` that is transferred to the
     ///   returned [`ARef`].
+    /// * The refcount must be >= 2.
     /// * The type invariants for [`Request`] must hold for the pointee of `ptr`.
     ///
     /// [`struct request`]: srctree/include/linux/blk-mq.h
@@ -76,72 +83,6 @@ pub(crate) unsafe fn aref_from_raw(ptr: *mut bindings::request) -> ARef<Self> {
         unsafe { ARef::from_raw(NonNull::new_unchecked(ptr.cast())) }
     }
 
-    /// Notify the block layer that a request is going to be processed now.
-    ///
-    /// The block layer uses this hook to do proper initializations such as
-    /// starting the timeout timer. It is a requirement that block device
-    /// drivers call this function when starting to process a request.
-    ///
-    /// # Safety
-    ///
-    /// The caller must have exclusive ownership of `self`, that is
-    /// `self.wrapper_ref().refcount() == 2`.
-    pub(crate) unsafe fn start_unchecked(this: &ARef<Self>) {
-        // SAFETY: By type invariant, `self.0` is a valid `struct request` and
-        // we have exclusive access.
-        unsafe { bindings::blk_mq_start_request(this.0.get()) };
-    }
-
-    /// Try to take exclusive ownership of `this` by dropping the refcount to 0.
-    /// This fails if `this` is not the only [`ARef`] pointing to the underlying
-    /// [`Request`].
-    ///
-    /// If the operation is successful, [`Ok`] is returned with a pointer to the
-    /// C [`struct request`]. If the operation fails, `this` is returned in the
-    /// [`Err`] variant.
-    ///
-    /// [`struct request`]: srctree/include/linux/blk-mq.h
-    fn try_set_end(this: ARef<Self>) -> Result<*mut bindings::request, ARef<Self>> {
-        // To hand back the ownership, we need the current refcount to be 2.
-        // Since we can race with `TagSet::tag_to_rq`, this needs to atomically reduce
-        // refcount to 0. `Refcount` does not provide a way to do this, so use the underlying
-        // atomics directly.
-        if let Err(_old) = this
-            .wrapper_ref()
-            .refcount()
-            .as_atomic()
-            .cmpxchg(2, 0, Relaxed)
-        {
-            return Err(this);
-        }
-
-        let request_ptr = this.0.get();
-        core::mem::forget(this);
-
-        Ok(request_ptr)
-    }
-
-    /// Notify the block layer that the request has been completed without errors.
-    ///
-    /// This function will return [`Err`] if `this` is not the only [`ARef`]
-    /// referencing the request.
-    pub fn end_ok(this: ARef<Self>) -> Result<(), ARef<Self>> {
-        let request_ptr = Self::try_set_end(this)?;
-
-        // SAFETY: By type invariant, `this.0` was a valid `struct request`. The
-        // success of the call to `try_set_end` guarantees that there are no
-        // `ARef`s pointing to this request. Therefore it is safe to hand it
-        // back to the block layer.
-        unsafe {
-            bindings::blk_mq_end_request(
-                request_ptr,
-                bindings::BLK_STS_OK as bindings::blk_status_t,
-            )
-        };
-
-        Ok(())
-    }
-
     /// Complete the request by scheduling `Operations::complete` for
     /// execution.
     ///
@@ -234,27 +175,125 @@ unsafe impl<T: Operations> Sync for Request<T> {}
 // matching reference count decrement is executed.
 unsafe impl<T: Operations> RefCounted for Request<T> {
     fn inc_ref(&self) {
-        self.wrapper_ref().refcount().inc();
+        let refcount = &self.wrapper_ref().refcount().as_atomic();
+
+        // Load acquire, store relaxed. We sync with store release of
+        // `OwnableRefCounted::into_shared`. After that all unique references are dead and we have
+        // shared access. We can use relaxed ordering for the store.
+        #[cfg_attr(not(debug_assertions), allow(unused_variables))]
+        let old = refcount.fetch_add(1, ordering::Acquire);
+
+        debug_assert!(old >= 1, "Request refcount zero clone");
     }
 
     unsafe fn dec_ref(obj: core::ptr::NonNull<Self>) {
-        // SAFETY: The type invariants of `ARef` guarantee that `obj` is valid
+        // SAFETY: The type invariants of `RefCounted` guarantee that `obj` is valid
         // for read.
         let wrapper_ptr = unsafe { Self::wrapper_ptr(obj.as_ptr()).as_ptr() };
         // SAFETY: The type invariant of `Request` guarantees that the private
         // data area is initialized and valid.
         let refcount = unsafe { &*RequestDataWrapper::refcount_ptr(wrapper_ptr) };
 
-        #[cfg_attr(not(CONFIG_DEBUG_MISC), allow(unused_variables))]
-        let is_zero = refcount.dec_and_test();
+        // Store release to sync with load acquire in
+        // `OwnableRefCounted::try_from_shared`.
+        #[cfg_attr(not(debug_assertions), allow(unused_variables))]
+        let old = refcount.as_atomic().fetch_sub(1, ordering::Release);
 
-        #[cfg(CONFIG_DEBUG_MISC)]
-        if is_zero {
-            panic!("Request reached refcount zero in Rust abstractions");
-        }
+        debug_assert!(
+            old > 1,
+            "Request reached refcount zero in Rust abstractions"
+        );
+    }
+}
+
+impl<T: Operations> Owned<Request<T>> {
+    /// Notify the block layer that a request is going to be processed now.
+    ///
+    /// The block layer uses this hook to do proper initializations such as
+    /// starting the timeout timer. It is a requirement that block device
+    /// drivers call this function when starting to process a request.
+    ///
+    /// # Safety
+    ///
+    /// The caller must have exclusive ownership of `self`, that is
+    /// `self.wrapper_ref().refcount() == 0`.
+    ///
+    /// This can only be called once in the request life cycle.
+    pub(crate) unsafe fn start_unchecked(&mut self) {
+        // SAFETY: By type invariant, `self.0` is a valid `struct request` and
+        // we have exclusive access.
+        unsafe { bindings::blk_mq_start_request(self.0.get()) };
+    }
+
+    /// Notify the block layer that the request has been completed without errors.
+    pub fn end_ok(self) {
+        let request_ptr = self.0.get().cast();
+        core::mem::forget(self);
+        // SAFETY: By type invariant, `this.0` was a valid `struct request`. The
+        // existence of `self` guarantees that there are no `ARef`s pointing to
+        // this request. Therefore it is safe to hand it back to the block
+        // layer.
+        unsafe { bindings::blk_mq_end_request(request_ptr, bindings::BLK_STS_OK) };
     }
 }
 
-// SAFETY: We currently do not implement `Ownable`, thus it is okay to obtain an `ARef<Request>`
-// from a `&Request` (but this will change in the future).
-unsafe impl<T: Operations> AlwaysRefCounted for Request<T> {}
+impl<T: Operations> Ownable for Request<T> {
+    // The `release` implementation frees the underlying request according to the reference
+    // counting scheme for `Request`.
+    unsafe fn release(&mut self) {
+        // SAFETY: The safety requirements of this function guarantee that `self`
+        // is valid for read.
+        let wrapper_ptr = unsafe { Self::wrapper_ptr(self).as_ptr() };
+        // SAFETY: The type invariant of `Request` guarantees that the private
+        // data area is initialized and valid.
+        let refcount = unsafe { &*RequestDataWrapper::refcount_ptr(wrapper_ptr) };
+
+        // Store release to sync with load acquire when converting back to owned.
+        #[cfg_attr(not(debug_assertions), allow(unused_variables))]
+        let old = refcount.as_atomic().fetch_add(1, ordering::Release);
+
+        debug_assert!(
+            old == 0,
+            "Invalid refcount when releasing `Owned<Request<T>>`"
+        );
+    }
+}
+
+impl<T: Operations> OwnableRefCounted for Request<T> {
+    fn try_from_shared(this: ARef<Self>) -> core::result::Result<Owned<Self>, ARef<Self>> {
+        // Load acquire to sync with decrement store release to make sure all
+        // shared access has ended.
+        let updated = this
+            .wrapper_ref()
+            .refcount()
+            .as_atomic()
+            .cmpxchg(2, 0, ordering::Acquire);
+
+        match updated {
+            Ok(_) => Ok(
+                // SAFETY: We achieved unique ownership above.
+                unsafe { Owned::from_raw(ARef::into_raw(this)) },
+            ),
+            Err(_) => Err(this),
+        }
+    }
+
+    fn into_shared(this: Owned<Self>) -> ARef<Self> {
+        // Store release to sync with future increments using load acquire to
+        // make sure exclusive access has ended before shared access start.
+        #[cfg_attr(not(debug_assertions), allow(unused_variables))]
+        let old = this
+            .wrapper_ref()
+            .refcount()
+            .as_atomic()
+            .fetch_add(2, ordering::Release);
+
+        debug_assert!(
+            old == 0,
+            "Invalid refcount when upgrading `Owned<Request<T>>`"
+        );
+
+        // SAFETY: We incremented the refcount above.
+        unsafe { ARef::from_raw(Owned::into_raw(this)) }
+    }
+}

-- 
2.51.2



^ permalink raw reply related

* [PATCH v2 33/83] block: rust: add `TagSet` private data support
From: Andreas Hindborg @ 2026-06-09 19:08 UTC (permalink / raw)
  To: Liam R. Howlett, Alice Ryhl, Anna-Maria Behnsen, Benno Lossin,
	Björn Roy Baron, Boqun Feng, Danilo Krummrich,
	FUJITA Tomonori, Frederic Weisbecker, Gary Guo, Jens Axboe,
	John Stultz, Lorenzo Stoakes, Lyude Paul, Miguel Ojeda,
	Stephen Boyd, Thomas Gleixner, Trevor Gross, Liam R. Howlett,
	Boqun Feng, Lorenzo Stoakes
  Cc: Andreas Hindborg, linux-block, linux-kernel, linux-mm,
	rust-for-linux, Andreas Hindborg
In-Reply-To: <20260609-rnull-v6-19-rc5-send-v2-0-82c7404542e2@kernel.org>

From: Andreas Hindborg <a.hindborg@samsung.com>

C block device drivers can attach private data to a `struct
blk_mq_tag_set`. Add support for this feature for Rust block device
drivers via the `Operations::TagSetData` associated type.

The private data is passed to `TagSet::new` and is stored in the
`driver_data` field of the underlying `struct blk_mq_tag_set`. It is
released when the `TagSet` is dropped.

Signed-off-by: Andreas Hindborg <a.hindborg@kernel.org>
---
 drivers/block/rnull/rnull.rs       |  3 ++-
 rust/kernel/block/mq.rs            |  6 ++++--
 rust/kernel/block/mq/operations.rs |  4 ++++
 rust/kernel/block/mq/tag_set.rs    | 26 ++++++++++++++++++++++----
 4 files changed, 32 insertions(+), 7 deletions(-)

diff --git a/drivers/block/rnull/rnull.rs b/drivers/block/rnull/rnull.rs
index be0b4bd25e53..ad26a4a8dbbe 100644
--- a/drivers/block/rnull/rnull.rs
+++ b/drivers/block/rnull/rnull.rs
@@ -240,7 +240,7 @@ fn new(options: NullBlkOptions<'_>) -> Result<GenDisk<Self>> {
         }
 
         let tagset = Arc::pin_init(
-            TagSet::new(submit_queues, 256, 1, numa_node, flags),
+            TagSet::new(submit_queues, (), 256, 1, numa_node, flags),
             GFP_KERNEL,
         )?;
 
@@ -533,6 +533,7 @@ fn align_down<T>(value: T, to: T) -> T
 impl Operations for NullBlkDevice {
     type QueueData = Pin<KBox<QueueData>>;
     type RequestData = Pdu;
+    type TagSetData = ();
 
     fn new_request_data() -> impl PinInit<Self::RequestData> {
         pin_init!(Pdu {
diff --git a/rust/kernel/block/mq.rs b/rust/kernel/block/mq.rs
index bac15b509d90..28cee0d60846 100644
--- a/rust/kernel/block/mq.rs
+++ b/rust/kernel/block/mq.rs
@@ -71,6 +71,7 @@
 //! impl Operations for MyBlkDevice {
 //!     type RequestData = ();
 //!     type QueueData = ();
+//!     type TagSetData = ();
 //!
 //!     fn new_request_data(
 //!     ) -> impl PinInit<()> {
@@ -94,8 +95,9 @@
 //!
 //! let tagset: Arc<TagSet<MyBlkDevice>> =
 //!     Arc::pin_init(
-//!         TagSet::new(1, 256, 1, NumaNode::NO_NODE, mq::tag_set::Flags::default()),
-//!         GFP_KERNEL)?;
+//!         TagSet::new(1, (), 256, 1, NumaNode::NO_NODE, mq::tag_set::Flags::default()),
+//!         GFP_KERNEL
+//!     )?;
 //! let mut disk = gen_disk::GenDiskBuilder::new()
 //!     .capacity_sectors(4096)
 //!     .build(fmt!("myblk"), tagset, ())?;
diff --git a/rust/kernel/block/mq/operations.rs b/rust/kernel/block/mq/operations.rs
index c49ca2e8bbb2..093bb21fa1b2 100644
--- a/rust/kernel/block/mq/operations.rs
+++ b/rust/kernel/block/mq/operations.rs
@@ -63,6 +63,10 @@ pub trait Operations: Sized {
     /// the `GenDisk` associated with this `Operations` implementation.
     type QueueData: ForeignOwnable + Sync;
 
+    /// Data associated with a `TagSet`. This is stored as a pointer in `struct
+    /// blk_mq_tag_set`.
+    type TagSetData: ForeignOwnable + Sync;
+
     /// Called by the kernel to get an initializer for a `Pin<&mut RequestData>`.
     fn new_request_data() -> impl PinInit<Self::RequestData>;
 
diff --git a/rust/kernel/block/mq/tag_set.rs b/rust/kernel/block/mq/tag_set.rs
index d6d104adf4aa..bfb8f8af4ee1 100644
--- a/rust/kernel/block/mq/tag_set.rs
+++ b/rust/kernel/block/mq/tag_set.rs
@@ -19,7 +19,10 @@
         Result, //
     },
     prelude::*,
-    types::Opaque,
+    types::{
+        ForeignOwnable,
+        Opaque, //
+    },
 };
 use core::{
     convert::TryInto,
@@ -56,6 +59,7 @@ impl<T: Operations> TagSet<T> {
     /// Try to create a new tag set
     pub fn new(
         nr_hw_queues: u32,
+        tagset_data: T::TagSetData,
         num_tags: u32,
         num_maps: u32,
         numa_node: NumaNode,
@@ -73,7 +77,7 @@ pub fn new(
                     queue_depth: num_tags,
                     cmd_size,
                     flags: flags.into(),
-                    driver_data: core::ptr::null_mut::<c_void>(),
+                    driver_data: tagset_data.into_foreign(),
                     nr_maps: num_maps,
                     ..tag_set
                 }
@@ -86,7 +90,14 @@ pub fn new(
                 // SAFETY: we do not move out of `tag_set`.
                 let tag_set: &mut Opaque<_> = unsafe { Pin::get_unchecked_mut(tag_set) };
                 // SAFETY: `tag_set` is a reference to an initialized `blk_mq_tag_set`.
-                error::to_result( unsafe { bindings::blk_mq_alloc_tag_set(tag_set.get())})
+                let status = error::to_result(
+                    unsafe { bindings::blk_mq_alloc_tag_set(tag_set.get())}
+                );
+                if status.is_err() {
+                    // SAFETY: We created `driver_data` above with `into_foreign`
+                    unsafe { T::TagSetData::from_foreign((*tag_set.get()).driver_data) };
+                }
+                status
             }),
             _p: PhantomData,
         })
@@ -102,7 +113,14 @@ pub(crate) fn raw_tag_set(&self) -> *mut bindings::blk_mq_tag_set {
 impl<T: Operations> PinnedDrop for TagSet<T> {
     fn drop(self: Pin<&mut Self>) {
         // SAFETY: By type invariant `inner` is valid and has been properly
-        // initialized during construction.
+        // initialised during construction.
+        let tagset_data = unsafe { (*self.inner.get()).driver_data };
+
+        // SAFETY: `inner` is valid and has been properly initialised during construction.
         unsafe { bindings::blk_mq_free_tag_set(self.inner.get()) };
+
+        // SAFETY: `tagset_data` was created by a call to
+        // `ForeignOwnable::into_foreign` in `TagSet::new()`.
+        unsafe { T::TagSetData::from_foreign(tagset_data) };
     }
 }

-- 
2.51.2



^ permalink raw reply related

* [PATCH v2 24/83] block: rust: add `NoDefaultScheduler` flag for `TagSet`
From: Andreas Hindborg @ 2026-06-09 19:08 UTC (permalink / raw)
  To: Liam R. Howlett, Alice Ryhl, Anna-Maria Behnsen, Benno Lossin,
	Björn Roy Baron, Boqun Feng, Danilo Krummrich,
	FUJITA Tomonori, Frederic Weisbecker, Gary Guo, Jens Axboe,
	John Stultz, Lorenzo Stoakes, Lyude Paul, Miguel Ojeda,
	Stephen Boyd, Thomas Gleixner, Trevor Gross, Liam R. Howlett,
	Boqun Feng, Lorenzo Stoakes
  Cc: Andreas Hindborg, linux-block, linux-kernel, linux-mm,
	rust-for-linux
In-Reply-To: <20260609-rnull-v6-19-rc5-send-v2-0-82c7404542e2@kernel.org>

Add a flag that maps to `BLK_MQ_F_NO_SCHED_BY_DEFAULT`. When this flag is
set, the 'none' scheduler is selected instead of 'mq-deadline' during queue
registration for devices with a single hwq or shared hwqs.

Signed-off-by: Andreas Hindborg <a.hindborg@kernel.org>
---
 rust/kernel/block/mq/tag_set/flags.rs | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/rust/kernel/block/mq/tag_set/flags.rs b/rust/kernel/block/mq/tag_set/flags.rs
index b7eaccd200a2..2561d7090c49 100644
--- a/rust/kernel/block/mq/tag_set/flags.rs
+++ b/rust/kernel/block/mq/tag_set/flags.rs
@@ -17,5 +17,9 @@ pub enum Flag {
         /// processing IO. When this flag is not set, IO is processed in atomic
         /// context. When this flag is set, IO is processed in process context.
         Blocking = bindings::BLK_MQ_F_BLOCKING,
+
+        /// Select 'none' during queue registration in case of a single hwq or shared
+        /// hwqs instead of 'mq-deadline'.
+        NoDefaultScheduler = bindings::BLK_MQ_F_NO_SCHED_BY_DEFAULT,
     }
 }

-- 
2.51.2



^ permalink raw reply related

* [PATCH v2 10/83] block: rust: allow `hrtimer::Timer` in `RequestData`
From: Andreas Hindborg @ 2026-06-09 19:07 UTC (permalink / raw)
  To: Liam R. Howlett, Alice Ryhl, Anna-Maria Behnsen, Benno Lossin,
	Björn Roy Baron, Boqun Feng, Danilo Krummrich,
	FUJITA Tomonori, Frederic Weisbecker, Gary Guo, Jens Axboe,
	John Stultz, Lorenzo Stoakes, Lyude Paul, Miguel Ojeda,
	Stephen Boyd, Thomas Gleixner, Trevor Gross, Liam R. Howlett,
	Boqun Feng, Lorenzo Stoakes
  Cc: Andreas Hindborg, linux-block, linux-kernel, linux-mm,
	rust-for-linux
In-Reply-To: <20260609-rnull-v6-19-rc5-send-v2-0-82c7404542e2@kernel.org>

`Request` is essentially a smart pointer that derefs to
`Operations::RequestData`. To use an `HrTimer` in `Operations::RequestData`
via the `Request` pointer, we must implement `HrTimerPointer` for
`Request`.

Thus, implement `HrTimerPointer` and friends for `ARef<Request>`.

Publicly export `HrTimer::raw_cancel` and `HrTimer::into_c`.

Signed-off-by: Andreas Hindborg <a.hindborg@kernel.org>
---
 rust/kernel/block/mq.rs         |   5 +-
 rust/kernel/block/mq/request.rs | 142 ++++++++++++++++++++++++++++++++++++++++
 rust/kernel/time/hrtimer.rs     |   5 +-
 3 files changed, 149 insertions(+), 3 deletions(-)

diff --git a/rust/kernel/block/mq.rs b/rust/kernel/block/mq.rs
index 7718b106eb49..a03d46d274a5 100644
--- a/rust/kernel/block/mq.rs
+++ b/rust/kernel/block/mq.rs
@@ -107,5 +107,8 @@
 mod tag_set;
 
 pub use operations::Operations;
-pub use request::Request;
+pub use request::{
+    Request,
+    RequestTimerHandle, //
+};
 pub use tag_set::TagSet;
diff --git a/rust/kernel/block/mq/request.rs b/rust/kernel/block/mq/request.rs
index a6e757d8755d..0b14f584c9d9 100644
--- a/rust/kernel/block/mq/request.rs
+++ b/rust/kernel/block/mq/request.rs
@@ -15,6 +15,14 @@
         atomic::ordering,
         Refcount,
     },
+    time::hrtimer::{
+        HasHrTimer,
+        HrTimer,
+        HrTimerCallback,
+        HrTimerHandle,
+        HrTimerMode,
+        HrTimerPointer, //
+    },
     types::{
         Opaque,
         Ownable,
@@ -23,6 +31,7 @@
     },
 };
 use core::{
+    ffi::c_void,
     marker::PhantomData,
     ptr::NonNull, //
 };
@@ -145,6 +154,11 @@ pub(crate) fn wrapper_ref(&self) -> &RequestDataWrapper<T> {
         // valid as a shared reference.
         unsafe { Self::wrapper_ptr(core::ptr::from_ref(self).cast_mut()).as_ref() }
     }
+
+    /// Return a reference to the per-request data associated with this request.
+    pub fn data_ref(&self) -> &T::RequestData {
+        &self.wrapper_ref().data
+    }
 }
 
 /// A wrapper around data stored in the private area of the C [`struct request`].
@@ -329,3 +343,131 @@ fn into_shared(this: Owned<Self>) -> ARef<Self> {
         unsafe { ARef::from_raw(Owned::into_raw(this)) }
     }
 }
+
+/// A handle for a timer that is embedded in a [`Request`] private data area.
+pub struct RequestTimerHandle<T>
+where
+    T: Operations,
+    T::RequestData: HasHrTimer<T::RequestData>,
+{
+    inner: ARef<Request<T>>,
+}
+
+// SAFETY: The drop implementation of `RequestTimerHandle` calls `cancel`, which cancels the timer
+// if it is running. `drop` will block if the timer handler is running. This is ensured via a call
+// to `HrTimer::raw_cancel` in the implementation of `cancel`.
+unsafe impl<T> HrTimerHandle for RequestTimerHandle<T>
+where
+    T: Operations,
+    T::RequestData: HasHrTimer<T::RequestData>,
+{
+    fn cancel(&mut self) -> bool {
+        let request_data_ptr = &self.inner.wrapper_ref().data as *const T::RequestData;
+
+        // SAFETY: As we obtained `request_data_ptr` from a valid reference above, it
+        // must point to a valid `T::RequestData`.
+        let timer_ptr = unsafe {
+            <T::RequestData as HasHrTimer<T::RequestData>>::raw_get_timer(request_data_ptr)
+        };
+
+        // SAFETY: As `timer_ptr` points into a valid `T::RequestData`, `timer_ptr`
+        // must point to a valid `HrTimer` instance.
+        unsafe { HrTimer::<T::RequestData>::raw_cancel(timer_ptr) }
+    }
+}
+
+impl<T> RequestTimerHandle<T>
+where
+    T: Operations,
+    T::RequestData: HasHrTimer<T::RequestData>,
+{
+    /// Drop the timer handle without cancelling the timer.
+    ///
+    /// This is safe because dropping the last [`ARef<Request>`] does not drop the [`Request`].
+    pub fn dismiss(mut self) {
+        let inner = core::ptr::from_mut(&mut self.inner);
+
+        // SAFETY: `inner` is valid for reads and writes, is properly aligned and nonnull. We have
+        // exclusive access to `inner` and we do not access `inner` after this call.
+        unsafe { core::ptr::drop_in_place(inner) };
+        core::mem::forget(self);
+    }
+}
+
+impl<T> Drop for RequestTimerHandle<T>
+where
+    T: Operations,
+    T::RequestData: HasHrTimer<T::RequestData>,
+{
+    fn drop(&mut self) {
+        self.cancel();
+    }
+}
+
+impl<T> HrTimerPointer for ARef<Request<T>>
+where
+    T: Operations,
+    T::RequestData: HasHrTimer<T::RequestData>,
+    T::RequestData: Sync,
+{
+    type TimerMode = <T::RequestData as HasHrTimer<T::RequestData>>::TimerMode;
+    type TimerHandle = RequestTimerHandle<T>;
+
+    fn start(self, expires: <Self::TimerMode as HrTimerMode>::Expires) -> RequestTimerHandle<T> {
+        let pdu_ptr = self.data_ref() as *const T::RequestData;
+
+        // SAFETY: `pdu_ptr` is coerced from a live reference to a `T::RequestData` and thus points
+        // to a valid `T::RequestData`. It stays valid until the `T::RequestData` is dropped, and
+        // the timer will be canceled before this.
+        unsafe { T::RequestData::start(pdu_ptr, expires) };
+
+        RequestTimerHandle { inner: self }
+    }
+}
+
+impl<T> kernel::time::hrtimer::RawHrTimerCallback for ARef<Request<T>>
+where
+    T: Operations,
+    T::RequestData: HasHrTimer<T::RequestData>,
+    T::RequestData: for<'a> HrTimerCallback<Pointer<'a> = ARef<Request<T>>>,
+    T::RequestData: Sync,
+{
+    type CallbackTarget<'a> = Self;
+
+    unsafe extern "C" fn run(ptr: *mut bindings::hrtimer) -> bindings::hrtimer_restart {
+        // `HrTimer` is `repr(transparent)`
+        let timer_ptr = ptr.cast::<kernel::time::hrtimer::HrTimer<T::RequestData>>();
+
+        // SAFETY: By C API contract `ptr` is the pointer we passed when
+        // enqueuing the timer, so it is a `HrTimer<T::RequestData>` embedded in a `T::RequestData`
+        let request_data_ptr = unsafe { T::RequestData::timer_container_of(timer_ptr) };
+
+        let offset = core::mem::offset_of!(RequestDataWrapper<T>, data);
+
+        // SAFETY: This sub stays within the `bindings::request` allocation and does not wrap.
+        let pdu_ptr = unsafe {
+            request_data_ptr
+                .cast::<u8>()
+                .sub(offset)
+                .cast::<RequestDataWrapper<T>>()
+        };
+
+        // SAFETY: This request pointer was passed to us by the kernel in `init_request_callback`.
+        let request_ptr = unsafe { bindings::blk_mq_rq_from_pdu(pdu_ptr.cast::<c_void>()) };
+
+        // SAFETY: By C API contract, we have ownership of the request.
+        let request_ref = unsafe { &*(request_ptr as *const Request<T>) };
+
+        request_ref.inc_ref();
+        // SAFETY: We just incremented the refcount above.
+        let aref: ARef<Request<T>> = unsafe { ARef::from_raw(NonNull::from(request_ref)) };
+
+        // SAFETY:
+        // - By C API contract `timer_ptr` is the pointer that we passed when queuing the timer, so
+        //   it is a valid pointer to a `HrTimer<T::RequestData>` embedded in a `T::RequestData`.
+        // - We are within `RawHrTimerCallback::run`
+        let context = unsafe { kernel::time::hrtimer::HrTimerCallbackContext::from_raw(timer_ptr) };
+
+        T::RequestData::run(aref, context).into_c()
+    }
+}
diff --git a/rust/kernel/time/hrtimer.rs b/rust/kernel/time/hrtimer.rs
index d57276496ed6..096b18523c73 100644
--- a/rust/kernel/time/hrtimer.rs
+++ b/rust/kernel/time/hrtimer.rs
@@ -496,7 +496,7 @@ unsafe fn raw_get(this: *const Self) -> *mut bindings::hrtimer {
     /// # Safety
     ///
     /// `this` must point to a valid `Self`.
-    pub(crate) unsafe fn raw_cancel(this: *const Self) -> bool {
+    pub unsafe fn raw_cancel(this: *const Self) -> bool {
         // SAFETY: `this` points to an allocation of at least `HrTimer` size.
         let c_timer_ptr = unsafe { HrTimer::raw_get(this) };
 
@@ -900,7 +900,8 @@ pub enum HrTimerRestart {
 }
 
 impl HrTimerRestart {
-    fn into_c(self) -> bindings::hrtimer_restart {
+    /// Convert `self` into an integer for FFI use.
+    pub fn into_c(self) -> bindings::hrtimer_restart {
         self as bindings::hrtimer_restart
     }
 }

-- 
2.51.2



^ permalink raw reply related

* [PATCH v2 29/83] block: rnull: add badblocks support
From: Andreas Hindborg @ 2026-06-09 19:08 UTC (permalink / raw)
  To: Liam R. Howlett, Alice Ryhl, Anna-Maria Behnsen, Benno Lossin,
	Björn Roy Baron, Boqun Feng, Danilo Krummrich,
	FUJITA Tomonori, Frederic Weisbecker, Gary Guo, Jens Axboe,
	John Stultz, Lorenzo Stoakes, Lyude Paul, Miguel Ojeda,
	Stephen Boyd, Thomas Gleixner, Trevor Gross, Liam R. Howlett,
	Boqun Feng, Lorenzo Stoakes
  Cc: Andreas Hindborg, linux-block, linux-kernel, linux-mm,
	rust-for-linux
In-Reply-To: <20260609-rnull-v6-19-rc5-send-v2-0-82c7404542e2@kernel.org>

Add badblocks support to the rnull driver with a configfs interface for
managing bad sectors.

- Configfs attribute for adding/removing bad blocks via "+start-end" and
  "-start-end" syntax.
- Request handling that checks for bad blocks and returns IO errors.
- Updated request completion to handle error status properly.

The badblocks functionality is disabled by default and is enabled when the
first bad block is added.

Signed-off-by: Andreas Hindborg <a.hindborg@kernel.org>
---
 drivers/block/rnull/configfs.rs | 63 ++++++++++++++++++++++++++++++++++++++---
 drivers/block/rnull/rnull.rs    | 46 ++++++++++++++++++++++++++----
 2 files changed, 100 insertions(+), 9 deletions(-)

diff --git a/drivers/block/rnull/configfs.rs b/drivers/block/rnull/configfs.rs
index d9aead646ae0..4db3ba26c2d1 100644
--- a/drivers/block/rnull/configfs.rs
+++ b/drivers/block/rnull/configfs.rs
@@ -6,9 +6,12 @@
 };
 use kernel::{
     bindings,
-    block::mq::gen_disk::{
-        GenDisk,
-        GenDiskBuilder, //
+    block::{
+        badblocks::BadBlocks,
+        mq::gen_disk::{
+            GenDisk,
+            GenDiskBuilder, //
+        }, //
     },
     configfs::{
         self,
@@ -26,7 +29,10 @@
         kstrtobool_bytes,
         CString, //
     },
-    sync::Mutex,
+    sync::{
+        Arc,
+        Mutex, //
+    },
     time, //
 };
 use macros::{
@@ -95,6 +101,7 @@ fn make_group(
                 home_node: 9,
                 discard: 10,
                 no_sched:11,
+                badblocks: 12,
             ],
         };
 
@@ -117,6 +124,7 @@ fn make_group(
                     home_node: bindings::NUMA_NO_NODE,
                     discard: false,
                     no_sched: false,
+                    bad_blocks: Arc::pin_init(BadBlocks::new(false), GFP_KERNEL)?,
                 }),
             }),
             core::iter::empty(),
@@ -186,6 +194,7 @@ struct DeviceConfigInner {
     home_node: i32,
     discard: bool,
     no_sched: bool,
+    bad_blocks: Arc<BadBlocks>,
 }
 
 #[vtable]
@@ -221,6 +230,7 @@ fn store(this: &DeviceConfig, page: &[u8]) -> Result {
                 home_node: guard.home_node,
                 discard: guard.discard,
                 no_sched: guard.no_sched,
+                bad_blocks: guard.bad_blocks.clone(),
             })?);
             guard.powered = true;
         } else if guard.powered && !power_op {
@@ -328,3 +338,48 @@ fn store(this: &DeviceConfig, page: &[u8]) -> Result {
 );
 
 configfs_simple_bool_field!(DeviceConfig, 11, no_sched);
+
+#[vtable]
+impl configfs::AttributeOperations<12> for DeviceConfig {
+    type Data = DeviceConfig;
+
+    fn show(this: &DeviceConfig, page: &mut [u8; PAGE_SIZE]) -> Result<usize> {
+        let ret = this.data.lock().bad_blocks.show(page, false);
+        if ret < 0 {
+            Err(Error::from_errno(ret as c_int))
+        } else {
+            Ok(ret as usize)
+        }
+    }
+
+    fn store(this: &DeviceConfig, page: &[u8]) -> Result {
+        // This attribute can be set while device is powered.
+
+        for line in core::str::from_utf8(page)?.lines() {
+            let mut chars = line.chars();
+            match chars.next() {
+                Some(sign @ '+' | sign @ '-') => {
+                    if let Some((start, end)) = chars.as_str().split_once('-') {
+                        let start: u64 = start.parse().map_err(|_| EINVAL)?;
+                        let end: u64 = end.parse().map_err(|_| EINVAL)?;
+
+                        if start > end {
+                            return Err(EINVAL);
+                        }
+
+                        this.data.lock().bad_blocks.enable();
+
+                        if sign == '+' {
+                            this.data.lock().bad_blocks.set_bad(start..=end, true)?;
+                        } else {
+                            this.data.lock().bad_blocks.set_good(start..=end)?;
+                        }
+                    }
+                }
+                _ => return Err(EINVAL),
+            }
+        }
+
+        Ok(())
+    }
+}
diff --git a/drivers/block/rnull/rnull.rs b/drivers/block/rnull/rnull.rs
index 73f14d6e379f..90dbf318c2f8 100644
--- a/drivers/block/rnull/rnull.rs
+++ b/drivers/block/rnull/rnull.rs
@@ -9,6 +9,7 @@
     bindings,
     block::{
         self,
+        badblocks::{self, BadBlocks},
         bio::Segment,
         mq::{
             self,
@@ -38,6 +39,10 @@
     str::CString,
     sync::{
         aref::ARef,
+        atomic::{
+            ordering,
+            Atomic, //
+        },
         Arc,
         Mutex, //
     },
@@ -153,6 +158,7 @@ fn init(_module: &'static ThisModule) -> impl PinInit<Self, Error> {
                     home_node: module_parameters::home_node.value(),
                     discard: module_parameters::discard.value(),
                     no_sched: module_parameters::no_sched.value(),
+                    bad_blocks: Arc::pin_init(BadBlocks::new(false), GFP_KERNEL)?,
                 })?;
                 disks.push(disk, GFP_KERNEL)?;
             }
@@ -179,6 +185,7 @@ struct NullBlkOptions<'a> {
     home_node: i32,
     discard: bool,
     no_sched: bool,
+    bad_blocks: Arc<BadBlocks>,
 }
 struct NullBlkDevice;
 
@@ -196,6 +203,7 @@ fn new(options: NullBlkOptions<'_>) -> Result<GenDisk<Self>> {
             home_node,
             discard,
             no_sched,
+            bad_blocks,
         } = options;
 
         let mut flags = mq::tag_set::Flags::default();
@@ -237,6 +245,7 @@ fn new(options: NullBlkOptions<'_>) -> Result<GenDisk<Self>> {
                 completion_time,
                 memory_backed,
                 block_size: block_size.into(),
+                bad_blocks,
             }),
             GFP_KERNEL,
         )?;
@@ -351,6 +360,16 @@ fn transfer(
         }
         Ok(())
     }
+
+    fn end_request(rq: Owned<mq::Request<Self>>) {
+        let status = rq.data_ref().error.load(ordering::Relaxed);
+        rq.data_ref().error.store(0, ordering::Relaxed);
+
+        match status {
+            0 => rq.end_ok(),
+            _ => rq.end(bindings::BLK_STS_IOERR),
+        }
+    }
 }
 
 static_assert!((PAGE_SIZE >> SECTOR_SHIFT) <= 64);
@@ -396,12 +415,14 @@ struct QueueData {
     completion_time: Delta,
     memory_backed: bool,
     block_size: u64,
+    bad_blocks: Arc<BadBlocks>,
 }
 
 #[pin_data]
 struct Pdu {
     #[pin]
     timer: kernel::time::hrtimer::HrTimer<Self>,
+    error: Atomic<u32>,
 }
 
 impl HrTimerCallback for Pdu {
@@ -431,6 +452,7 @@ impl Operations for NullBlkDevice {
     fn new_request_data() -> impl PinInit<Self::RequestData> {
         pin_init!(Pdu {
             timer <- kernel::time::hrtimer::HrTimer::new(),
+            error: Atomic::new(0),
         })
     }
 
@@ -440,6 +462,19 @@ fn queue_rq(
         mut rq: Owned<mq::Request<Self>>,
         _is_last: bool,
     ) -> Result {
+        if queue_data.bad_blocks.enabled() {
+            let start = rq.sector();
+            let end = start + u64::from(rq.sectors());
+            if !matches!(
+                queue_data.bad_blocks.check(start..end),
+                badblocks::BlockStatus::None
+            ) {
+                rq.data_ref().error.store(1, ordering::Relaxed);
+            }
+        }
+
+        // TODO: Skip IO if bad block.
+
         if queue_data.memory_backed {
             memalloc_scope!(let _noio: NoIo);
             let tree = &queue_data.tree;
@@ -461,7 +496,7 @@ fn queue_rq(
         }
 
         match queue_data.irq_mode {
-            IRQMode::None => rq.end_ok(),
+            IRQMode::None => Self::end_request(rq),
             IRQMode::Soft => mq::Request::complete(rq.into()),
             IRQMode::Timer => {
                 OwnableRefCounted::into_shared(rq)
@@ -475,9 +510,10 @@ fn queue_rq(
     fn commit_rqs(_queue_data: Pin<&QueueData>) {}
 
     fn complete(rq: ARef<mq::Request<Self>>) {
-        OwnableRefCounted::try_from_shared(rq)
-            .map_err(|_e| kernel::error::code::EIO)
-            .expect("Failed to complete request")
-            .end_ok();
+        Self::end_request(
+            OwnableRefCounted::try_from_shared(rq)
+                .map_err(|_e| kernel::error::code::EIO)
+                .expect("Failed to complete request"),
+        )
     }
 }

-- 
2.51.2



^ permalink raw reply related

* [PATCH v2 09/83] block: rust: document the lifetime of `Request`
From: Andreas Hindborg @ 2026-06-09 19:07 UTC (permalink / raw)
  To: Liam R. Howlett, Alice Ryhl, Anna-Maria Behnsen, Benno Lossin,
	Björn Roy Baron, Boqun Feng, Danilo Krummrich,
	FUJITA Tomonori, Frederic Weisbecker, Gary Guo, Jens Axboe,
	John Stultz, Lorenzo Stoakes, Lyude Paul, Miguel Ojeda,
	Stephen Boyd, Thomas Gleixner, Trevor Gross, Liam R. Howlett,
	Boqun Feng, Lorenzo Stoakes
  Cc: Andreas Hindborg, linux-block, linux-kernel, linux-mm,
	rust-for-linux
In-Reply-To: <20260609-rnull-v6-19-rc5-send-v2-0-82c7404542e2@kernel.org>

The `struct request` objects backing a `Request` are not allocated and
freed for each IO. Instead, a fixed pool of requests is allocated when
the tag set is initialized, and each request is reused to service many
distinct IO operations over the lifetime of the request queue. It is
easy to assume from the existing documentation that a request, and in
particular its private data, is fresh for each IO.

Add a `Lifetime` section to the `Request` documentation describing this
reuse and its consequence for the lifetime of the request private data.

Signed-off-by: Andreas Hindborg <a.hindborg@kernel.org>
---
 rust/kernel/block/mq/request.rs | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/rust/kernel/block/mq/request.rs b/rust/kernel/block/mq/request.rs
index 1882d697dcf3..a6e757d8755d 100644
--- a/rust/kernel/block/mq/request.rs
+++ b/rust/kernel/block/mq/request.rs
@@ -29,6 +29,24 @@
 
 /// A wrapper around a blk-mq [`struct request`]. This represents an IO request.
 ///
+/// # Lifetime
+///
+/// The [`struct request`] backing a [`Request`] is not allocated and freed for
+/// each IO. Instead, a fixed pool of requests is allocated up front when the
+/// [`TagSet`](crate::block::mq::TagSet) is initialized, with one request per
+/// available tag. A single request allocation is then reused to service many
+/// distinct IO operations over the lifetime of the request queue: when the
+/// block layer needs to process an IO, it assigns a free tag and hands the
+/// driver the associated request, and once that IO completes the request is
+/// returned to the pool to later be handed out again for an unrelated IO.
+///
+/// The private data area of the request, which holds the driver defined
+/// [`Operations::RequestData`], shares this lifetime. It is initialized once
+/// when the request pool is allocated and dropped once when the pool is torn
+/// down - not once per IO. As a result, [`Operations::RequestData`] persists
+/// across the many IO operations that reuse the same request, and a driver must
+/// not assume that it is reset to a fresh value at the start of each IO.
+///
 /// # Implementation details
 ///
 /// There are three states for a request that the Rust bindings care about:

-- 
2.51.2



^ permalink raw reply related

* [PATCH v2 62/83] block: rust: allow setting write cache and FUA flags for `GenDisk`
From: Andreas Hindborg @ 2026-06-09 19:08 UTC (permalink / raw)
  To: Liam R. Howlett, Alice Ryhl, Anna-Maria Behnsen, Benno Lossin,
	Björn Roy Baron, Boqun Feng, Danilo Krummrich,
	FUJITA Tomonori, Frederic Weisbecker, Gary Guo, Jens Axboe,
	John Stultz, Lorenzo Stoakes, Lyude Paul, Miguel Ojeda,
	Stephen Boyd, Thomas Gleixner, Trevor Gross, Liam R. Howlett,
	Boqun Feng, Lorenzo Stoakes
  Cc: Andreas Hindborg, linux-block, linux-kernel, linux-mm,
	rust-for-linux
In-Reply-To: <20260609-rnull-v6-19-rc5-send-v2-0-82c7404542e2@kernel.org>

Add methods to `GenDiskBuilder` for enabling the write cache and FUA
feature flags. These flags are set in the `queue_limits` structure
when building the disk.

Signed-off-by: Andreas Hindborg <a.hindborg@kernel.org>
---
 rust/kernel/block/mq/gen_disk.rs | 29 +++++++++++++++++++++++++++--
 1 file changed, 27 insertions(+), 2 deletions(-)

diff --git a/rust/kernel/block/mq/gen_disk.rs b/rust/kernel/block/mq/gen_disk.rs
index eedba691e167..5367ca92b7aa 100644
--- a/rust/kernel/block/mq/gen_disk.rs
+++ b/rust/kernel/block/mq/gen_disk.rs
@@ -9,6 +9,7 @@
     bindings,
     block::mq::{
         operations::OperationsVTable,
+        Feature,
         Operations,
         RequestQueue,
         TagSet, //
@@ -55,6 +56,8 @@ pub struct GenDiskBuilder<T> {
     zone_size_sectors: u32,
     #[cfg(CONFIG_BLK_DEV_ZONED)]
     zone_append_max_sectors: u32,
+    write_cache: bool,
+    forced_unit_access: bool,
     _p: PhantomData<T>,
 }
 
@@ -72,6 +75,8 @@ fn default() -> Self {
             zone_size_sectors: 0,
             #[cfg(CONFIG_BLK_DEV_ZONED)]
             zone_append_max_sectors: 0,
+            write_cache: false,
+            forced_unit_access: false,
             _p: PhantomData,
         }
     }
@@ -164,6 +169,18 @@ pub fn zone_append_max(mut self, sectors: u32) -> Self {
         self
     }
 
+    /// Declare that this device supports forced unit access.
+    pub fn forced_unit_access(mut self, enable: bool) -> Self {
+        self.forced_unit_access = enable;
+        self
+    }
+
+    /// Declare that this device has a write-back cache.
+    pub fn write_cache(mut self, enable: bool) -> Self {
+        self.write_cache = enable;
+        self
+    }
+
     /// Build a new `GenDisk` and add it to the VFS.
     pub fn build(
         self,
@@ -183,7 +200,7 @@ pub fn build(
         lim.physical_block_size = self.physical_block_size;
         lim.max_hw_discard_sectors = self.max_hw_discard_sectors;
         if self.rotational {
-            lim.features |= bindings::BLK_FEAT_ROTATIONAL;
+            lim.features = Feature::Rotational.into();
         }
 
         #[cfg(CONFIG_BLK_DEV_ZONED)]
@@ -192,11 +209,19 @@ pub fn build(
                 return Err(error::code::EINVAL);
             }
 
-            lim.features |= bindings::BLK_FEAT_ZONED;
+            lim.features |= Feature::Zoned;
             lim.chunk_sectors = self.zone_size_sectors;
             lim.max_hw_zone_append_sectors = self.zone_append_max_sectors;
         }
 
+        if self.write_cache {
+            lim.features |= Feature::WriteCache;
+        }
+
+        if self.forced_unit_access {
+            lim.features |= Feature::ForcedUnitAccess;
+        }
+
         // SAFETY: `tagset.raw_tag_set()` points to a valid and initialized tag set
         let gendisk = from_err_ptr(unsafe {
             bindings::__blk_mq_alloc_disk(

-- 
2.51.2



^ permalink raw reply related

* [PATCH v2 42/83] block: rust: require `queue_rq` to return a `BlkResult`
From: Andreas Hindborg @ 2026-06-09 19:08 UTC (permalink / raw)
  To: Liam R. Howlett, Alice Ryhl, Anna-Maria Behnsen, Benno Lossin,
	Björn Roy Baron, Boqun Feng, Danilo Krummrich,
	FUJITA Tomonori, Frederic Weisbecker, Gary Guo, Jens Axboe,
	John Stultz, Lorenzo Stoakes, Lyude Paul, Miguel Ojeda,
	Stephen Boyd, Thomas Gleixner, Trevor Gross, Liam R. Howlett,
	Boqun Feng, Lorenzo Stoakes
  Cc: Andreas Hindborg, linux-block, linux-kernel, linux-mm,
	rust-for-linux
In-Reply-To: <20260609-rnull-v6-19-rc5-send-v2-0-82c7404542e2@kernel.org>

Change the return type of `Operations::queue_rq` from `Result` to
`BlkResult`. This ensures that drivers return proper block layer status
codes that can be translated to the appropriate `blk_status_t` value.

Signed-off-by: Andreas Hindborg <a.hindborg@kernel.org>
---
 drivers/block/rnull/rnull.rs       |  3 ++-
 rust/kernel/block/mq.rs            |  4 ++--
 rust/kernel/block/mq/operations.rs | 13 ++++++++-----
 3 files changed, 12 insertions(+), 8 deletions(-)

diff --git a/drivers/block/rnull/rnull.rs b/drivers/block/rnull/rnull.rs
index bb8c4df08218..6ceba23a4d3e 100644
--- a/drivers/block/rnull/rnull.rs
+++ b/drivers/block/rnull/rnull.rs
@@ -20,6 +20,7 @@
             BadBlocks, //
         },
         bio::Segment,
+        error::BlkResult,
         mq::{
             self,
             gen_disk::{
@@ -595,7 +596,7 @@ fn queue_rq(
         this: Pin<&Self>,
         rq: Owned<mq::IdleRequest<Self>>,
         _is_last: bool,
-    ) -> Result {
+    ) -> BlkResult {
         let mut rq = rq.start();
         let mut sectors = rq.sectors();
 
diff --git a/rust/kernel/block/mq.rs b/rust/kernel/block/mq.rs
index e89eb394001f..503623267b19 100644
--- a/rust/kernel/block/mq.rs
+++ b/rust/kernel/block/mq.rs
@@ -64,7 +64,7 @@
 //! ```rust
 //! use kernel::{
 //!     alloc::NumaNode,
-//!     block::mq::{self, *},
+//!     block::{error::BlkResult, mq::{self, *}},
 //!     new_mutex,
 //!     prelude::*,
 //!     sync::{aref::ARef, Arc, Mutex},
@@ -90,7 +90,7 @@
 //!         _queue_data: (),
 //!         rq: Owned<IdleRequest<Self>>,
 //!         _is_last: bool
-//!     ) -> Result {
+//!     ) -> BlkResult {
 //!         rq.start().end_ok();
 //!         Ok(())
 //!     }
diff --git a/rust/kernel/block/mq/operations.rs b/rust/kernel/block/mq/operations.rs
index 01917ef213d1..b9a2bf6592b3 100644
--- a/rust/kernel/block/mq/operations.rs
+++ b/rust/kernel/block/mq/operations.rs
@@ -6,10 +6,13 @@
 
 use crate::{
     bindings,
-    block::mq::{
-        request::RequestDataWrapper,
-        IdleRequest,
-        Request, //
+    block::{
+        error::BlkResult,
+        mq::{
+            request::RequestDataWrapper,
+            IdleRequest,
+            Request, //
+        },
     },
     error::{
         from_result,
@@ -82,7 +85,7 @@ fn queue_rq(
         queue_data: ForeignBorrowed<'_, Self::QueueData>,
         rq: Owned<IdleRequest<Self>>,
         is_last: bool,
-    ) -> Result;
+    ) -> BlkResult;
 
     /// Called by the kernel to indicate that queued requests should be submitted.
     fn commit_rqs(

-- 
2.51.2



^ permalink raw reply related

* [PATCH v2 67/83] block: rnull: add an option to change the number of hardware queues
From: Andreas Hindborg @ 2026-06-09 19:08 UTC (permalink / raw)
  To: Liam R. Howlett, Alice Ryhl, Anna-Maria Behnsen, Benno Lossin,
	Björn Roy Baron, Boqun Feng, Danilo Krummrich,
	FUJITA Tomonori, Frederic Weisbecker, Gary Guo, Jens Axboe,
	John Stultz, Lorenzo Stoakes, Lyude Paul, Miguel Ojeda,
	Stephen Boyd, Thomas Gleixner, Trevor Gross, Liam R. Howlett,
	Boqun Feng, Lorenzo Stoakes
  Cc: Andreas Hindborg, linux-block, linux-kernel, linux-mm,
	rust-for-linux
In-Reply-To: <20260609-rnull-v6-19-rc5-send-v2-0-82c7404542e2@kernel.org>

Add a feature to rnull that allows changing the number of simulated
hardware queues during device operation.

Signed-off-by: Andreas Hindborg <a.hindborg@kernel.org>
---
 drivers/block/rnull/configfs.rs | 117 ++++++++++++++++++++++++++--------------
 drivers/block/rnull/rnull.rs    |  46 ++++++++++------
 2 files changed, 108 insertions(+), 55 deletions(-)

diff --git a/drivers/block/rnull/configfs.rs b/drivers/block/rnull/configfs.rs
index 8195d645ecc6..d9246b9150f4 100644
--- a/drivers/block/rnull/configfs.rs
+++ b/drivers/block/rnull/configfs.rs
@@ -148,7 +148,13 @@ fn make_group(
                     completion_time: time::Delta::ZERO,
                     name: name.try_into()?,
                     memory_backed: false,
-                    submit_queues: 1,
+                    queue_config: Arc::pin_init(
+                        new_mutex!(QueueConfig {
+                            submit_queues: 1,
+                            poll_queues: 0
+                        }),
+                        GFP_KERNEL
+                    )?,
                     home_node: bindings::NUMA_NO_NODE,
                     discard: false,
                     no_sched: false,
@@ -169,7 +175,6 @@ fn make_group(
                     zone_max_open: 0,
                     zone_max_active: 0,
                     zone_append_max_sectors: u32::MAX,
-                    poll_queues: 0,
                     fua: true,
                 }),
             }),
@@ -236,7 +241,7 @@ struct DeviceConfigInner {
     completion_time: time::Delta,
     disk: Option<Arc<GenDisk<NullBlkDevice>>>,
     memory_backed: bool,
-    submit_queues: u32,
+    queue_config: Arc<Mutex<QueueConfig>>,
     home_node: i32,
     discard: bool,
     no_sched: bool,
@@ -257,7 +262,6 @@ struct DeviceConfigInner {
     zone_max_open: u32,
     zone_max_active: u32,
     zone_append_max_sectors: u32,
-    poll_queues: u32,
     fua: bool,
 }
 
@@ -310,9 +314,8 @@ fn store(this: &DeviceConfig, page: &[u8]) -> Result {
                 bandwidth_limit: u64::from(guard.mbps) * 2u64.pow(20),
                 shared_tag_set: guard.shared_tags.then(|| guard.shared_tag_set.clone()),
                 tag_set: crate::TagSetOptions {
-                    submit_queues: guard.submit_queues,
-                    poll_queues: guard.poll_queues,
                     home_node: guard.home_node,
+                    queue_config: guard.queue_config.clone(),
                     blocking: guard.blocking,
                     memory_backed: guard.memory_backed,
                     no_sched: guard.no_sched,
@@ -337,9 +340,17 @@ fn store(this: &DeviceConfig, page: &[u8]) -> Result {
     }
 }
 
-configfs_simple_field!(DeviceConfig, 1,
-                       block_size, u32,
-                       check GenDiskBuilder::<NullBlkDevice>::validate_block_size
+pub(crate) struct QueueConfig {
+    pub(crate) submit_queues: u32,
+    pub(crate) poll_queues: u32,
+}
+
+configfs_simple_field!(
+    DeviceConfig,
+    1,
+    block_size,
+    u32,
+    check GenDiskBuilder::<NullBlkDevice>::validate_block_size
 );
 configfs_simple_bool_field!(DeviceConfig, 2, rotational);
 configfs_simple_field!(DeviceConfig, 3, capacity_mib, u64);
@@ -363,38 +374,44 @@ fn from_str(s: &str) -> Result<Self> {
 
 configfs_simple_bool_field!(DeviceConfig, 6, memory_backed);
 
-#[vtable]
-impl configfs::AttributeOperations<7> for DeviceConfig {
-    type Data = DeviceConfig;
-
-    fn show(this: &DeviceConfig, page: &mut [u8; PAGE_SIZE]) -> Result<usize> {
-        let mut writer = kernel::str::Formatter::new(page);
-        writer.write_fmt(fmt!("{}\n", this.data.lock().submit_queues))?;
-        Ok(writer.bytes_written())
-    }
+configfs_attribute! {
+    DeviceConfig,
+    7,
+    show: |this, page| show_field(this.data.lock().queue_config.lock().submit_queues, page),
+    store: |this,page| {
+        let config_guard = this.data.lock();
+        let mut queue_config = config_guard.queue_config.lock();
 
-    fn store(this: &DeviceConfig, page: &[u8]) -> Result {
-        if this.data.lock().powered {
-            return Err(EBUSY);
+        let text = core::str::from_utf8(page)?.trim();
+        let value = text.parse().map_err(|_| EINVAL)?;
+        if value > kernel::cpu::num_possible_cpus() {
+            return Err(kernel::error::code::EINVAL)
         }
 
-        let text = core::str::from_utf8(page)?.trim();
-        let value = text
-            .parse::<u32>()
-            .map_err(|_| kernel::error::code::EINVAL)?;
+        let old_submit_queues = queue_config.submit_queues;
+        queue_config.submit_queues = value;
+        let total_queue_count = queue_config.submit_queues + queue_config.poll_queues;
+
+        let disk = config_guard.disk.clone();
+
+        drop(queue_config);
+        drop(config_guard);
 
-        if value == 0 || value > kernel::cpu::num_possible_cpus() {
-            return Err(kernel::error::code::EINVAL);
+        if let Some(disk) = &disk {
+            if let Err(e) = disk.tag_set().update_hw_queue_count(total_queue_count) {
+                this.data.lock().queue_config.lock().submit_queues = old_submit_queues;
+                return Err(e);
+            }
         }
 
-        this.data.lock().submit_queues = value;
         Ok(())
-    }
+    },
 }
 
 configfs_attribute!(DeviceConfig, 8,
     show: |this, page| show_field(
-        this.data.lock().submit_queues == kernel::numa::num_online_nodes(), page
+        this.data.lock().queue_config.lock().submit_queues == kernel::numa::num_online_nodes(),
+        page
     ),
     store: |this, page| store_with_power_check(this, page, |data, page| {
         let value = core::str::from_utf8(page)?
@@ -404,7 +421,7 @@ fn store(this: &DeviceConfig, page: &[u8]) -> Result {
             != 0;
 
         if value {
-            data.submit_queues = kernel::numa::num_online_nodes();
+            data.queue_config.lock().submit_queues = kernel::numa::num_online_nodes();
         }
         Ok(())
     })
@@ -506,17 +523,37 @@ fn store(this: &DeviceConfig, page: &[u8]) -> Result {
 configfs_simple_field!(DeviceConfig, 24, zone_max_open, u32);
 configfs_simple_field!(DeviceConfig, 25, zone_max_active, u32);
 configfs_simple_field!(DeviceConfig, 26, zone_append_max_sectors, u32);
-configfs_simple_field!(
+configfs_attribute! {
     DeviceConfig,
     27,
-    poll_queues,
-    u32,
-    check(|value| {
+    show: |this, page| show_field(this.data.lock().queue_config.lock().poll_queues, page),
+    store: |this,page| {
+        let config_guard = this.data.lock();
+        let mut queue_config = config_guard.queue_config.lock();
+
+        let text = core::str::from_utf8(page)?.trim();
+        let value = text.parse().map_err(|_| EINVAL)?;
         if value > kernel::cpu::num_possible_cpus() {
-            Err(kernel::error::code::EINVAL)
-        } else {
-            Ok(())
+            return Err(kernel::error::code::EINVAL)
         }
-    })
-);
+
+        let old_poll_queues = queue_config.poll_queues;
+        queue_config.poll_queues = value;
+        let total_queue_count = queue_config.submit_queues + queue_config.poll_queues;
+
+        let disk = config_guard.disk.clone();
+
+        drop(queue_config);
+        drop(config_guard);
+
+        if let Some(disk) = &disk {
+            if let Err(e) = disk.tag_set().update_hw_queue_count(total_queue_count) {
+                this.data.lock().queue_config.lock().poll_queues = old_poll_queues;
+                return Err(e);
+            }
+        }
+
+        Ok(())
+    },
+}
 configfs_simple_bool_field!(DeviceConfig, 28, fua);
diff --git a/drivers/block/rnull/rnull.rs b/drivers/block/rnull/rnull.rs
index c3126b923367..6653db5c069b 100644
--- a/drivers/block/rnull/rnull.rs
+++ b/drivers/block/rnull/rnull.rs
@@ -10,7 +10,10 @@
 #[cfg(CONFIG_BLK_DEV_ZONED)]
 mod zoned;
 
-use configfs::IRQMode;
+use configfs::{
+    IRQMode,
+    QueueConfig, //
+};
 use disk_storage::{
     DiskStorage,
     NullBlockPage,
@@ -224,9 +227,14 @@ fn init(_module: &'static ThisModule) -> impl PinInit<Self, Error> {
             let hw_queue_depth = module_parameters::hw_queue_depth.value();
 
             let shared_tag_set = NullBlkDevice::build_tag_set(TagSetOptions {
-                submit_queues,
-                poll_queues,
                 home_node,
+                queue_config: Arc::pin_init(
+                    new_mutex!(QueueConfig {
+                        submit_queues,
+                        poll_queues,
+                    }),
+                    GFP_KERNEL,
+                )?,
                 blocking,
                 memory_backed,
                 no_sched,
@@ -256,9 +264,14 @@ fn init(_module: &'static ThisModule) -> impl PinInit<Self, Error> {
                         .value()
                         .then(|| shared_tag_set.clone()),
                     tag_set: TagSetOptions {
-                        submit_queues,
-                        poll_queues,
                         home_node,
+                        queue_config: Arc::pin_init(
+                            new_mutex!(QueueConfig {
+                                submit_queues,
+                                poll_queues,
+                            }),
+                            GFP_KERNEL,
+                        )?,
                         blocking,
                         memory_backed,
                         no_sched,
@@ -338,9 +351,8 @@ struct NullBlkDevice {
 }
 
 struct TagSetOptions {
-    submit_queues: u32,
-    poll_queues: u32,
     home_node: i32,
+    queue_config: Arc<Mutex<QueueConfig>>,
     blocking: bool,
     memory_backed: bool,
     no_sched: bool,
@@ -352,9 +364,8 @@ impl NullBlkDevice {
 
     fn build_tag_set(options: TagSetOptions) -> Result<Arc<TagSet<Self>>> {
         let TagSetOptions {
-            submit_queues,
-            poll_queues,
             home_node,
+            queue_config,
             blocking,
             memory_backed,
             no_sched,
@@ -379,14 +390,18 @@ fn build_tag_set(options: TagSetOptions) -> Result<Arc<TagSet<Self>>> {
             flags |= mq::tag_set::Flag::NoDefaultScheduler;
         }
 
+        let queue_config_guard = queue_config.lock();
+        let submit_queues = queue_config_guard.submit_queues;
+        let poll_queues = queue_config_guard.poll_queues;
+        drop(queue_config_guard);
+
         Arc::pin_init(
             TagSet::new(
                 submit_queues + poll_queues,
                 KBox::new(
                     NullBlkTagsetData {
                         queue_depth: hw_queue_depth,
-                        submit_queue_count: submit_queues,
-                        poll_queue_count: poll_queues,
+                        queue_config,
                     },
                     GFP_KERNEL,
                 )?,
@@ -823,8 +838,7 @@ impl HasHrTimer<Self> for Pdu {
 
 struct NullBlkTagsetData {
     queue_depth: u32,
-    submit_queue_count: u32,
-    poll_queue_count: u32,
+    queue_config: Arc<Mutex<QueueConfig>>,
 }
 
 #[vtable]
@@ -970,8 +984,10 @@ fn report_zones(
     }
 
     fn map_queues(tag_set: Pin<&mut TagSet<Self>>) {
-        let mut submit_queue_count = tag_set.data().submit_queue_count;
-        let mut poll_queue_count = tag_set.data().poll_queue_count;
+        let queue_config = tag_set.data().queue_config.lock();
+        let mut submit_queue_count = queue_config.submit_queues;
+        let mut poll_queue_count = queue_config.poll_queues;
+        drop(queue_config);
 
         if tag_set.hw_queue_count() != submit_queue_count + poll_queue_count {
             pr_warn!(

-- 
2.51.2



^ permalink raw reply related

* [PATCH v2 19/83] block: rust: allow specifying home node when constructing `TagSet`
From: Andreas Hindborg @ 2026-06-09 19:07 UTC (permalink / raw)
  To: Liam R. Howlett, Alice Ryhl, Anna-Maria Behnsen, Benno Lossin,
	Björn Roy Baron, Boqun Feng, Danilo Krummrich,
	FUJITA Tomonori, Frederic Weisbecker, Gary Guo, Jens Axboe,
	John Stultz, Lorenzo Stoakes, Lyude Paul, Miguel Ojeda,
	Stephen Boyd, Thomas Gleixner, Trevor Gross, Liam R. Howlett,
	Boqun Feng, Lorenzo Stoakes
  Cc: Andreas Hindborg, linux-block, linux-kernel, linux-mm,
	rust-for-linux
In-Reply-To: <20260609-rnull-v6-19-rc5-send-v2-0-82c7404542e2@kernel.org>

Add a `numa_node` parameter to `TagSet::new` to specify the home NUMA
node for tag set allocations. This allows drivers to optimize memory
placement for NUMA systems.

Signed-off-by: Andreas Hindborg <a.hindborg@kernel.org>
---
 drivers/block/rnull/rnull.rs    | 11 ++++++++++-
 rust/kernel/block/mq.rs         |  5 ++++-
 rust/kernel/block/mq/tag_set.rs |  4 +++-
 3 files changed, 17 insertions(+), 3 deletions(-)

diff --git a/drivers/block/rnull/rnull.rs b/drivers/block/rnull/rnull.rs
index 30de022146ec..6323327d4a5a 100644
--- a/drivers/block/rnull/rnull.rs
+++ b/drivers/block/rnull/rnull.rs
@@ -174,7 +174,16 @@ fn new(options: NullBlkOptions<'_>) -> Result<GenDisk<Self>> {
             mq::tag_set::Flags::default()
         };
 
-        let tagset = Arc::pin_init(TagSet::new(submit_queues, 256, 1, flags), GFP_KERNEL)?;
+        let tagset = Arc::pin_init(
+            TagSet::new(
+                submit_queues,
+                256,
+                1,
+                kernel::alloc::NumaNode::NO_NODE,
+                flags,
+            ),
+            GFP_KERNEL,
+        )?;
 
         let queue_data = Box::pin_init(
             pin_init!(QueueData {
diff --git a/rust/kernel/block/mq.rs b/rust/kernel/block/mq.rs
index e556b3bb1191..bac15b509d90 100644
--- a/rust/kernel/block/mq.rs
+++ b/rust/kernel/block/mq.rs
@@ -57,6 +57,7 @@
 //!
 //! ```rust
 //! use kernel::{
+//!     alloc::NumaNode,
 //!     block::mq::{self, *},
 //!     new_mutex,
 //!     prelude::*,
@@ -92,7 +93,9 @@
 //! }
 //!
 //! let tagset: Arc<TagSet<MyBlkDevice>> =
-//!     Arc::pin_init(TagSet::new(1, 256, 1, mq::tag_set::Flags::default()), GFP_KERNEL)?;
+//!     Arc::pin_init(
+//!         TagSet::new(1, 256, 1, NumaNode::NO_NODE, mq::tag_set::Flags::default()),
+//!         GFP_KERNEL)?;
 //! let mut disk = gen_disk::GenDiskBuilder::new()
 //!     .capacity_sectors(4096)
 //!     .build(fmt!("myblk"), tagset, ())?;
diff --git a/rust/kernel/block/mq/tag_set.rs b/rust/kernel/block/mq/tag_set.rs
index 5b1a5bcc978d..d6d104adf4aa 100644
--- a/rust/kernel/block/mq/tag_set.rs
+++ b/rust/kernel/block/mq/tag_set.rs
@@ -7,6 +7,7 @@
 use core::pin::Pin;
 
 use crate::{
+    alloc::NumaNode,
     bindings,
     block::mq::{
         operations::OperationsVTable,
@@ -57,6 +58,7 @@ pub fn new(
         nr_hw_queues: u32,
         num_tags: u32,
         num_maps: u32,
+        numa_node: NumaNode,
         flags: Flags,
     ) -> impl PinInit<Self, error::Error> {
         let tag_set: bindings::blk_mq_tag_set = pin_init::zeroed();
@@ -67,7 +69,7 @@ pub fn new(
                     ops: OperationsVTable::<T>::build(),
                     nr_hw_queues,
                     timeout: 0, // 0 means default which is 30Hz in C
-                    numa_node: bindings::NUMA_NO_NODE,
+                    numa_node: numa_node.id(),
                     queue_depth: num_tags,
                     cmd_size,
                     flags: flags.into(),

-- 
2.51.2



^ permalink raw reply related

* [PATCH v2 60/83] block: rust: add request flags abstraction
From: Andreas Hindborg @ 2026-06-09 19:08 UTC (permalink / raw)
  To: Liam R. Howlett, Alice Ryhl, Anna-Maria Behnsen, Benno Lossin,
	Björn Roy Baron, Boqun Feng, Danilo Krummrich,
	FUJITA Tomonori, Frederic Weisbecker, Gary Guo, Jens Axboe,
	John Stultz, Lorenzo Stoakes, Lyude Paul, Miguel Ojeda,
	Stephen Boyd, Thomas Gleixner, Trevor Gross, Liam R. Howlett,
	Boqun Feng, Lorenzo Stoakes
  Cc: Andreas Hindborg, linux-block, linux-kernel, linux-mm,
	rust-for-linux
In-Reply-To: <20260609-rnull-v6-19-rc5-send-v2-0-82c7404542e2@kernel.org>

Add the `Flag` enum and `Flags` type as Rust abstractions for the C
`REQ_*` request flags. These flags modify how block I/O requests are
processed, including sync behavior, priority hints, and integrity
settings.

Also add a `flags()` method to `Request` to retrieve the flags for a
given request.

Signed-off-by: Andreas Hindborg <a.hindborg@kernel.org>
---
 rust/bindings/bindings_helper.h      | 21 ++++++++++++
 rust/kernel/block/mq.rs              |  2 ++
 rust/kernel/block/mq/request.rs      | 12 +++++++
 rust/kernel/block/mq/request/flag.rs | 65 ++++++++++++++++++++++++++++++++++++
 4 files changed, 100 insertions(+)

diff --git a/rust/bindings/bindings_helper.h b/rust/bindings/bindings_helper.h
index 2a69c17bf271..7acda3ae9725 100644
--- a/rust/bindings/bindings_helper.h
+++ b/rust/bindings/bindings_helper.h
@@ -140,6 +140,27 @@ const blk_status_t RUST_CONST_HELPER_BLK_STS_OFFLINE = BLK_STS_OFFLINE;
 const blk_status_t RUST_CONST_HELPER_BLK_STS_DURATION_LIMIT = BLK_STS_DURATION_LIMIT;
 const blk_status_t RUST_CONST_HELPER_BLK_STS_INVAL = BLK_STS_INVAL;
 const blk_features_t RUST_CONST_HELPER_BLK_FEAT_ZONED = BLK_FEAT_ZONED;
+const blk_opf_t RUST_CONST_HELPER_REQ_FAILFAST_DEV = REQ_FAILFAST_DEV;
+const blk_opf_t RUST_CONST_HELPER_REQ_FAILFAST_TRANSPORT = REQ_FAILFAST_TRANSPORT;
+const blk_opf_t RUST_CONST_HELPER_REQ_FAILFAST_DRIVER = REQ_FAILFAST_DRIVER;
+const blk_opf_t RUST_CONST_HELPER_REQ_SYNC = REQ_SYNC;
+const blk_opf_t RUST_CONST_HELPER_REQ_META = REQ_META;
+const blk_opf_t RUST_CONST_HELPER_REQ_PRIO = REQ_PRIO;
+const blk_opf_t RUST_CONST_HELPER_REQ_NOMERGE = REQ_NOMERGE;
+const blk_opf_t RUST_CONST_HELPER_REQ_IDLE = REQ_IDLE;
+const blk_opf_t RUST_CONST_HELPER_REQ_INTEGRITY = REQ_INTEGRITY;
+const blk_opf_t RUST_CONST_HELPER_REQ_FUA = REQ_FUA;
+const blk_opf_t RUST_CONST_HELPER_REQ_PREFLUSH = REQ_PREFLUSH;
+const blk_opf_t RUST_CONST_HELPER_REQ_RAHEAD = REQ_RAHEAD;
+const blk_opf_t RUST_CONST_HELPER_REQ_BACKGROUND = REQ_BACKGROUND;
+const blk_opf_t RUST_CONST_HELPER_REQ_NOWAIT = REQ_NOWAIT;
+const blk_opf_t RUST_CONST_HELPER_REQ_POLLED = REQ_POLLED;
+const blk_opf_t RUST_CONST_HELPER_REQ_ALLOC_CACHE = REQ_ALLOC_CACHE;
+const blk_opf_t RUST_CONST_HELPER_REQ_SWAP = REQ_SWAP;
+const blk_opf_t RUST_CONST_HELPER_REQ_DRV = REQ_DRV;
+const blk_opf_t RUST_CONST_HELPER_REQ_FS_PRIVATE = REQ_FS_PRIVATE;
+const blk_opf_t RUST_CONST_HELPER_REQ_ATOMIC = REQ_ATOMIC;
+const blk_opf_t RUST_CONST_HELPER_REQ_NOUNMAP = REQ_NOUNMAP;
 const fop_flags_t RUST_CONST_HELPER_FOP_UNSIGNED_OFFSET = FOP_UNSIGNED_OFFSET;
 
 const xa_mark_t RUST_CONST_HELPER_XA_PRESENT = XA_PRESENT;
diff --git a/rust/kernel/block/mq.rs b/rust/kernel/block/mq.rs
index 23bf95136bc1..9bad95d79230 100644
--- a/rust/kernel/block/mq.rs
+++ b/rust/kernel/block/mq.rs
@@ -137,6 +137,8 @@
 };
 pub use request::{
     Command,
+    Flag as RequestFlag,
+    Flags as RequestFlags,
     IdleRequest,
     Request,
     RequestTimerHandle, //
diff --git a/rust/kernel/block/mq/request.rs b/rust/kernel/block/mq/request.rs
index dbe657a80324..84f8b2c17f85 100644
--- a/rust/kernel/block/mq/request.rs
+++ b/rust/kernel/block/mq/request.rs
@@ -48,6 +48,12 @@
 mod command;
 pub use command::Command;
 
+mod flag;
+pub use flag::{
+    Flag,
+    Flags, //
+};
+
 /// A [`Request`] that a driver has not yet begun to process.
 ///
 /// A driver can convert an `IdleRequest` to a [`Request`] by calling [`IdleRequest::start`].
@@ -125,6 +131,12 @@ pub fn command(&self) -> Command {
         unsafe { Command::from_raw(self.command_raw()) }
     }
 
+    pub fn flags(&self) -> Flags {
+        // SAFETY: By C API contract and type invariant, `cmd_flags` is valid for read.
+        let flags = unsafe { (*self.0.get()).cmd_flags & !((1 << bindings::REQ_OP_BITS) - 1) };
+        Flags::try_from(flags).expect("Request should have valid flags")
+    }
+
     /// Get the target sector for the request.
     #[inline(always)]
     pub fn sector(&self) -> u64 {
diff --git a/rust/kernel/block/mq/request/flag.rs b/rust/kernel/block/mq/request/flag.rs
new file mode 100644
index 000000000000..01f249269803
--- /dev/null
+++ b/rust/kernel/block/mq/request/flag.rs
@@ -0,0 +1,65 @@
+// SPDX-License-Identifier: GPL-2.0
+use crate::{
+    bindings,
+    impl_flags, //
+};
+
+impl_flags! {
+    /// A set of request flags.
+    ///
+    /// This type wraps the C `REQ_*` flags and allows combining multiple flags
+    /// together. These flags modify how a block I/O request is processed.
+    #[derive(Debug, Clone, Default, Copy, PartialEq, Eq)]
+    pub struct Flags(u32);
+
+    /// Individual request flags for block I/O operations.
+    ///
+    /// These flags correspond to the C `REQ_*` defines in `linux/blk_types.h`
+    /// and are used to modify the behavior of block I/O requests.
+    #[derive(Debug, Clone, Copy, PartialEq, Eq)]
+    pub enum Flag {
+        /// No driver retries on device errors.
+        FailfastDev = bindings::REQ_FAILFAST_DEV,
+        /// No driver retries on transport errors.
+        FailfastTransport = bindings::REQ_FAILFAST_TRANSPORT,
+        /// No driver retries on driver errors.
+        FailfastDriver = bindings::REQ_FAILFAST_DRIVER,
+        /// Request is synchronous (sync write or read).
+        Sync = bindings::REQ_SYNC,
+        /// Metadata I/O request.
+        Meta = bindings::REQ_META,
+        /// Boost priority in CFQ scheduler.
+        Priority = bindings::REQ_PRIO,
+        /// Don't merge this request with others.
+        NoMerge = bindings::REQ_NOMERGE,
+        /// Anticipate more I/O after this one.
+        Idle = bindings::REQ_IDLE,
+        /// I/O includes block integrity payload.
+        Integrity = bindings::REQ_INTEGRITY,
+        /// Forced unit access - data must be written to persistent storage
+        /// before command completion is signaled.
+        ForcedUnitAccess = bindings::REQ_FUA,
+        /// Request a cache flush before this operation.
+        Preflush = bindings::REQ_PREFLUSH,
+        /// Read ahead request, can fail anytime.
+        ReadAhead = bindings::REQ_RAHEAD,
+        /// Background I/O operation.
+        Background = bindings::REQ_BACKGROUND,
+        /// Don't wait if the request would block.
+        NoWait = bindings::REQ_NOWAIT,
+        /// Caller polls for completion using `bio_poll`.
+        Polled = bindings::REQ_POLLED,
+        /// Allocate I/O from cache if available.
+        AllocCache = bindings::REQ_ALLOC_CACHE,
+        /// Swap I/O operation.
+        Swap = bindings::REQ_SWAP,
+        /// Reserved for driver use.
+        Driver = bindings::REQ_DRV,
+        /// Reserved for file system (submitter) use.
+        FsPrivate = bindings::REQ_FS_PRIVATE,
+        /// Atomic write operation.
+        Atomic = bindings::REQ_ATOMIC,
+        /// Do not free blocks when zeroing (for write zeroes operations).
+        NoUnmap = bindings::REQ_NOUNMAP,
+    }
+}

-- 
2.51.2



^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox