* [PATCH v2 51/83] block: rust: add zoned block device support
From: Andreas Hindborg @ 2026-06-09 19:08 UTC (permalink / raw)
To: Liam R. Howlett, Alice Ryhl, Anna-Maria Behnsen, Benno Lossin,
Björn Roy Baron, Boqun Feng, Danilo Krummrich,
FUJITA Tomonori, Frederic Weisbecker, Gary Guo, Jens Axboe,
John Stultz, Lorenzo Stoakes, Lyude Paul, Miguel Ojeda,
Stephen Boyd, Thomas Gleixner, Trevor Gross, Liam R. Howlett,
Boqun Feng, Lorenzo Stoakes
Cc: Andreas Hindborg, linux-block, linux-kernel, linux-mm,
rust-for-linux
In-Reply-To: <20260609-rnull-v6-19-rc5-send-v2-0-82c7404542e2@kernel.org>
Add support for zoned block devices to the Rust block layer bindings.
This includes the `report_zones` callback in `Operations` and methods
in `GenDiskBuilder` to configure zoned device parameters.
Drivers can mark a disk as zoned and configure the zone size and
maximum zone append size. The `report_zones` callback is invoked by
the block layer to query zone information.
Signed-off-by: Andreas Hindborg <a.hindborg@kernel.org>
---
rust/bindings/bindings_helper.h | 1 +
rust/kernel/block/mq/gen_disk.rs | 95 +++++++++++++++++++++++++++++++++-----
rust/kernel/block/mq/operations.rs | 61 +++++++++++++++++++++++-
3 files changed, 145 insertions(+), 12 deletions(-)
diff --git a/rust/bindings/bindings_helper.h b/rust/bindings/bindings_helper.h
index eaf05d60dda9..2a69c17bf271 100644
--- a/rust/bindings/bindings_helper.h
+++ b/rust/bindings/bindings_helper.h
@@ -139,6 +139,7 @@ const blk_status_t RUST_CONST_HELPER_BLK_STS_ZONE_ACTIVE_RESOURCE = BLK_STS_ZONE
const blk_status_t RUST_CONST_HELPER_BLK_STS_OFFLINE = BLK_STS_OFFLINE;
const blk_status_t RUST_CONST_HELPER_BLK_STS_DURATION_LIMIT = BLK_STS_DURATION_LIMIT;
const blk_status_t RUST_CONST_HELPER_BLK_STS_INVAL = BLK_STS_INVAL;
+const blk_features_t RUST_CONST_HELPER_BLK_FEAT_ZONED = BLK_FEAT_ZONED;
const fop_flags_t RUST_CONST_HELPER_FOP_UNSIGNED_OFFSET = FOP_UNSIGNED_OFFSET;
const xa_mark_t RUST_CONST_HELPER_XA_PRESENT = XA_PRESENT;
diff --git a/rust/kernel/block/mq/gen_disk.rs b/rust/kernel/block/mq/gen_disk.rs
index 79a67b545eca..eedba691e167 100644
--- a/rust/kernel/block/mq/gen_disk.rs
+++ b/rust/kernel/block/mq/gen_disk.rs
@@ -8,6 +8,7 @@
use crate::{
bindings,
block::mq::{
+ operations::OperationsVTable,
Operations,
RequestQueue,
TagSet, //
@@ -48,6 +49,12 @@ pub struct GenDiskBuilder<T> {
physical_block_size: u32,
capacity_sectors: u64,
max_hw_discard_sectors: u32,
+ #[cfg(CONFIG_BLK_DEV_ZONED)]
+ zoned: bool,
+ #[cfg(CONFIG_BLK_DEV_ZONED)]
+ zone_size_sectors: u32,
+ #[cfg(CONFIG_BLK_DEV_ZONED)]
+ zone_append_max_sectors: u32,
_p: PhantomData<T>,
}
@@ -59,6 +66,12 @@ fn default() -> Self {
physical_block_size: bindings::PAGE_SIZE as u32,
capacity_sectors: 0,
max_hw_discard_sectors: 0,
+ #[cfg(CONFIG_BLK_DEV_ZONED)]
+ zoned: false,
+ #[cfg(CONFIG_BLK_DEV_ZONED)]
+ zone_size_sectors: 0,
+ #[cfg(CONFIG_BLK_DEV_ZONED)]
+ zone_append_max_sectors: 0,
_p: PhantomData,
}
}
@@ -130,6 +143,27 @@ pub fn max_hw_discard_sectors(mut self, max_hw_discard_sectors: u32) -> Self {
self
}
+ /// Mark this device as a zoned block device.
+ #[cfg(CONFIG_BLK_DEV_ZONED)]
+ pub fn zoned(mut self, enable: bool) -> Self {
+ self.zoned = enable;
+ self
+ }
+
+ /// Set the zone size of this block device.
+ #[cfg(CONFIG_BLK_DEV_ZONED)]
+ pub fn zone_size(mut self, sectors: u32) -> Self {
+ self.zone_size_sectors = sectors;
+ self
+ }
+
+ /// Set the max zone append size for this block device.
+ #[cfg(CONFIG_BLK_DEV_ZONED)]
+ pub fn zone_append_max(mut self, sectors: u32) -> Self {
+ self.zone_append_max_sectors = sectors;
+ self
+ }
+
/// Build a new `GenDisk` and add it to the VFS.
pub fn build(
self,
@@ -149,7 +183,18 @@ pub fn build(
lim.physical_block_size = self.physical_block_size;
lim.max_hw_discard_sectors = self.max_hw_discard_sectors;
if self.rotational {
- lim.features = bindings::BLK_FEAT_ROTATIONAL;
+ lim.features |= bindings::BLK_FEAT_ROTATIONAL;
+ }
+
+ #[cfg(CONFIG_BLK_DEV_ZONED)]
+ if self.zoned {
+ if !T::HAS_REPORT_ZONES {
+ return Err(error::code::EINVAL);
+ }
+
+ lim.features |= bindings::BLK_FEAT_ZONED;
+ lim.chunk_sectors = self.zone_size_sectors;
+ lim.max_hw_zone_append_sectors = self.zone_append_max_sectors;
}
// SAFETY: `tagset.raw_tag_set()` points to a valid and initialized tag set
@@ -179,14 +224,6 @@ pub fn build(
// operation, so we will not race.
unsafe { bindings::set_capacity(gendisk, self.capacity_sectors) };
- crate::error::to_result(
- // SAFETY: `gendisk` points to a valid and initialized instance of
- // `struct gendisk`.
- unsafe {
- bindings::device_add_disk(core::ptr::null_mut(), gendisk, core::ptr::null_mut())
- },
- )?;
-
recover_data.dismiss();
// INVARIANT: `gendisk` was initialized above.
@@ -214,7 +251,27 @@ pub fn build(
GFP_KERNEL,
)?;
- Ok(disk.into())
+ let disk: Arc<_> = disk.into();
+
+ // SAFETY: `disk.gendisk` is valid for write as we initialized it above. We have exclusive
+ // access.
+ unsafe { (*disk.gendisk).private_data = Arc::as_ptr(&disk).cast_mut().cast() };
+
+ #[cfg(CONFIG_BLK_DEV_ZONED)]
+ if self.zoned {
+ // SAFETY: `disk.gendisk` is valid as we initialized it above. We have exclusive access.
+ unsafe { bindings::blk_revalidate_disk_zones(gendisk) };
+ }
+
+ crate::error::to_result(
+ // SAFETY: `gendisk` points to a valid and initialized instance of
+ // `struct gendisk`.
+ unsafe {
+ bindings::device_add_disk(core::ptr::null_mut(), gendisk, core::ptr::null_mut())
+ },
+ )?;
+
+ Ok(disk)
}
const VTABLE: bindings::block_device_operations = bindings::block_device_operations {
@@ -228,7 +285,11 @@ pub fn build(
getgeo: None,
set_read_only: None,
swap_slot_free_notify: None,
- report_zones: None,
+ report_zones: if T::HAS_REPORT_ZONES {
+ Some(OperationsVTable::<T>::report_zones_callback)
+ } else {
+ None
+ },
devnode: None,
alternative_gpt_sector: None,
get_unique_id: None,
@@ -327,6 +388,18 @@ fn drop(&mut self) {
/// `self.0` is valid for use as a reference.
pub struct GenDiskRef<T: Operations>(NonNull<GenDisk<T>>);
+impl<T: Operations> GenDiskRef<T> {
+ /// Create a `GenDiskRef` from a pointer to a `GenDisk`.
+ ///
+ /// # Safety
+ ///
+ /// `ptr` must be valid for use as a `GenDisk` reference for the lifetime of the returned
+ /// `GenDiskRef`.
+ pub(crate) unsafe fn from_ptr(ptr: NonNull<GenDisk<T>>) -> GenDiskRef<T> {
+ Self(ptr)
+ }
+}
+
// SAFETY: It is safe to transfer ownership of `GenDiskRef` across thread boundaries.
unsafe impl<T: Operations> Send for GenDiskRef<T> {}
diff --git a/rust/kernel/block/mq/operations.rs b/rust/kernel/block/mq/operations.rs
index b9a2bf6592b3..71d4192d627f 100644
--- a/rust/kernel/block/mq/operations.rs
+++ b/rust/kernel/block/mq/operations.rs
@@ -9,6 +9,7 @@
block::{
error::BlkResult,
mq::{
+ gen_disk::GenDiskRef,
request::RequestDataWrapper,
IdleRequest,
Request, //
@@ -16,6 +17,7 @@
},
error::{
from_result,
+ to_result,
Result, //
},
prelude::*,
@@ -29,7 +31,10 @@
Owned, //
},
};
-use core::marker::PhantomData;
+use core::{
+ marker::PhantomData,
+ ptr::NonNull, //
+};
use pin_init::PinInit;
type ForeignBorrowed<'a, T> = <T as ForeignOwnable>::Borrowed<'a>;
@@ -107,6 +112,20 @@ fn init_hctx(
fn poll(_hw_data: ForeignBorrowed<'_, Self::HwData>) -> bool {
build_error!(crate::error::VTABLE_DEFAULT_ERROR)
}
+
+ /// Called by the kernel to get a zone report from the driver.
+ ///
+ /// The driver must call `callback` once for each zone on `disk` and populate the first argument
+ /// with a zone descriptor and the second argument with the zone index.
+ // TODO: We cannot gate this on CONFIG_BLK_DEV_ZONED due to limitations of the `vtable` macro.
+ fn report_zones(
+ _disk: &GenDiskRef<Self>,
+ _sector: u64,
+ _nr_zones: u32,
+ _callback: impl Fn(&bindings::blk_zone, u32) -> Result,
+ ) -> Result<u32> {
+ Err(ENOTSUPP)
+ }
}
/// A vtable for blk-mq to interact with a block device driver.
@@ -359,6 +378,46 @@ impl<T: Operations> OperationsVTable<T> {
unsafe { core::ptr::drop_in_place(pdu) };
}
+ /// This function is a callback hook for the C kernel. A pointer to this function is
+ /// installed in the `block_device_operations` vtable for the driver.
+ ///
+ /// # Safety
+ ///
+ /// - This function may only be called by blk-mq C infrastructure.
+ /// - `disk_ptr` must be a pointer to a gendisk initialized by `GenDisk::build`.
+ pub(crate) unsafe extern "C" fn report_zones_callback(
+ disk_ptr: *mut bindings::gendisk,
+ sector: u64,
+ nr_zones: u32,
+ args: *mut bindings::blk_report_zones_args,
+ ) -> i32 {
+ // SAFETY: As `disk_ptr` is a gendisk initialized by `GenDisk::build`, `private_data` is not
+ // null.
+ let disk_ref_ptr = unsafe { NonNull::new_unchecked((*disk_ptr).private_data.cast()) };
+
+ // SAFETY: `disk_ptr.private_data` is a pointer to the `GenDisk` owner of `disk_ptr` that we
+ // installed when we initialized `disk_ptr`. It is valid for use as a reference for the
+ // duration of this call.
+ let disk = unsafe { GenDiskRef::from_ptr(disk_ref_ptr) };
+
+ from_result(|| {
+ T::report_zones(&disk, sector, nr_zones, |zone, idx| -> Result {
+ to_result(
+ // SAFETY: `disk_ptr` is valid by function safety requirements.
+ unsafe {
+ bindings::disk_report_zone(
+ disk_ptr,
+ core::ptr::from_ref(zone).cast_mut(),
+ idx,
+ args,
+ )
+ },
+ )
+ })
+ .and_then(|v: u32| -> Result<_> { Ok(v.try_into()?) })
+ })
+ }
+
const VTABLE: bindings::blk_mq_ops = bindings::blk_mq_ops {
queue_rq: Some(Self::queue_rq_callback),
queue_rqs: None,
--
2.51.2
^ permalink raw reply related
* [PATCH v2 65/83] block: rust: add `GenDisk::tag_set`
From: Andreas Hindborg @ 2026-06-09 19:08 UTC (permalink / raw)
To: Liam R. Howlett, Alice Ryhl, Anna-Maria Behnsen, Benno Lossin,
Björn Roy Baron, Boqun Feng, Danilo Krummrich,
FUJITA Tomonori, Frederic Weisbecker, Gary Guo, Jens Axboe,
John Stultz, Lorenzo Stoakes, Lyude Paul, Miguel Ojeda,
Stephen Boyd, Thomas Gleixner, Trevor Gross, Liam R. Howlett,
Boqun Feng, Lorenzo Stoakes
Cc: Andreas Hindborg, linux-block, linux-kernel, linux-mm,
rust-for-linux
In-Reply-To: <20260609-rnull-v6-19-rc5-send-v2-0-82c7404542e2@kernel.org>
Add a method to `GenDisk` to obtain a reference to the associated `TagSet`.
Signed-off-by: Andreas Hindborg <a.hindborg@kernel.org>
---
rust/kernel/block/mq/gen_disk.rs | 9 +++++++--
1 file changed, 7 insertions(+), 2 deletions(-)
diff --git a/rust/kernel/block/mq/gen_disk.rs b/rust/kernel/block/mq/gen_disk.rs
index 5367ca92b7aa..a50ba7b605d7 100644
--- a/rust/kernel/block/mq/gen_disk.rs
+++ b/rust/kernel/block/mq/gen_disk.rs
@@ -257,7 +257,7 @@ pub fn build(
// `__blk_mq_alloc_disk` above.
let mut disk = UniqueArc::new(
GenDisk {
- _tagset: tagset,
+ tag_set: tagset,
gendisk,
backref: Arc::pin_init(
// INVARIANT: We break `GenDiskRef` invariant here, but we restore it below.
@@ -341,7 +341,7 @@ pub(crate) const fn build_vtable() -> &'static bindings::block_device_operations
/// `bindings::device_add_disk`.
/// - `self.gendisk.queue.queuedata` is initialized by a call to `ForeignOwnable::into_foreign`.
pub struct GenDisk<T: Operations> {
- _tagset: Arc<TagSet<T>>,
+ tag_set: Arc<TagSet<T>>,
gendisk: *mut bindings::gendisk,
backref: Arc<Revocable<GenDiskRef<T>>>,
}
@@ -363,6 +363,11 @@ pub fn queue_data(&self) -> <T::QueueData as ForeignOwnable>::Borrowed<'_> {
// SAFETY: By type invariant, self is a valid gendisk.
unsafe { T::QueueData::borrow((*(*self.gendisk).queue).queuedata) }
}
+
+ /// Get a reference to the `TagSet` used by this `GenDisk`.
+ pub fn tag_set(&self) -> &Arc<TagSet<T>> {
+ &self.tag_set
+ }
}
// SAFETY: `GenDisk` is an owned pointer to a `struct gendisk` and an `Arc` to a
--
2.51.2
^ permalink raw reply related
* [PATCH v2 16/83] block: rnull: add memory backing
From: Andreas Hindborg @ 2026-06-09 19:07 UTC (permalink / raw)
To: Liam R. Howlett, Alice Ryhl, Anna-Maria Behnsen, Benno Lossin,
Björn Roy Baron, Boqun Feng, Danilo Krummrich,
FUJITA Tomonori, Frederic Weisbecker, Gary Guo, Jens Axboe,
John Stultz, Lorenzo Stoakes, Lyude Paul, Miguel Ojeda,
Stephen Boyd, Thomas Gleixner, Trevor Gross, Liam R. Howlett,
Boqun Feng, Lorenzo Stoakes
Cc: Andreas Hindborg, linux-block, linux-kernel, linux-mm,
rust-for-linux
In-Reply-To: <20260609-rnull-v6-19-rc5-send-v2-0-82c7404542e2@kernel.org>
Add memory backing to the rust null block driver. This implementation will
always allocate a page on write, even if a page backing the written
sector is already allocated, in which case the new page will be released again.
A later patch will fix this inefficiency.
Signed-off-by: Andreas Hindborg <a.hindborg@kernel.org>
---
drivers/block/rnull/configfs.rs | 8 ++-
drivers/block/rnull/rnull.rs | 126 ++++++++++++++++++++++++++++++++++------
2 files changed, 116 insertions(+), 18 deletions(-)
diff --git a/drivers/block/rnull/configfs.rs b/drivers/block/rnull/configfs.rs
index 83b474f6da60..8daf2ca409ba 100644
--- a/drivers/block/rnull/configfs.rs
+++ b/drivers/block/rnull/configfs.rs
@@ -60,7 +60,7 @@ impl AttributeOperations<0> for Config {
fn show(_this: &Config, page: &mut [u8; PAGE_SIZE]) -> Result<usize> {
let mut writer = kernel::str::Formatter::new(page);
- writer.write_str("blocksize,size,rotational,irqmode,completion_nsec\n")?;
+ writer.write_str("blocksize,size,rotational,irqmode,completion_nsec,memory_backed\n")?;
Ok(writer.bytes_written())
}
}
@@ -84,6 +84,7 @@ fn make_group(
size: 3,
irqmode: 4,
completion_nsec: 5,
+ memory_backed: 6,
],
};
@@ -101,6 +102,7 @@ fn make_group(
irq_mode: IRQMode::None,
completion_time: time::Delta::ZERO,
name: name.try_into()?,
+ memory_backed: false,
}),
}),
core::iter::empty(),
@@ -165,6 +167,7 @@ struct DeviceConfigInner {
irq_mode: IRQMode,
completion_time: time::Delta,
disk: Option<GenDisk<NullBlkDevice>>,
+ memory_backed: bool,
}
#[vtable]
@@ -195,6 +198,7 @@ fn store(this: &DeviceConfig, page: &[u8]) -> Result {
guard.capacity_mib,
guard.irq_mode,
guard.completion_time,
+ guard.memory_backed,
)?);
guard.powered = true;
} else if guard.powered && !power_op {
@@ -226,3 +230,5 @@ fn from_str(s: &str) -> Result<Self> {
value.try_into()
}
}
+
+configfs_simple_bool_field!(DeviceConfig, 6, memory_backed);
diff --git a/drivers/block/rnull/rnull.rs b/drivers/block/rnull/rnull.rs
index 746ddadd11f0..8e4d2b270bcf 100644
--- a/drivers/block/rnull/rnull.rs
+++ b/drivers/block/rnull/rnull.rs
@@ -6,8 +6,10 @@
use configfs::IRQMode;
use kernel::{
+ bindings,
block::{
self,
+ bio::Segment,
mq::{
self,
gen_disk::{
@@ -19,15 +21,12 @@
},
},
error::Result,
- new_mutex,
+ memalloc_scope, new_mutex, new_xarray,
+ page::SafePage,
pr_info,
prelude::*,
str::CString,
- sync::{
- aref::ARef,
- Arc,
- Mutex, //
- },
+ sync::{aref::ARef, Arc, Mutex},
time::{
hrtimer::{
HrTimerCallback,
@@ -40,7 +39,8 @@
types::{
OwnableRefCounted,
Owned, //
- }, //
+ },
+ xarray::XArray,
};
module! {
@@ -74,6 +74,10 @@
default: 10_000,
description: "Time in ns to complete a request in hardware. Default: 10,000ns",
},
+ memory_backed: bool {
+ default: false,
+ description: "Create a memory-backed block device.",
+ },
},
}
@@ -103,6 +107,7 @@ fn init(_module: &'static ThisModule) -> impl PinInit<Self, Error> {
module_parameters::gb.value() * 1024,
module_parameters::irqmode.value().try_into()?,
Delta::from_nanos(completion_time),
+ module_parameters::memory_backed.value(),
)?;
disks.push(disk, GFP_KERNEL)?;
}
@@ -127,17 +132,23 @@ fn new(
capacity_mib: u64,
irq_mode: IRQMode,
completion_time: Delta,
+ memory_backed: bool,
) -> Result<GenDisk<Self>> {
- let tagset = Arc::pin_init(
- TagSet::new(1, 256, 1, mq::tag_set::Flags::default()),
- GFP_KERNEL,
- )?;
+ let flags = if memory_backed {
+ mq::tag_set::Flag::Blocking.into()
+ } else {
+ mq::tag_set::Flags::default()
+ };
+
+ let tagset = Arc::pin_init(TagSet::new(1, 256, 1, flags), GFP_KERNEL)?;
- let queue_data = Box::new(
- QueueData {
+ let queue_data = Box::pin_init(
+ pin_init!(QueueData {
+ tree <- new_xarray!(kernel::xarray::AllocKind::Alloc),
irq_mode,
completion_time,
- },
+ memory_backed,
+ }),
GFP_KERNEL,
)?;
@@ -148,11 +159,72 @@ fn new(
.rotational(rotational)
.build(fmt!("{}", name.to_str()?), tagset, queue_data)
}
+
+ #[inline(always)]
+ fn write(tree: &XArray<TreeNode>, mut sector: usize, mut segment: Segment<'_>) -> Result {
+ while !segment.is_empty() {
+ let page = SafePage::alloc_page(GFP_KERNEL)?;
+ let mut tree = tree.lock();
+
+ let page_idx = sector >> block::PAGE_SECTORS_SHIFT;
+
+ let page = if let Some(page) = tree.get_mut(page_idx) {
+ page
+ } else {
+ tree.store(page_idx, page, GFP_KERNEL)?;
+ tree.get_mut(page_idx).unwrap()
+ };
+
+ let page_offset = (sector & block::PAGE_SECTOR_MASK as usize) << block::SECTOR_SHIFT;
+ sector += segment.copy_to_page(page, page_offset) >> block::SECTOR_SHIFT;
+ }
+ Ok(())
+ }
+
+ #[inline(always)]
+ fn read(tree: &XArray<TreeNode>, mut sector: usize, mut segment: Segment<'_>) -> Result {
+ let tree = tree.lock();
+
+ while !segment.is_empty() {
+ let idx = sector >> block::PAGE_SECTORS_SHIFT;
+
+ if let Some(page) = tree.get(idx) {
+ let page_offset =
+ (sector & block::PAGE_SECTOR_MASK as usize) << block::SECTOR_SHIFT;
+ sector += segment.copy_from_page(page, page_offset) >> block::SECTOR_SHIFT;
+ } else {
+ sector += segment.zero_page() >> block::SECTOR_SHIFT;
+ }
+ }
+
+ Ok(())
+ }
+
+ #[inline(never)]
+ fn transfer(
+ command: bindings::req_op,
+ tree: &XArray<TreeNode>,
+ sector: usize,
+ segment: Segment<'_>,
+ ) -> Result {
+ match command {
+ bindings::req_op_REQ_OP_WRITE => Self::write(tree, sector, segment)?,
+ bindings::req_op_REQ_OP_READ => Self::read(tree, sector, segment)?,
+ _ => (),
+ }
+ Ok(())
+ }
}
+type TreeNode = Owned<SafePage>;
+
+#[pin_data]
struct QueueData {
+ #[pin]
+ tree: XArray<TreeNode>,
irq_mode: IRQMode,
completion_time: Delta,
+ memory_backed: bool,
}
#[pin_data]
@@ -182,7 +254,7 @@ impl HasHrTimer<Self> for Pdu {
#[vtable]
impl Operations for NullBlkDevice {
- type QueueData = KBox<QueueData>;
+ type QueueData = Pin<KBox<QueueData>>;
type RequestData = Pdu;
fn new_request_data() -> impl PinInit<Self::RequestData> {
@@ -192,7 +264,27 @@ fn new_request_data() -> impl PinInit<Self::RequestData> {
}
#[inline(always)]
- fn queue_rq(queue_data: &QueueData, rq: Owned<mq::Request<Self>>, _is_last: bool) -> Result {
+ fn queue_rq(
+ queue_data: Pin<&QueueData>,
+ mut rq: Owned<mq::Request<Self>>,
+ _is_last: bool,
+ ) -> Result {
+ if queue_data.memory_backed {
+ memalloc_scope!(let _noio: NoIo);
+ let tree = &queue_data.tree;
+ let command = rq.command();
+ let mut sector = rq.sector();
+
+ for bio in rq.bio_iter_mut() {
+ let segment_iter = bio.segment_iter();
+ for segment in segment_iter {
+ let length = segment.len();
+ Self::transfer(command, tree, sector, segment)?;
+ sector += length as usize >> block::SECTOR_SHIFT;
+ }
+ }
+ }
+
match queue_data.irq_mode {
IRQMode::None => rq.end_ok(),
IRQMode::Soft => mq::Request::complete(rq.into()),
@@ -205,7 +297,7 @@ fn queue_rq(queue_data: &QueueData, rq: Owned<mq::Request<Self>>, _is_last: bool
Ok(())
}
- fn commit_rqs(_queue_data: &QueueData) {}
+ fn commit_rqs(_queue_data: Pin<&QueueData>) {}
fn complete(rq: ARef<mq::Request<Self>>) {
OwnableRefCounted::try_from_shared(rq)
--
2.51.2
^ permalink raw reply related
* [PATCH v2 82/83] block: rnull: add `shared_tag_bitmap` config option
From: Andreas Hindborg @ 2026-06-09 19:09 UTC (permalink / raw)
To: Liam R. Howlett, Alice Ryhl, Anna-Maria Behnsen, Benno Lossin,
Björn Roy Baron, Boqun Feng, Danilo Krummrich,
FUJITA Tomonori, Frederic Weisbecker, Gary Guo, Jens Axboe,
John Stultz, Lorenzo Stoakes, Lyude Paul, Miguel Ojeda,
Stephen Boyd, Thomas Gleixner, Trevor Gross, Liam R. Howlett,
Boqun Feng, Lorenzo Stoakes
Cc: Andreas Hindborg, linux-block, linux-kernel, linux-mm,
rust-for-linux
In-Reply-To: <20260609-rnull-v6-19-rc5-send-v2-0-82c7404542e2@kernel.org>
Add a configfs attribute and module parameter to enable the
`BLK_MQ_F_TAG_HCTX_SHARED` flag for the rnull tag set. When enabled,
a tag bitmap is shared across all hardware queues.
Signed-off-by: Andreas Hindborg <a.hindborg@kernel.org>
---
drivers/block/rnull/configfs.rs | 5 +++++
drivers/block/rnull/rnull.rs | 12 ++++++++++++
rust/kernel/block/mq/tag_set/flags.rs | 4 ++++
3 files changed, 21 insertions(+)
diff --git a/drivers/block/rnull/configfs.rs b/drivers/block/rnull/configfs.rs
index 3e054339226c..1bab38c55698 100644
--- a/drivers/block/rnull/configfs.rs
+++ b/drivers/block/rnull/configfs.rs
@@ -134,6 +134,7 @@ fn make_group(
fua: 28,
max_sectors: 29,
virt_boundary: 30,
+ shared_tag_bitmap: 31,
],
};
@@ -223,6 +224,7 @@ fn make_group(
init_hctx_inject,
max_sectors: 0,
virt_boundary: false,
+ shared_tag_bitmap: false,
}),
}),
default_groups,
@@ -318,6 +320,7 @@ struct DeviceConfigInner {
init_hctx_inject: Arc<FaultConfig>,
max_sectors: u32,
virt_boundary: bool,
+ shared_tag_bitmap: bool,
}
#[vtable]
@@ -374,6 +377,7 @@ fn store(this: &DeviceConfig, page: &[u8]) -> Result {
blocking: guard.blocking,
memory_backed: guard.memory_backed,
no_sched: guard.no_sched,
+ shared_tag_bitmap: guard.shared_tag_bitmap,
hw_queue_depth: guard.hw_queue_depth,
#[cfg(CONFIG_BLK_DEV_RUST_NULL_FAULT_INJECTION)]
init_hctx_inject: guard.init_hctx_inject.clone(),
@@ -622,3 +626,4 @@ fn store(this: &DeviceConfig, page: &[u8]) -> Result {
configfs_simple_bool_field!(DeviceConfig, 28, fua);
configfs_simple_field!(DeviceConfig, 29, max_sectors, u32);
configfs_simple_bool_field!(DeviceConfig, 30, virt_boundary);
+configfs_simple_bool_field!(DeviceConfig, 31, shared_tag_bitmap);
diff --git a/drivers/block/rnull/rnull.rs b/drivers/block/rnull/rnull.rs
index 147dc8498c3a..81f9e2d03f31 100644
--- a/drivers/block/rnull/rnull.rs
+++ b/drivers/block/rnull/rnull.rs
@@ -216,6 +216,10 @@
default: false,
description: "Set alignment requirement for IO buffers to be page size.",
},
+ shared_tag_bitmap: bool {
+ default: false,
+ description: "Use shared tag bitmap for all submission queues for blk-mq.",
+ },
},
}
@@ -245,6 +249,7 @@ fn init(_module: &'static ThisModule) -> impl PinInit<Self, Error> {
let memory_backed = module_parameters::memory_backed.value();
let no_sched = module_parameters::no_sched.value();
let hw_queue_depth = module_parameters::hw_queue_depth.value();
+ let shared_tag_bitmap = module_parameters::shared_tag_bitmap.value();
let shared_tag_set = NullBlkDevice::build_tag_set(TagSetOptions {
home_node,
@@ -258,6 +263,7 @@ fn init(_module: &'static ThisModule) -> impl PinInit<Self, Error> {
blocking,
memory_backed,
no_sched,
+ shared_tag_bitmap,
hw_queue_depth,
#[cfg(CONFIG_BLK_DEV_RUST_NULL_FAULT_INJECTION)]
init_hctx_inject: Arc::pin_init(
@@ -300,6 +306,7 @@ fn init(_module: &'static ThisModule) -> impl PinInit<Self, Error> {
blocking,
memory_backed,
no_sched,
+ shared_tag_bitmap,
hw_queue_depth,
#[cfg(CONFIG_BLK_DEV_RUST_NULL_FAULT_INJECTION)]
init_hctx_inject: Arc::pin_init(
@@ -404,6 +411,7 @@ struct TagSetOptions {
blocking: bool,
memory_backed: bool,
no_sched: bool,
+ shared_tag_bitmap: bool,
hw_queue_depth: u32,
#[cfg(CONFIG_BLK_DEV_RUST_NULL_FAULT_INJECTION)]
init_hctx_inject: Arc<FaultConfig>,
@@ -419,6 +427,7 @@ fn build_tag_set(options: TagSetOptions) -> Result<Arc<TagSet<Self>>> {
blocking,
memory_backed,
no_sched,
+ shared_tag_bitmap,
hw_queue_depth,
#[cfg(CONFIG_BLK_DEV_RUST_NULL_FAULT_INJECTION)]
init_hctx_inject,
@@ -441,6 +450,9 @@ fn build_tag_set(options: TagSetOptions) -> Result<Arc<TagSet<Self>>> {
if no_sched {
flags |= mq::tag_set::Flag::NoDefaultScheduler;
}
+ if shared_tag_bitmap {
+ flags |= mq::tag_set::Flag::TagHctxShared;
+ }
let queue_config_guard = queue_config.lock();
let submit_queues = queue_config_guard.submit_queues;
diff --git a/rust/kernel/block/mq/tag_set/flags.rs b/rust/kernel/block/mq/tag_set/flags.rs
index 2561d7090c49..afc9d31ed998 100644
--- a/rust/kernel/block/mq/tag_set/flags.rs
+++ b/rust/kernel/block/mq/tag_set/flags.rs
@@ -21,5 +21,9 @@ pub enum Flag {
/// Select 'none' during queue registration in case of a single hwq or shared
/// hwqs instead of 'mq-deadline'.
NoDefaultScheduler = bindings::BLK_MQ_F_NO_SCHED_BY_DEFAULT,
+
+ /// Use shared tag bitmap for all submission queues.
+ TagHctxShared = bindings::BLK_MQ_F_TAG_HCTX_SHARED,
+
}
}
--
2.51.2
^ permalink raw reply related
* [PATCH v2 02/83] rust: block: rename `SECTOR_MASK` to `PAGE_SECTOR_MASK`
From: Andreas Hindborg @ 2026-06-09 19:07 UTC (permalink / raw)
To: Liam R. Howlett, Alice Ryhl, Anna-Maria Behnsen, Benno Lossin,
Björn Roy Baron, Boqun Feng, Danilo Krummrich,
FUJITA Tomonori, Frederic Weisbecker, Gary Guo, Jens Axboe,
John Stultz, Lorenzo Stoakes, Lyude Paul, Miguel Ojeda,
Stephen Boyd, Thomas Gleixner, Trevor Gross, Liam R. Howlett,
Boqun Feng, Lorenzo Stoakes
Cc: Andreas Hindborg, linux-block, linux-kernel, linux-mm,
rust-for-linux
In-Reply-To: <20260609-rnull-v6-19-rc5-send-v2-0-82c7404542e2@kernel.org>
The constant exposes `bindings::SECTOR_MASK`, which masks the sector
index within a page (`PAGE_SIZE / SECTOR_SIZE - 1`), not `SECTOR_SIZE`
itself as the original docstring suggested. The misleading name made
it easy for callers to reach for it when they wanted a byte-level
sector mask.
Rename the Rust constant to `PAGE_SECTOR_MASK` and fix the docstring.
The C binding is unchanged.
Signed-off-by: Andreas Hindborg <a.hindborg@kernel.org>
---
rust/kernel/block.rs | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/rust/kernel/block.rs b/rust/kernel/block.rs
index 32c8d865afb6..b120e83d9425 100644
--- a/rust/kernel/block.rs
+++ b/rust/kernel/block.rs
@@ -4,8 +4,8 @@
pub mod mq;
-/// Bit mask for masking out [`SECTOR_SIZE`].
-pub const SECTOR_MASK: u32 = bindings::SECTOR_MASK;
+/// Bit mask for masking out the sector index in a page.
+pub const PAGE_SECTOR_MASK: u32 = bindings::SECTOR_MASK;
/// Sectors are size `1 << SECTOR_SHIFT`.
pub const SECTOR_SHIFT: u32 = bindings::SECTOR_SHIFT;
--
2.51.2
^ permalink raw reply related
* [PATCH v2 37/83] block: rust: add a back reference feature to `GenDisk`
From: Andreas Hindborg @ 2026-06-09 19:08 UTC (permalink / raw)
To: Liam R. Howlett, Alice Ryhl, Anna-Maria Behnsen, Benno Lossin,
Björn Roy Baron, Boqun Feng, Danilo Krummrich,
FUJITA Tomonori, Frederic Weisbecker, Gary Guo, Jens Axboe,
John Stultz, Lorenzo Stoakes, Lyude Paul, Miguel Ojeda,
Stephen Boyd, Thomas Gleixner, Trevor Gross, Liam R. Howlett,
Boqun Feng, Lorenzo Stoakes
Cc: Andreas Hindborg, linux-block, linux-kernel, linux-mm,
rust-for-linux
In-Reply-To: <20260609-rnull-v6-19-rc5-send-v2-0-82c7404542e2@kernel.org>
During certain block layer callbacks, drivers may need access to the Rust
`GenDisk` representing a disk the driver is managing. In some situations it
is only possible to obtain a pointer to the C `struct gendisk`. With the
current setup, it is not possible to obtain the `GenDisk` for this C
`gendisk`. To circumvent this, we add a back reference feature to the
`GenDisk` so that we can store a reference counted reference to the
`GenDisk` somewhere easily accessible.
Signed-off-by: Andreas Hindborg <a.hindborg@kernel.org>
---
drivers/block/rnull/configfs.rs | 2 +-
drivers/block/rnull/rnull.rs | 4 +--
rust/kernel/block/mq/gen_disk.rs | 65 ++++++++++++++++++++++++++++++++++++----
3 files changed, 62 insertions(+), 9 deletions(-)
diff --git a/drivers/block/rnull/configfs.rs b/drivers/block/rnull/configfs.rs
index 504bb477c2d0..4df0b748596a 100644
--- a/drivers/block/rnull/configfs.rs
+++ b/drivers/block/rnull/configfs.rs
@@ -198,7 +198,7 @@ struct DeviceConfigInner {
capacity_mib: u64,
irq_mode: IRQMode,
completion_time: time::Delta,
- disk: Option<GenDisk<NullBlkDevice>>,
+ disk: Option<Arc<GenDisk<NullBlkDevice>>>,
memory_backed: bool,
submit_queues: u32,
home_node: i32,
diff --git a/drivers/block/rnull/rnull.rs b/drivers/block/rnull/rnull.rs
index 877683dba0ac..fd9b770965a6 100644
--- a/drivers/block/rnull/rnull.rs
+++ b/drivers/block/rnull/rnull.rs
@@ -134,7 +134,7 @@ struct NullBlkModule {
#[pin]
configfs_subsystem: kernel::configfs::Subsystem<configfs::Config>,
#[pin]
- param_disks: Mutex<KVec<GenDisk<NullBlkDevice>>>,
+ param_disks: Mutex<KVec<Arc<GenDisk<NullBlkDevice>>>>,
}
impl kernel::InPlaceModule for NullBlkModule {
@@ -216,7 +216,7 @@ struct NullBlkDevice {
}
impl NullBlkDevice {
- fn new(options: NullBlkOptions<'_>) -> Result<GenDisk<Self>> {
+ fn new(options: NullBlkOptions<'_>) -> Result<Arc<GenDisk<Self>>> {
let NullBlkOptions {
name,
block_size,
diff --git a/rust/kernel/block/mq/gen_disk.rs b/rust/kernel/block/mq/gen_disk.rs
index 94af85fe1716..f51bccb0d2ef 100644
--- a/rust/kernel/block/mq/gen_disk.rs
+++ b/rust/kernel/block/mq/gen_disk.rs
@@ -21,14 +21,19 @@
Write, //
},
prelude::*,
+ revocable::Revocable,
static_lock_class,
str::NullTerminatedFormatter,
- sync::Arc,
+ sync::{
+ Arc,
+ UniqueArc, //
+ },
types::{
ForeignOwnable,
ScopeGuard, //
},
};
+use core::ptr::NonNull;
/// A builder for [`GenDisk`].
///
@@ -125,7 +130,7 @@ pub fn build<T: Operations>(
name: fmt::Arguments<'_>,
tagset: Arc<TagSet<T>>,
queue_data: T::QueueData,
- ) -> Result<GenDisk<T>> {
+ ) -> Result<Arc<GenDisk<T>>> {
let data = queue_data.into_foreign();
let recover_data = ScopeGuard::new(|| {
// SAFETY: T::QueueData was created by the call to `into_foreign()` above
@@ -204,10 +209,28 @@ pub fn build<T: Operations>(
// INVARIANT: `gendisk` was added to the VFS via `device_add_disk` above.
// INVARIANT: `gendisk.queue.queue_data` is set to `data` in the call to
// `__blk_mq_alloc_disk` above.
- Ok(GenDisk {
- _tagset: tagset,
- gendisk,
- })
+ let mut disk = UniqueArc::new(
+ GenDisk {
+ _tagset: tagset,
+ gendisk,
+ backref: Arc::pin_init(
+ // INVARIANT: We break `GenDiskRef` invariant here, but we restore it below.
+ Revocable::new(GenDiskRef(NonNull::dangling())),
+ GFP_KERNEL,
+ )?,
+ },
+ GFP_KERNEL,
+ )?;
+
+ disk.backref = Arc::pin_init(
+ // INVARIANT: The `GenDisk` in `disk` is valid for use as a reference.
+ Revocable::new(GenDiskRef(
+ NonNull::new(UniqueArc::as_ptr(&disk).cast_mut()).expect("Should not be null"),
+ )),
+ GFP_KERNEL,
+ )?;
+
+ Ok(disk.into())
}
}
@@ -222,6 +245,14 @@ pub fn build<T: Operations>(
pub struct GenDisk<T: Operations> {
_tagset: Arc<TagSet<T>>,
gendisk: *mut bindings::gendisk,
+ backref: Arc<Revocable<GenDiskRef<T>>>,
+}
+
+impl<T: Operations> GenDisk<T> {
+ /// Get a `GenDiskRef` referencing this `GenDisk`.
+ pub fn get_ref(&self) -> Arc<Revocable<GenDiskRef<T>>> {
+ self.backref.clone()
+ }
}
// SAFETY: `GenDisk` is an owned pointer to a `struct gendisk` and an `Arc` to a
@@ -264,3 +295,25 @@ fn drop(&mut self) {
drop(unsafe { T::QueueData::from_foreign(queue_data) });
}
}
+
+/// A reference to a `GenDisk`.
+///
+/// # Invariants
+///
+/// `self.0` is valid for use as a reference.
+pub struct GenDiskRef<T: Operations>(NonNull<GenDisk<T>>);
+
+// SAFETY: It is safe to transfer ownership of `GenDiskRef` across thread boundaries.
+unsafe impl<T: Operations> Send for GenDiskRef<T> {}
+
+// SAFETY: It is safe to share references to `GenDiskRef` across thread boundaries.
+unsafe impl<T: Operations> Sync for GenDiskRef<T> {}
+
+impl<T: Operations> core::ops::Deref for GenDiskRef<T> {
+ type Target = GenDisk<T>;
+
+ fn deref(&self) -> &Self::Target {
+ // SAFETY: By type invariant, `self.0` is valid for use as a reference.
+ unsafe { self.0.as_ref() }
+ }
+}
--
2.51.2
^ permalink raw reply related
* [PATCH v2 55/83] block: rust: add an abstraction for `struct blk_mq_queue_map`
From: Andreas Hindborg @ 2026-06-09 19:08 UTC (permalink / raw)
To: Liam R. Howlett, Alice Ryhl, Anna-Maria Behnsen, Benno Lossin,
Björn Roy Baron, Boqun Feng, Danilo Krummrich,
FUJITA Tomonori, Frederic Weisbecker, Gary Guo, Jens Axboe,
John Stultz, Lorenzo Stoakes, Lyude Paul, Miguel Ojeda,
Stephen Boyd, Thomas Gleixner, Trevor Gross, Liam R. Howlett,
Boqun Feng, Lorenzo Stoakes
Cc: Andreas Hindborg, linux-block, linux-kernel, linux-mm,
rust-for-linux
In-Reply-To: <20260609-rnull-v6-19-rc5-send-v2-0-82c7404542e2@kernel.org>
Add the `QueueMap` and `QueueType` types as Rust abstractions for CPU
to hardware queue mappings. The `QueueMap` type wraps `struct
blk_mq_queue_map` and provides methods to set up the mapping between
CPUs and hardware queues.
`QueueType` represents the different queue types: default, read, and
poll queues.
Signed-off-by: Andreas Hindborg <a.hindborg@kernel.org>
---
rust/kernel/block/mq.rs | 5 +-
rust/kernel/block/mq/operations.rs | 10 ++--
rust/kernel/block/mq/tag_set.rs | 96 ++++++++++++++++++++++++++++++++++++++
3 files changed, 105 insertions(+), 6 deletions(-)
diff --git a/rust/kernel/block/mq.rs b/rust/kernel/block/mq.rs
index 5bf2cf2736a5..e9bea19d684b 100644
--- a/rust/kernel/block/mq.rs
+++ b/rust/kernel/block/mq.rs
@@ -138,4 +138,7 @@
RequestTimerHandle, //
};
pub use request_queue::RequestQueue;
-pub use tag_set::TagSet;
+pub use tag_set::{
+ QueueType,
+ TagSet, //
+};
diff --git a/rust/kernel/block/mq/operations.rs b/rust/kernel/block/mq/operations.rs
index 8a418bf0f3ba..06faf5647aaa 100644
--- a/rust/kernel/block/mq/operations.rs
+++ b/rust/kernel/block/mq/operations.rs
@@ -128,8 +128,8 @@ fn report_zones(
Err(ENOTSUPP)
}
- /// Called by the kernel to map submission queues to CPU cores.
- fn map_queues(_tag_set: &TagSet<Self>) {
+ /// Called by the kernel to map hardware queues to CPU cores.
+ fn map_queues(_tag_set: Pin<&mut TagSet<Self>>) {
build_error!(crate::error::VTABLE_DEFAULT_ERROR)
}
}
@@ -433,9 +433,9 @@ impl<T: Operations> OperationsVTable<T> {
/// must be a pointer to a valid and initialized `TagSet<T>`. The pointee
/// must be valid for use as a reference at least the duration of this call.
unsafe extern "C" fn map_queues_callback(tag_set: *mut bindings::blk_mq_tag_set) {
- // SAFETY: The safety requirements of this function satiesfies the
- // requirements of `TagSet::from_ptr`.
- let tag_set = unsafe { TagSet::from_ptr(tag_set) };
+ // SAFETY: By C API contract `tag_set` is the tag set registered with the `GenDisk` created
+ // by `GenDiskBuilder`.
+ let tag_set = unsafe { TagSet::from_ptr_mut(tag_set) };
T::map_queues(tag_set);
}
diff --git a/rust/kernel/block/mq/tag_set.rs b/rust/kernel/block/mq/tag_set.rs
index d3e93ad98b6e..e62dfd267fd9 100644
--- a/rust/kernel/block/mq/tag_set.rs
+++ b/rust/kernel/block/mq/tag_set.rs
@@ -124,11 +124,46 @@ pub fn flags(&self) -> Flags {
/// `ptr` must be a pointer to a valid and initialized `TagSet<T>`. There
/// may be no other mutable references to the tag set. The pointee must be
/// live and valid at least for the duration of the returned lifetime `'a`.
+ #[expect(dead_code)]
pub(crate) unsafe fn from_ptr<'a>(ptr: *mut bindings::blk_mq_tag_set) -> &'a Self {
// SAFETY: By the safety requirements of this function, `ptr` is valid
// for use as a reference for the duration of `'a`.
unsafe { &*(ptr.cast::<Self>()) }
}
+
+ /// Create a pinned mutable reference to a `TagSet<T>` from a raw pointer.
+ ///
+ /// # Safety
+ ///
+ /// `ptr` must be a pointer to a valid and initialized `TagSet<T>`. While the returned
+ /// reference is live there must be no other references, shared or mutable, to the tag set.
+ /// The pointee must be live and valid at least for the duration of the returned lifetime `'a`.
+ pub(crate) unsafe fn from_ptr_mut<'a>(ptr: *mut bindings::blk_mq_tag_set) -> Pin<&'a mut Self> {
+ // SAFETY: By function safety requirements, `ptr` is valid for use as a mutable reference.
+ let mref = unsafe { &mut *(ptr.cast::<Self>()) };
+
+ // SAFETY: We never move out of `mref`.
+ unsafe { Pin::new_unchecked(mref) }
+ }
+
+    /// Helper function to invoke a closure for each hardware queue type supported.
+    ///
+    /// This function invokes `cb` for each variant of [`QueueType`] that this [`TagSet`] supports.
+    /// This is helpful for setting up CPU to hardware queue maps in the [`Operations::map_queues`]
+    /// callback. Returns `EINVAL` if the tag set reports a map with no matching [`QueueType`].
+    pub fn update_maps(self: Pin<&mut Self>, mut cb: impl FnMut(QueueMap)) -> Result {
+        // SAFETY: By type invariant, `self.inner` is valid.
+        let nr_maps = unsafe { (*self.inner.get()).nr_maps };
+        for i in 0..nr_maps {
+            // Convert first: an unknown queue type must fail before `map` is indexed.
+            let kind = QueueType::try_from(i)?;
+            // SAFETY: By type invariant, `self.inner` is valid.
+            let map = unsafe { &raw mut (*self.inner.get()).map[i as usize] };
+            cb(QueueMap { map, kind });
+        }
+
+        Ok(())
+    }
}
#[pinned_drop]
@@ -164,3 +199,64 @@ unsafe impl<T> Send for TagSet<T>
T::TagSetData: Send,
{
}
+
+/// A [`TagSet`] CPU to hardware queue mapping.
+///
+/// # Invariants
+///
+/// - `self.map` points to a valid `blk_mq_queue_map`
+pub struct QueueMap {
+ map: *mut bindings::blk_mq_queue_map,
+ kind: QueueType,
+}
+
+impl QueueMap {
+ /// Set the number of queues for this mapping kind.
+ pub fn set_queue_count(&mut self, nr_queues: u32) {
+ // SAFETY: By type invariant, `self.map` is valid.
+ unsafe { (*self.map).nr_queues = nr_queues }
+ }
+
+ /// First hardware queue to map this queue kind onto. Used by the PCIe NVMe driver to map each
+ /// hardware queue type ([`QueueType`]) onto a distinct set of hardware queues.
+ pub fn set_offset(&mut self, offset: u32) {
+ // SAFETY: By type invariant, `self.map` is valid.
+ unsafe { (*self.map).queue_offset = offset }
+ }
+
+ /// Effectuate the mapping described by [`Self`].
+ pub fn map_queues(&self) {
+ // SAFETY: By type invariant, `self.map` is valid.
+ unsafe { bindings::blk_mq_map_queues(self.map) }
+ }
+
+ /// Return the kind of this queue mapping.
+ pub fn kind(&self) -> QueueType {
+ self.kind
+ }
+}
+
+/// Type of hardware queue.
+#[derive(Copy, Clone, Debug, PartialEq, Eq)]
+#[repr(u32)]
+pub enum QueueType {
+ /// All I/O not otherwise accounted for.
+ Default = bindings::hctx_type_HCTX_TYPE_DEFAULT,
+ /// Just for READ I/O.
+ Read = bindings::hctx_type_HCTX_TYPE_READ,
+ /// Polled I/O of any kind.
+ Poll = bindings::hctx_type_HCTX_TYPE_POLL,
+}
+
+impl TryFrom<u32> for QueueType {
+ type Error = kernel::error::Error;
+
+ fn try_from(value: u32) -> core::result::Result<Self, Self::Error> {
+ match value {
+ bindings::hctx_type_HCTX_TYPE_DEFAULT => Ok(QueueType::Default),
+ bindings::hctx_type_HCTX_TYPE_READ => Ok(QueueType::Read),
+ bindings::hctx_type_HCTX_TYPE_POLL => Ok(QueueType::Poll),
+ _ => Err(kernel::error::code::EINVAL),
+ }
+ }
+}
--
2.51.2
^ permalink raw reply related
* [PATCH v2 17/83] block: rnull: add submit queue count config option
From: Andreas Hindborg @ 2026-06-09 19:07 UTC (permalink / raw)
To: Liam R. Howlett, Alice Ryhl, Anna-Maria Behnsen, Benno Lossin,
Björn Roy Baron, Boqun Feng, Danilo Krummrich,
FUJITA Tomonori, Frederic Weisbecker, Gary Guo, Jens Axboe,
John Stultz, Lorenzo Stoakes, Lyude Paul, Miguel Ojeda,
Stephen Boyd, Thomas Gleixner, Trevor Gross, Liam R. Howlett,
Boqun Feng, Lorenzo Stoakes
Cc: Andreas Hindborg, linux-block, linux-kernel, linux-mm,
rust-for-linux
In-Reply-To: <20260609-rnull-v6-19-rc5-send-v2-0-82c7404542e2@kernel.org>
Allow user space to control the number of submission queues when creating
null block devices.
Signed-off-by: Andreas Hindborg <a.hindborg@kernel.org>
---
drivers/block/rnull/configfs.rs | 56 +++++++++++++++++++++++++++++++++--------
drivers/block/rnull/rnull.rs | 56 +++++++++++++++++++++++++++--------------
2 files changed, 83 insertions(+), 29 deletions(-)
diff --git a/drivers/block/rnull/configfs.rs b/drivers/block/rnull/configfs.rs
index 8daf2ca409ba..0dea92a9079b 100644
--- a/drivers/block/rnull/configfs.rs
+++ b/drivers/block/rnull/configfs.rs
@@ -60,7 +60,10 @@ impl AttributeOperations<0> for Config {
fn show(_this: &Config, page: &mut [u8; PAGE_SIZE]) -> Result<usize> {
let mut writer = kernel::str::Formatter::new(page);
- writer.write_str("blocksize,size,rotational,irqmode,completion_nsec,memory_backed\n")?;
+ writer.write_str(
+ "blocksize,size,rotational,irqmode,completion_nsec,memory_backed,\
+ submit_queues\n",
+ )?;
Ok(writer.bytes_written())
}
}
@@ -85,6 +88,7 @@ fn make_group(
irqmode: 4,
completion_nsec: 5,
memory_backed: 6,
+ submit_queues: 7,
],
};
@@ -103,6 +107,7 @@ fn make_group(
completion_time: time::Delta::ZERO,
name: name.try_into()?,
memory_backed: false,
+ submit_queues: 1,
}),
}),
core::iter::empty(),
@@ -168,6 +173,7 @@ struct DeviceConfigInner {
completion_time: time::Delta,
disk: Option<GenDisk<NullBlkDevice>>,
memory_backed: bool,
+ submit_queues: u32,
}
#[vtable]
@@ -191,15 +197,16 @@ fn store(this: &DeviceConfig, page: &[u8]) -> Result {
let mut guard = this.data.lock();
if !guard.powered && power_op {
- guard.disk = Some(NullBlkDevice::new(
- &guard.name,
- guard.block_size,
- guard.rotational,
- guard.capacity_mib,
- guard.irq_mode,
- guard.completion_time,
- guard.memory_backed,
- )?);
+ guard.disk = Some(NullBlkDevice::new(crate::NullBlkOptions {
+ name: &guard.name,
+ block_size: guard.block_size,
+ rotational: guard.rotational,
+ capacity_mib: guard.capacity_mib,
+ irq_mode: guard.irq_mode,
+ completion_time: guard.completion_time,
+ memory_backed: guard.memory_backed,
+ submit_queues: guard.submit_queues,
+ })?);
guard.powered = true;
} else if guard.powered && !power_op {
drop(guard.disk.take());
@@ -232,3 +239,32 @@ fn from_str(s: &str) -> Result<Self> {
}
configfs_simple_bool_field!(DeviceConfig, 6, memory_backed);
+
+#[vtable]
+impl configfs::AttributeOperations<7> for DeviceConfig {
+    type Data = DeviceConfig;
+
+    fn show(this: &DeviceConfig, page: &mut [u8; PAGE_SIZE]) -> Result<usize> {
+        let mut writer = kernel::str::Formatter::new(page);
+        writer.write_fmt(fmt!("{}\n", this.data.lock().submit_queues))?;
+        Ok(writer.bytes_written())
+    }
+
+    fn store(this: &DeviceConfig, page: &[u8]) -> Result {
+        // Keep the lock held from the `powered` check to the assignment. Dropping it in between
+        // would let a concurrent writer power the device on before the new value is stored.
+        let mut guard = this.data.lock();
+        if guard.powered {
+            return Err(EBUSY);
+        }
+
+        let text = core::str::from_utf8(page)?.trim();
+        let value = text.parse::<u32>().map_err(|_| kernel::error::code::EINVAL)?;
+        if value == 0 || value > kernel::cpu::num_possible_cpus() {
+            return Err(kernel::error::code::EINVAL);
+        }
+
+        guard.submit_queues = value;
+        Ok(())
+    }
+}
diff --git a/drivers/block/rnull/rnull.rs b/drivers/block/rnull/rnull.rs
index 8e4d2b270bcf..a7c35f33631a 100644
--- a/drivers/block/rnull/rnull.rs
+++ b/drivers/block/rnull/rnull.rs
@@ -78,6 +78,10 @@
default: false,
description: "Create a memory-backed block device.",
},
+ submit_queues: u32 {
+ default: 1,
+ description: "Number of submission queues",
+ },
},
}
@@ -100,15 +104,16 @@ fn init(_module: &'static ThisModule) -> impl PinInit<Self, Error> {
for i in 0..module_parameters::nr_devices.value() {
let name = CString::try_from_fmt(fmt!("rnullb{}", i))?;
- let disk = NullBlkDevice::new(
- &name,
- module_parameters::bs.value(),
- module_parameters::rotational.value(),
- module_parameters::gb.value() * 1024,
- module_parameters::irqmode.value().try_into()?,
- Delta::from_nanos(completion_time),
- module_parameters::memory_backed.value(),
- )?;
+ let disk = NullBlkDevice::new(NullBlkOptions {
+ name: &name,
+ block_size: module_parameters::bs.value(),
+ rotational: module_parameters::rotational.value(),
+ capacity_mib: module_parameters::gb.value() * 1024,
+ irq_mode: module_parameters::irqmode.value().try_into()?,
+ completion_time: Delta::from_nanos(completion_time),
+ memory_backed: module_parameters::memory_backed.value(),
+ submit_queues: module_parameters::submit_queues.value(),
+ })?;
disks.push(disk, GFP_KERNEL)?;
}
@@ -122,25 +127,38 @@ fn init(_module: &'static ThisModule) -> impl PinInit<Self, Error> {
}
}
+struct NullBlkOptions<'a> {
+ name: &'a CStr,
+ block_size: u32,
+ rotational: bool,
+ capacity_mib: u64,
+ irq_mode: IRQMode,
+ completion_time: Delta,
+ memory_backed: bool,
+ submit_queues: u32,
+}
struct NullBlkDevice;
impl NullBlkDevice {
- fn new(
- name: &CStr,
- block_size: u32,
- rotational: bool,
- capacity_mib: u64,
- irq_mode: IRQMode,
- completion_time: Delta,
- memory_backed: bool,
- ) -> Result<GenDisk<Self>> {
+ fn new(options: NullBlkOptions<'_>) -> Result<GenDisk<Self>> {
+ let NullBlkOptions {
+ name,
+ block_size,
+ rotational,
+ capacity_mib,
+ irq_mode,
+ completion_time,
+ memory_backed,
+ submit_queues,
+ } = options;
+
let flags = if memory_backed {
mq::tag_set::Flag::Blocking.into()
} else {
mq::tag_set::Flags::default()
};
- let tagset = Arc::pin_init(TagSet::new(1, 256, 1, flags), GFP_KERNEL)?;
+ let tagset = Arc::pin_init(TagSet::new(submit_queues, 256, 1, flags), GFP_KERNEL)?;
let queue_data = Box::pin_init(
pin_init!(QueueData {
--
2.51.2
^ permalink raw reply related
* [PATCH v2 30/83] block: rnull: add badblocks_once support
From: Andreas Hindborg @ 2026-06-09 19:08 UTC (permalink / raw)
To: Liam R. Howlett, Alice Ryhl, Anna-Maria Behnsen, Benno Lossin,
Björn Roy Baron, Boqun Feng, Danilo Krummrich,
FUJITA Tomonori, Frederic Weisbecker, Gary Guo, Jens Axboe,
John Stultz, Lorenzo Stoakes, Lyude Paul, Miguel Ojeda,
Stephen Boyd, Thomas Gleixner, Trevor Gross, Liam R. Howlett,
Boqun Feng, Lorenzo Stoakes
Cc: Andreas Hindborg, linux-block, linux-kernel, linux-mm,
rust-for-linux
In-Reply-To: <20260609-rnull-v6-19-rc5-send-v2-0-82c7404542e2@kernel.org>
Add support for the badblocks_once feature, which automatically clears
bad blocks after they are encountered during I/O operations. This
matches the functionality in the C null_blk driver.
When badblocks_once is enabled:
- Bad blocks are checked during I/O requests as usual
- If a bad block is encountered, the I/O is marked as failed
- The bad block range is immediately cleared from the bad blocks table
- Subsequent I/O to the same sectors will succeed
This feature is useful for testing scenarios where bad blocks are
transient or where devices can recover from bad sectors after a single
access attempt.
The feature is configurable via the configfs badblocks_once attribute
and disabled by default, maintaining compatibility with existing
behavior.
Signed-off-by: Andreas Hindborg <a.hindborg@kernel.org>
---
drivers/block/rnull/configfs.rs | 6 ++++++
drivers/block/rnull/rnull.rs | 21 +++++++++++++++------
2 files changed, 21 insertions(+), 6 deletions(-)
diff --git a/drivers/block/rnull/configfs.rs b/drivers/block/rnull/configfs.rs
index 4db3ba26c2d1..05229ba9173a 100644
--- a/drivers/block/rnull/configfs.rs
+++ b/drivers/block/rnull/configfs.rs
@@ -102,6 +102,7 @@ fn make_group(
discard: 10,
no_sched:11,
badblocks: 12,
+ badblocks_once: 13,
],
};
@@ -125,6 +126,7 @@ fn make_group(
discard: false,
no_sched: false,
bad_blocks: Arc::pin_init(BadBlocks::new(false), GFP_KERNEL)?,
+ bad_blocks_once: false,
}),
}),
core::iter::empty(),
@@ -195,6 +197,7 @@ struct DeviceConfigInner {
discard: bool,
no_sched: bool,
bad_blocks: Arc<BadBlocks>,
+ bad_blocks_once: bool,
}
#[vtable]
@@ -231,6 +234,7 @@ fn store(this: &DeviceConfig, page: &[u8]) -> Result {
discard: guard.discard,
no_sched: guard.no_sched,
bad_blocks: guard.bad_blocks.clone(),
+ bad_blocks_once: guard.bad_blocks_once,
})?);
guard.powered = true;
} else if guard.powered && !power_op {
@@ -383,3 +387,5 @@ fn store(this: &DeviceConfig, page: &[u8]) -> Result {
Ok(())
}
}
+
+configfs_simple_bool_field!(DeviceConfig, 13, bad_blocks_once);
diff --git a/drivers/block/rnull/rnull.rs b/drivers/block/rnull/rnull.rs
index 90dbf318c2f8..5486eb6dd921 100644
--- a/drivers/block/rnull/rnull.rs
+++ b/drivers/block/rnull/rnull.rs
@@ -159,6 +159,7 @@ fn init(_module: &'static ThisModule) -> impl PinInit<Self, Error> {
discard: module_parameters::discard.value(),
no_sched: module_parameters::no_sched.value(),
bad_blocks: Arc::pin_init(BadBlocks::new(false), GFP_KERNEL)?,
+ bad_blocks_once: false,
})?;
disks.push(disk, GFP_KERNEL)?;
}
@@ -186,6 +187,7 @@ struct NullBlkOptions<'a> {
discard: bool,
no_sched: bool,
bad_blocks: Arc<BadBlocks>,
+ bad_blocks_once: bool,
}
struct NullBlkDevice;
@@ -204,6 +206,7 @@ fn new(options: NullBlkOptions<'_>) -> Result<GenDisk<Self>> {
discard,
no_sched,
bad_blocks,
+ bad_blocks_once,
} = options;
let mut flags = mq::tag_set::Flags::default();
@@ -246,6 +249,7 @@ fn new(options: NullBlkOptions<'_>) -> Result<GenDisk<Self>> {
memory_backed,
block_size: block_size.into(),
bad_blocks,
+ bad_blocks_once,
}),
GFP_KERNEL,
)?;
@@ -416,6 +420,7 @@ struct QueueData {
memory_backed: bool,
block_size: u64,
bad_blocks: Arc<BadBlocks>,
+ bad_blocks_once: bool,
}
#[pin_data]
@@ -465,12 +470,16 @@ fn queue_rq(
if queue_data.bad_blocks.enabled() {
let start = rq.sector();
let end = start + u64::from(rq.sectors());
- if !matches!(
- queue_data.bad_blocks.check(start..end),
- badblocks::BlockStatus::None
- ) {
- rq.data_ref().error.store(1, ordering::Relaxed);
- }
+ match queue_data.bad_blocks.check(start..end) {
+ badblocks::BlockStatus::None => {}
+ badblocks::BlockStatus::Acknowledged(range)
+ | badblocks::BlockStatus::Unacknowledged(range) => {
+ rq.data_ref().error.store(1, ordering::Relaxed);
+ if queue_data.bad_blocks_once {
+ queue_data.bad_blocks.set_good(range)?;
+ }
+ }
+ };
}
// TODO: Skip IO if bad block.
--
2.51.2
^ permalink raw reply related
* [PATCH v2 31/83] block: rust: add `Segment::truncate`
From: Andreas Hindborg @ 2026-06-09 19:08 UTC (permalink / raw)
To: Liam R. Howlett, Alice Ryhl, Anna-Maria Behnsen, Benno Lossin,
Björn Roy Baron, Boqun Feng, Danilo Krummrich,
FUJITA Tomonori, Frederic Weisbecker, Gary Guo, Jens Axboe,
John Stultz, Lorenzo Stoakes, Lyude Paul, Miguel Ojeda,
Stephen Boyd, Thomas Gleixner, Trevor Gross, Liam R. Howlett,
Boqun Feng, Lorenzo Stoakes
Cc: Andreas Hindborg, linux-block, linux-kernel, linux-mm,
rust-for-linux
In-Reply-To: <20260609-rnull-v6-19-rc5-send-v2-0-82c7404542e2@kernel.org>
Add a method that limits the remaining length of a `Segment` without
moving its offset. This complements `Segment::advance`, which can skip
data at the front but cannot trim data at the back, and gives callers a
way to clip a segment to a maximum byte count before handing it to the
existing `copy_to_page` / `copy_from_page` / `zero_page` helpers, which
already bound themselves by `Segment::len()`.
This is needed by rnull's partial bad-block I/O path, which needs to
clamp per-segment work to a sector boundary computed from the bad-block
range.
Signed-off-by: Andreas Hindborg <a.hindborg@kernel.org>
---
rust/kernel/block/bio/vec.rs | 12 ++++++++++++
1 file changed, 12 insertions(+)
diff --git a/rust/kernel/block/bio/vec.rs b/rust/kernel/block/bio/vec.rs
index 99ab164d4038..61d83a07397f 100644
--- a/rust/kernel/block/bio/vec.rs
+++ b/rust/kernel/block/bio/vec.rs
@@ -81,6 +81,18 @@ pub fn advance(&mut self, count: u32) -> Result {
Ok(())
}
+ /// Limit the remaining length of the segment.
+ ///
+ /// Shortens the segment to at most `new_len` bytes. If `new_len` is
+ /// greater than or equal to the current remaining length, the segment is
+ /// left unchanged. The offset is not modified, so subsequent copy
+ /// operations still start from the current position.
+ pub fn truncate(&mut self, new_len: u32) {
+ if new_len < self.len() {
+ self.bio_vec.bv_len = new_len;
+ }
+ }
+
/// Copy data of this segment into `dst_page`.
///
/// Copies data from the current offset to the next page boundary. That is `PAGE_SIZE -
--
2.51.2
^ permalink raw reply related
* [PATCH v2 05/83] block: rnull: add macros to define configfs attributes
From: Andreas Hindborg @ 2026-06-09 19:07 UTC (permalink / raw)
To: Liam R. Howlett, Alice Ryhl, Anna-Maria Behnsen, Benno Lossin,
Björn Roy Baron, Boqun Feng, Danilo Krummrich,
FUJITA Tomonori, Frederic Weisbecker, Gary Guo, Jens Axboe,
John Stultz, Lorenzo Stoakes, Lyude Paul, Miguel Ojeda,
Stephen Boyd, Thomas Gleixner, Trevor Gross, Liam R. Howlett,
Boqun Feng, Lorenzo Stoakes
Cc: Andreas Hindborg, linux-block, linux-kernel, linux-mm,
rust-for-linux
In-Reply-To: <20260609-rnull-v6-19-rc5-send-v2-0-82c7404542e2@kernel.org>
Defining configfs attributes in Rust is a bit verbose at the moment. Add
some macros to make the attribute definition less verbose.
The configfs Rust abstractions should eventually provide procedural macros
for this task. When we get more users of the configfs Rust abstractions, we
shall consider this task.
Signed-off-by: Andreas Hindborg <a.hindborg@kernel.org>
---
drivers/block/rnull/configfs.rs | 134 +++++++++---------------------
drivers/block/rnull/configfs/macros.rs | 143 +++++++++++++++++++++++++++++++++
2 files changed, 179 insertions(+), 98 deletions(-)
diff --git a/drivers/block/rnull/configfs.rs b/drivers/block/rnull/configfs.rs
index b165347e9413..fd309fc17e66 100644
--- a/drivers/block/rnull/configfs.rs
+++ b/drivers/block/rnull/configfs.rs
@@ -1,18 +1,39 @@
// SPDX-License-Identifier: GPL-2.0
-use super::{NullBlkDevice, THIS_MODULE};
+use super::{
+ NullBlkDevice,
+ THIS_MODULE, //
+};
use kernel::{
- block::mq::gen_disk::{GenDisk, GenDiskBuilder},
- configfs::{self, AttributeOperations},
+ block::mq::gen_disk::{
+ GenDisk,
+ GenDiskBuilder, //
+ },
+ configfs::{
+ self,
+ AttributeOperations, //
+ },
configfs_attrs,
- fmt::{self, Write as _},
+ fmt::{
+ self,
+ Write as _, //
+ },
new_mutex,
page::PAGE_SIZE,
prelude::*,
- str::{kstrtobool_bytes, CString},
- sync::Mutex,
+ str::{
+ kstrtobool_bytes,
+ CString, //
+ },
+ sync::Mutex, //
+};
+use macros::{
+ configfs_simple_bool_field,
+ configfs_simple_field, //
};
+mod macros;
+
pub(crate) fn subsystem() -> impl PinInit<kernel::configfs::Subsystem<Config>, Error> {
let item_type = configfs_attrs! {
container: configfs::Subsystem<Config>,
@@ -164,99 +185,16 @@ fn store(this: &DeviceConfig, page: &[u8]) -> Result {
}
}
-#[vtable]
-impl configfs::AttributeOperations<1> for DeviceConfig {
- type Data = DeviceConfig;
-
- fn show(this: &DeviceConfig, page: &mut [u8; PAGE_SIZE]) -> Result<usize> {
- let mut writer = kernel::str::Formatter::new(page);
- writer.write_fmt(fmt!("{}\n", this.data.lock().block_size))?;
- Ok(writer.bytes_written())
- }
-
- fn store(this: &DeviceConfig, page: &[u8]) -> Result {
- if this.data.lock().powered {
- return Err(EBUSY);
- }
-
- let text = core::str::from_utf8(page)?.trim();
- let value = text.parse::<u32>().map_err(|_| EINVAL)?;
-
- GenDiskBuilder::validate_block_size(value)?;
- this.data.lock().block_size = value;
- Ok(())
- }
-}
-
-#[vtable]
-impl configfs::AttributeOperations<2> for DeviceConfig {
- type Data = DeviceConfig;
-
- fn show(this: &DeviceConfig, page: &mut [u8; PAGE_SIZE]) -> Result<usize> {
- let mut writer = kernel::str::Formatter::new(page);
-
- if this.data.lock().rotational {
- writer.write_str("1\n")?;
- } else {
- writer.write_str("0\n")?;
- }
-
- Ok(writer.bytes_written())
- }
-
- fn store(this: &DeviceConfig, page: &[u8]) -> Result {
- if this.data.lock().powered {
- return Err(EBUSY);
- }
-
- this.data.lock().rotational = kstrtobool_bytes(page)?;
-
- Ok(())
- }
-}
-
-#[vtable]
-impl configfs::AttributeOperations<3> for DeviceConfig {
- type Data = DeviceConfig;
-
- fn show(this: &DeviceConfig, page: &mut [u8; PAGE_SIZE]) -> Result<usize> {
- let mut writer = kernel::str::Formatter::new(page);
- writer.write_fmt(fmt!("{}\n", this.data.lock().capacity_mib))?;
- Ok(writer.bytes_written())
- }
-
- fn store(this: &DeviceConfig, page: &[u8]) -> Result {
- if this.data.lock().powered {
- return Err(EBUSY);
- }
-
- let text = core::str::from_utf8(page)?.trim();
- let value = text.parse::<u64>().map_err(|_| EINVAL)?;
-
- this.data.lock().capacity_mib = value;
- Ok(())
- }
-}
-
-#[vtable]
-impl configfs::AttributeOperations<4> for DeviceConfig {
- type Data = DeviceConfig;
-
- fn show(this: &DeviceConfig, page: &mut [u8; PAGE_SIZE]) -> Result<usize> {
- let mut writer = kernel::str::Formatter::new(page);
- writer.write_fmt(fmt!("{}\n", this.data.lock().irq_mode))?;
- Ok(writer.bytes_written())
- }
+configfs_simple_field!(DeviceConfig, 1, block_size, u32, check GenDiskBuilder::validate_block_size);
+configfs_simple_bool_field!(DeviceConfig, 2, rotational);
+configfs_simple_field!(DeviceConfig, 3, capacity_mib, u64);
+configfs_simple_field!(DeviceConfig, 4, irq_mode, IRQMode);
- fn store(this: &DeviceConfig, page: &[u8]) -> Result {
- if this.data.lock().powered {
- return Err(EBUSY);
- }
+impl core::str::FromStr for IRQMode {
+ type Err = Error;
- let text = core::str::from_utf8(page)?.trim();
- let value = text.parse::<u8>().map_err(|_| EINVAL)?;
-
- this.data.lock().irq_mode = IRQMode::try_from(value)?;
- Ok(())
+ fn from_str(s: &str) -> Result<Self> {
+ let value: u8 = s.parse().map_err(|_| EINVAL)?;
+ value.try_into()
}
}
diff --git a/drivers/block/rnull/configfs/macros.rs b/drivers/block/rnull/configfs/macros.rs
new file mode 100644
index 000000000000..30bb32238457
--- /dev/null
+++ b/drivers/block/rnull/configfs/macros.rs
@@ -0,0 +1,143 @@
+// SPDX-License-Identifier: GPL-2.0
+
+use super::{
+ DeviceConfig,
+ DeviceConfigInner, //
+};
+use core::str::FromStr;
+use kernel::{
+ fmt::{
+ self,
+ Write, //
+ },
+ page::PAGE_SIZE,
+ prelude::*,
+};
+
+pub(crate) fn show_field<T: fmt::Display>(value: T, page: &mut [u8; PAGE_SIZE]) -> Result<usize> {
+ let mut writer = kernel::str::Formatter::new(page);
+ writer.write_fmt(fmt!("{}\n", value))?;
+ Ok(writer.bytes_written())
+}
+
+// The lock guard is passed to `store_fn` so the powered check and the
+// store happen atomically. Releasing the lock between the two would
+// allow another writer to power the device on in the gap.
+pub(crate) fn store_with_power_check<F>(this: &DeviceConfig, page: &[u8], store_fn: F) -> Result
+where
+ F: FnOnce(&mut DeviceConfigInner, &[u8]) -> Result,
+{
+ let mut guard = this.data.lock();
+ if guard.powered {
+ return Err(EBUSY);
+ }
+ store_fn(&mut guard, page)
+}
+
+pub(crate) fn store_number_with_power_check<F, T>(
+ this: &DeviceConfig,
+ page: &[u8],
+ store_fn: F,
+) -> Result
+where
+ F: FnOnce(&mut DeviceConfigInner, T) -> Result,
+ T: FromStr,
+{
+ let text = core::str::from_utf8(page)?.trim();
+ let value = text.parse::<T>().map_err(|_| EINVAL)?;
+
+ let mut guard = this.data.lock();
+ if guard.powered {
+ return Err(EBUSY);
+ }
+
+ store_fn(&mut guard, value)
+}
+
+macro_rules! configfs_attribute {
+ (
+ $type:ty,
+ $id:literal,
+ show: |$show_this:ident, $show_page:ident| $show_block:expr,
+ store: |$store_this:ident, $store_page:ident| $store_block:expr
+ $(,)?
+ ) => {
+ #[vtable]
+ impl configfs::AttributeOperations<$id> for $type {
+ type Data = $type;
+
+ fn show($show_this: &$type, $show_page: &mut [u8; PAGE_SIZE]) -> Result<usize> {
+ $show_block
+ }
+
+ fn store($store_this: &$type, $store_page: &[u8]) -> Result {
+ $store_block
+ }
+ }
+ };
+}
+pub(crate) use configfs_attribute;
+
+// Specialized macro for simple boolean fields that just store kstrtobool_bytes result.
+// The value is shown as `1`/`0`, matching the hand-written `show` this macro replaces; a plain
+// `bool` would be formatted as `true`/`false` and change what user space reads.
+macro_rules! configfs_simple_bool_field {
+    ($type:ty, $id:literal, $field:ident) => {
+        crate::configfs::macros::configfs_attribute!($type, $id,
+            show: |this, page| crate::configfs::macros::show_field(
+                u8::from(this.data.lock().$field), page),
+            store: |this, page| crate::configfs::macros::store_with_power_check(
+                this, page, |data, page| kstrtobool_bytes(page).map(|value| data.$field = value))
+        );
+    };
+}
+pub(crate) use configfs_simple_bool_field;
+
+// Specialized macro for simple numeric fields that just parse and assign
+macro_rules! configfs_simple_field {
+ // Simple direct assignment
+ ($type:ty, $id:literal, $field:ident, $field_type:ty) => {
+ crate::configfs::macros::configfs_attribute!($type, $id,
+ show: |this, page| crate::configfs::macros::show_field(this.data.lock().$field, page),
+ store: |this, page| crate::configfs::macros::store_number_with_power_check(
+ this,
+ page,
+ |data, value: $field_type| {
+ data.$field = value;
+ Ok(())
+ }
+ )
+ );
+ };
+ // With infallible conversion expression (direct value)
+ ($type:ty, $id:literal, $field:ident, $field_type:ty, into $convert:expr) => {
+ crate::configfs::macros::configfs_attribute!($type, $id,
+ show: |this, page|
+ crate::configfs::macros::show_field(this.data.lock().$field, page),
+ store: |this, page| crate::configfs::macros::store_number_with_power_check(
+ this,
+ page,
+ |data, value: $field_type| {
+ data.$field = $convert(value);
+ Ok(())
+ }
+ )
+ );
+ };
+ // With check, no conversion
+ ($type:ty, $id:literal, $field:ident, $field_type:ty, check $check:expr) => {
+ crate::configfs::macros::configfs_attribute!($type, $id,
+ show: |this, page| crate::configfs::macros::show_field(this.data.lock().$field, page),
+ store: |this, page| crate::configfs::macros::store_number_with_power_check(
+ this,
+ page,
+ |data, value: $field_type| {
+ $check(value)?;
+ data.$field = value;
+ Ok(())
+ }
+ )
+ );
+ };
+}
+pub(crate) use configfs_simple_field;
--
2.51.2
^ permalink raw reply related
* [PATCH v2 32/83] block: rnull: add partial I/O support for bad blocks
From: Andreas Hindborg @ 2026-06-09 19:08 UTC (permalink / raw)
To: Liam R. Howlett, Alice Ryhl, Anna-Maria Behnsen, Benno Lossin,
Björn Roy Baron, Boqun Feng, Danilo Krummrich,
FUJITA Tomonori, Frederic Weisbecker, Gary Guo, Jens Axboe,
John Stultz, Lorenzo Stoakes, Lyude Paul, Miguel Ojeda,
Stephen Boyd, Thomas Gleixner, Trevor Gross, Liam R. Howlett,
Boqun Feng, Lorenzo Stoakes
Cc: Andreas Hindborg, linux-block, linux-kernel, linux-mm,
rust-for-linux
In-Reply-To: <20260609-rnull-v6-19-rc5-send-v2-0-82c7404542e2@kernel.org>
Add bad_blocks_partial_io configuration option that allows partial I/O
completion when encountering bad blocks, rather than failing the entire
request.
When enabled, requests are truncated to stop before the first bad block
range, allowing the valid portion to be processed successfully. This
improves compatibility with applications that can handle partial
reads/writes.
Signed-off-by: Andreas Hindborg <a.hindborg@kernel.org>
---
drivers/block/rnull/configfs.rs | 5 ++
drivers/block/rnull/rnull.rs | 126 +++++++++++++++++++++++++++++-----------
2 files changed, 97 insertions(+), 34 deletions(-)
diff --git a/drivers/block/rnull/configfs.rs b/drivers/block/rnull/configfs.rs
index 05229ba9173a..0e9fe8cdc07f 100644
--- a/drivers/block/rnull/configfs.rs
+++ b/drivers/block/rnull/configfs.rs
@@ -103,6 +103,7 @@ fn make_group(
no_sched:11,
badblocks: 12,
badblocks_once: 13,
+ badblocks_partial_io: 14,
],
};
@@ -127,6 +128,7 @@ fn make_group(
no_sched: false,
bad_blocks: Arc::pin_init(BadBlocks::new(false), GFP_KERNEL)?,
bad_blocks_once: false,
+ bad_blocks_partial_io: false,
}),
}),
core::iter::empty(),
@@ -198,6 +200,7 @@ struct DeviceConfigInner {
no_sched: bool,
bad_blocks: Arc<BadBlocks>,
bad_blocks_once: bool,
+ bad_blocks_partial_io: bool,
}
#[vtable]
@@ -235,6 +238,7 @@ fn store(this: &DeviceConfig, page: &[u8]) -> Result {
no_sched: guard.no_sched,
bad_blocks: guard.bad_blocks.clone(),
bad_blocks_once: guard.bad_blocks_once,
+ bad_blocks_partial_io: guard.bad_blocks_partial_io,
})?);
guard.powered = true;
} else if guard.powered && !power_op {
@@ -389,3 +393,4 @@ fn store(this: &DeviceConfig, page: &[u8]) -> Result {
}
configfs_simple_bool_field!(DeviceConfig, 13, bad_blocks_once);
+configfs_simple_bool_field!(DeviceConfig, 14, bad_blocks_partial_io);
diff --git a/drivers/block/rnull/rnull.rs b/drivers/block/rnull/rnull.rs
index 5486eb6dd921..be0b4bd25e53 100644
--- a/drivers/block/rnull/rnull.rs
+++ b/drivers/block/rnull/rnull.rs
@@ -160,6 +160,7 @@ fn init(_module: &'static ThisModule) -> impl PinInit<Self, Error> {
no_sched: module_parameters::no_sched.value(),
bad_blocks: Arc::pin_init(BadBlocks::new(false), GFP_KERNEL)?,
bad_blocks_once: false,
+ bad_blocks_partial_io: false,
})?;
disks.push(disk, GFP_KERNEL)?;
}
@@ -188,6 +189,7 @@ struct NullBlkOptions<'a> {
no_sched: bool,
bad_blocks: Arc<BadBlocks>,
bad_blocks_once: bool,
+ bad_blocks_partial_io: bool,
}
struct NullBlkDevice;
@@ -207,6 +209,7 @@ fn new(options: NullBlkOptions<'_>) -> Result<GenDisk<Self>> {
no_sched,
bad_blocks,
bad_blocks_once,
+ bad_blocks_partial_io,
} = options;
let mut flags = mq::tag_set::Flags::default();
@@ -250,6 +253,7 @@ fn new(options: NullBlkOptions<'_>) -> Result<GenDisk<Self>> {
block_size: block_size.into(),
bad_blocks,
bad_blocks_once,
+ bad_blocks_partial_io,
}),
GFP_KERNEL,
)?;
@@ -352,15 +356,66 @@ fn discard(tree: &XArray<TreeNode>, mut sector: u64, sectors: u64, block_size: u
#[inline(never)]
fn transfer(
- command: bindings::req_op,
+ rq: &mut Owned<mq::Request<Self>>,
tree: &XArray<TreeNode>,
- sector: u64,
- segment: Segment<'_>,
+ max_sectors: u32,
) -> Result {
- match command {
- bindings::req_op_REQ_OP_WRITE => Self::write(tree, sector, segment)?,
- bindings::req_op_REQ_OP_READ => Self::read(tree, sector, segment)?,
- _ => (),
+ let mut sector = rq.sector();
+ let max_end_sector = sector + <u32 as Into<u64>>::into(max_sectors);
+ let command = rq.command();
+
+ for bio in rq.bio_iter_mut() {
+ let segment_iter = bio.segment_iter();
+ for mut segment in segment_iter {
+ // Length might be limited by bad blocks.
+ let segment_length_sectors = segment.len() >> SECTOR_SHIFT;
+ let max_remaining_sectors = (max_end_sector - sector) as u32;
+ let length_sectors_allowed = segment_length_sectors.min(max_remaining_sectors);
+ segment.truncate(length_sectors_allowed << SECTOR_SHIFT);
+ match command {
+ bindings::req_op_REQ_OP_WRITE => Self::write(tree, sector, segment)?,
+ bindings::req_op_REQ_OP_READ => Self::read(tree, sector, segment)?,
+ _ => (),
+ }
+ sector += u64::from(length_sectors_allowed);
+
+ if sector >= max_end_sector {
+ return Ok(());
+ }
+ }
+ }
+ Ok(())
+ }
+
+ fn handle_bad_blocks(
+ rq: &mut Owned<mq::Request<Self>>,
+ queue_data: &QueueData,
+ sectors: &mut u32,
+ ) -> Result {
+ if queue_data.bad_blocks.enabled() {
+ let start = rq.sector();
+ let end = start + u64::from(*sectors);
+ match queue_data.bad_blocks.check(start..end) {
+ badblocks::BlockStatus::None => {}
+ badblocks::BlockStatus::Acknowledged(mut range)
+ | badblocks::BlockStatus::Unacknowledged(mut range) => {
+ rq.data_ref().error.store(1, ordering::Relaxed);
+
+ if queue_data.bad_blocks_once {
+ queue_data.bad_blocks.set_good(range.clone())?;
+ }
+
+ if queue_data.bad_blocks_partial_io {
+ let block_size_sectors = queue_data.block_size >> SECTOR_SHIFT;
+ range.start = align_down(range.start, block_size_sectors);
+ if start < range.start {
+ *sectors = (range.start - start) as u32;
+ }
+ } else {
+ *sectors = 0;
+ }
+ }
+ };
}
Ok(())
}
@@ -421,6 +476,7 @@ struct QueueData {
block_size: u64,
bad_blocks: Arc<BadBlocks>,
bad_blocks_once: bool,
+ bad_blocks_partial_io: bool,
}
#[pin_data]
@@ -449,6 +505,30 @@ impl HasHrTimer<Self> for Pdu {
}
}
+fn is_power_of_two<T>(value: T) -> bool
+where
+ T: core::ops::Sub<T, Output = T>,
+ T: core::ops::BitAnd<Output = T>,
+ T: core::cmp::PartialOrd<T>,
+ T: Copy,
+ T: From<u8>,
+{
+ (value > 0u8.into()) && (value & (value - 1u8.into())) == 0u8.into()
+}
+
+fn align_down<T>(value: T, to: T) -> T
+where
+ T: core::ops::Sub<T, Output = T>,
+ T: core::ops::Not<Output = T>,
+ T: core::ops::BitAnd<Output = T>,
+ T: core::cmp::PartialOrd<T>,
+ T: Copy,
+ T: From<u8>,
+{
+ debug_assert!(is_power_of_two(to));
+ value & !(to - 1u8.into())
+}
+
#[vtable]
impl Operations for NullBlkDevice {
type QueueData = Pin<KBox<QueueData>>;
@@ -467,40 +547,18 @@ fn queue_rq(
mut rq: Owned<mq::Request<Self>>,
_is_last: bool,
) -> Result {
- if queue_data.bad_blocks.enabled() {
- let start = rq.sector();
- let end = start + u64::from(rq.sectors());
- match queue_data.bad_blocks.check(start..end) {
- badblocks::BlockStatus::None => {}
- badblocks::BlockStatus::Acknowledged(range)
- | badblocks::BlockStatus::Unacknowledged(range) => {
- rq.data_ref().error.store(1, ordering::Relaxed);
- if queue_data.bad_blocks_once {
- queue_data.bad_blocks.set_good(range)?;
- }
- }
- };
- }
+ let mut sectors = rq.sectors();
- // TODO: Skip IO if bad block.
+ Self::handle_bad_blocks(&mut rq, queue_data.get_ref(), &mut sectors)?;
if queue_data.memory_backed {
memalloc_scope!(let _noio: NoIo);
let tree = &queue_data.tree;
- let command = rq.command();
- let mut sector = rq.sector();
- if command == bindings::req_op_REQ_OP_DISCARD {
- Self::discard(tree, sector, rq.sectors().into(), queue_data.block_size)?;
+ if rq.command() == bindings::req_op_REQ_OP_DISCARD {
+ Self::discard(tree, rq.sector(), sectors.into(), queue_data.block_size)?;
} else {
- for bio in rq.bio_iter_mut() {
- let segment_iter = bio.segment_iter();
- for segment in segment_iter {
- let length = segment.len();
- Self::transfer(command, tree, sector, segment)?;
- sector += u64::from(length) >> block::SECTOR_SHIFT;
- }
- }
+ Self::transfer(&mut rq, tree, sectors)?;
}
}
--
2.51.2
^ permalink raw reply related
* [PATCH v2 15/83] block: rust: add `TagSet` flags
From: Andreas Hindborg @ 2026-06-09 19:07 UTC (permalink / raw)
To: Liam R. Howlett, Alice Ryhl, Anna-Maria Behnsen, Benno Lossin,
Björn Roy Baron, Boqun Feng, Danilo Krummrich,
FUJITA Tomonori, Frederic Weisbecker, Gary Guo, Jens Axboe,
John Stultz, Lorenzo Stoakes, Lyude Paul, Miguel Ojeda,
Stephen Boyd, Thomas Gleixner, Trevor Gross, Liam R. Howlett,
Boqun Feng, Lorenzo Stoakes
Cc: Andreas Hindborg, linux-block, linux-kernel, linux-mm,
rust-for-linux
In-Reply-To: <20260609-rnull-v6-19-rc5-send-v2-0-82c7404542e2@kernel.org>
Add support for `TagSet` flags by introducing a `Flags` type and adding
a flags parameter to `TagSet::new`. This allows configuring tagset
behavior such as blocking vs non-blocking operation.
The Flags type supports bitwise operations and provides values like
`Blocking` for common use cases. The module documentation example is
updated to demonstrate the new API.
For now, only a single flag is added.
Reviewed-by: Alice Ryhl <aliceryhl@google.com>
Signed-off-by: Andreas Hindborg <a.hindborg@kernel.org>
---
drivers/block/rnull/rnull.rs | 5 ++++-
rust/kernel/block/mq.rs | 6 +++---
rust/kernel/block/mq/tag_set.rs | 13 ++++++++++---
rust/kernel/block/mq/tag_set/flags.rs | 21 +++++++++++++++++++++
4 files changed, 38 insertions(+), 7 deletions(-)
diff --git a/drivers/block/rnull/rnull.rs b/drivers/block/rnull/rnull.rs
index 3e7a47e6d0e5..746ddadd11f0 100644
--- a/drivers/block/rnull/rnull.rs
+++ b/drivers/block/rnull/rnull.rs
@@ -128,7 +128,10 @@ fn new(
irq_mode: IRQMode,
completion_time: Delta,
) -> Result<GenDisk<Self>> {
- let tagset = Arc::pin_init(TagSet::new(1, 256, 1), GFP_KERNEL)?;
+ let tagset = Arc::pin_init(
+ TagSet::new(1, 256, 1, mq::tag_set::Flags::default()),
+ GFP_KERNEL,
+ )?;
let queue_data = Box::new(
QueueData {
diff --git a/rust/kernel/block/mq.rs b/rust/kernel/block/mq.rs
index 23660817df29..e556b3bb1191 100644
--- a/rust/kernel/block/mq.rs
+++ b/rust/kernel/block/mq.rs
@@ -57,7 +57,7 @@
//!
//! ```rust
//! use kernel::{
-//! block::mq::*,
+//! block::mq::{self, *},
//! new_mutex,
//! prelude::*,
//! sync::{aref::ARef, Arc, Mutex},
@@ -92,7 +92,7 @@
//! }
//!
//! let tagset: Arc<TagSet<MyBlkDevice>> =
-//! Arc::pin_init(TagSet::new(1, 256, 1), GFP_KERNEL)?;
+//! Arc::pin_init(TagSet::new(1, 256, 1, mq::tag_set::Flags::default()), GFP_KERNEL)?;
//! let mut disk = gen_disk::GenDiskBuilder::new()
//! .capacity_sectors(4096)
//! .build(fmt!("myblk"), tagset, ())?;
@@ -103,7 +103,7 @@
pub mod gen_disk;
mod operations;
mod request;
-mod tag_set;
+pub mod tag_set;
pub use operations::Operations;
pub use request::{
diff --git a/rust/kernel/block/mq/tag_set.rs b/rust/kernel/block/mq/tag_set.rs
index ec5cac48b83f..5b1a5bcc978d 100644
--- a/rust/kernel/block/mq/tag_set.rs
+++ b/rust/kernel/block/mq/tag_set.rs
@@ -17,7 +17,7 @@
self,
Result, //
},
- prelude::try_pin_init,
+ prelude::*,
types::Opaque,
};
use core::{
@@ -30,6 +30,12 @@
PinInit, //
};
+mod flags;
+pub use flags::{
+ Flag,
+ Flags, //
+};
+
/// A wrapper for the C `struct blk_mq_tag_set`.
///
/// `struct blk_mq_tag_set` contains a `struct list_head` and so must be pinned.
@@ -51,6 +57,7 @@ pub fn new(
nr_hw_queues: u32,
num_tags: u32,
num_maps: u32,
+ flags: Flags,
) -> impl PinInit<Self, error::Error> {
let tag_set: bindings::blk_mq_tag_set = pin_init::zeroed();
let tag_set: Result<_> = size_of::<RequestDataWrapper<T>>()
@@ -63,8 +70,8 @@ pub fn new(
numa_node: bindings::NUMA_NO_NODE,
queue_depth: num_tags,
cmd_size,
- flags: 0,
- driver_data: core::ptr::null_mut::<crate::ffi::c_void>(),
+ flags: flags.into(),
+ driver_data: core::ptr::null_mut::<c_void>(),
nr_maps: num_maps,
..tag_set
}
diff --git a/rust/kernel/block/mq/tag_set/flags.rs b/rust/kernel/block/mq/tag_set/flags.rs
new file mode 100644
index 000000000000..b7eaccd200a2
--- /dev/null
+++ b/rust/kernel/block/mq/tag_set/flags.rs
@@ -0,0 +1,21 @@
+// SPDX-License-Identifier: GPL-2.0
+
+use crate::{
+ bindings,
+ impl_flags, //
+};
+
+impl_flags! {
+ /// Flags to be used when creating [`super::TagSet`] objects.
+ #[derive(Debug, Clone, Default, Copy, PartialEq, Eq)]
+ pub struct Flags(u32);
+
+ /// Allowed values for [`Flags`].
+ #[derive(Debug, Clone, Copy, PartialEq, Eq)]
+ pub enum Flag {
+ /// Indicate that the queues associated with this tag set might sleep when
+ /// processing IO. When this flag is not set, IO is processed in atomic
+ /// context. When this flag is set, IO is processed in process context.
+ Blocking = bindings::BLK_MQ_F_BLOCKING,
+ }
+}
--
2.51.2
^ permalink raw reply related
* [PATCH v2 03/83] block: rnull: adopt new formatting guidelines
From: Andreas Hindborg @ 2026-06-09 19:07 UTC (permalink / raw)
To: Liam R. Howlett, Alice Ryhl, Anna-Maria Behnsen, Benno Lossin,
Björn Roy Baron, Boqun Feng, Danilo Krummrich,
FUJITA Tomonori, Frederic Weisbecker, Gary Guo, Jens Axboe,
John Stultz, Lorenzo Stoakes, Lyude Paul, Miguel Ojeda,
Stephen Boyd, Thomas Gleixner, Trevor Gross, Liam R. Howlett,
Boqun Feng, Lorenzo Stoakes
Cc: Andreas Hindborg, linux-block, linux-kernel, linux-mm,
rust-for-linux
In-Reply-To: <20260609-rnull-v6-19-rc5-send-v2-0-82c7404542e2@kernel.org>
Reformat `use` statements to have one item per line as required by the
updated Rust formatting guidelines. Apply a formatting workaround to
ensure `rustfmt` produces the expected output.
Reviewed-by: Alice Ryhl <aliceryhl@google.com>
Signed-off-by: Andreas Hindborg <a.hindborg@kernel.org>
---
drivers/block/rnull/rnull.rs | 13 ++++++++++---
1 file changed, 10 insertions(+), 3 deletions(-)
diff --git a/drivers/block/rnull/rnull.rs b/drivers/block/rnull/rnull.rs
index 0ca8715febe8..d58d2c4c5f63 100644
--- a/drivers/block/rnull/rnull.rs
+++ b/drivers/block/rnull/rnull.rs
@@ -10,12 +10,19 @@
self,
mq::{
self,
- gen_disk::{self, GenDisk},
- Operations, TagSet,
+ gen_disk::{
+ self,
+ GenDisk, //
+ },
+ Operations,
+ TagSet, //
},
},
prelude::*,
- sync::{aref::ARef, Arc},
+ sync::{
+ aref::ARef,
+ Arc, //
+ },
};
module! {
--
2.51.2
^ permalink raw reply related
* [PATCH v2 80/83] block: rust: add `virt_boundary_mask` option to `GenDiskBuilder`
From: Andreas Hindborg @ 2026-06-09 19:08 UTC (permalink / raw)
To: Liam R. Howlett, Alice Ryhl, Anna-Maria Behnsen, Benno Lossin,
Björn Roy Baron, Boqun Feng, Danilo Krummrich,
FUJITA Tomonori, Frederic Weisbecker, Gary Guo, Jens Axboe,
John Stultz, Lorenzo Stoakes, Lyude Paul, Miguel Ojeda,
Stephen Boyd, Thomas Gleixner, Trevor Gross, Liam R. Howlett,
Boqun Feng, Lorenzo Stoakes
Cc: Andreas Hindborg, linux-block, linux-kernel, linux-mm,
rust-for-linux
In-Reply-To: <20260609-rnull-v6-19-rc5-send-v2-0-82c7404542e2@kernel.org>
Allow Rust device drivers to set the `virt_boundary_mask` property for
block devices.
Signed-off-by: Andreas Hindborg <a.hindborg@kernel.org>
---
rust/kernel/block/mq/gen_disk.rs | 12 ++++++++++++
1 file changed, 12 insertions(+)
diff --git a/rust/kernel/block/mq/gen_disk.rs b/rust/kernel/block/mq/gen_disk.rs
index 6d760dafade5..38057ebc0878 100644
--- a/rust/kernel/block/mq/gen_disk.rs
+++ b/rust/kernel/block/mq/gen_disk.rs
@@ -59,6 +59,7 @@ pub struct GenDiskBuilder<T> {
write_cache: bool,
forced_unit_access: bool,
max_sectors: u32,
+ virt_boundary_mask: usize,
_p: PhantomData<T>,
}
@@ -79,6 +80,7 @@ fn default() -> Self {
write_cache: false,
forced_unit_access: false,
max_sectors: 0,
+ virt_boundary_mask: 0,
_p: PhantomData,
}
}
@@ -189,6 +191,15 @@ pub fn max_sectors(mut self, sectors: u32) -> Self {
self
}
+ /// Set the I/O segment memory alignment mask for the block device. I/O requests to this device
+ /// will be split between segments wherever either the memory address of the end of the previous
+ /// segment or the memory address of the beginning of the current segment is not aligned to
+ /// virt_boundary_mask + 1 bytes.
+ pub fn virt_boundary_mask(mut self, mask: usize) -> Self {
+ self.virt_boundary_mask = mask;
+ self
+ }
+
/// Build a new `GenDisk` and add it to the VFS.
pub fn build(
self,
@@ -208,6 +219,7 @@ pub fn build(
lim.physical_block_size = self.physical_block_size;
lim.max_hw_discard_sectors = self.max_hw_discard_sectors;
lim.max_sectors = self.max_sectors;
+ lim.virt_boundary_mask = self.virt_boundary_mask;
if self.rotational {
lim.features = Feature::Rotational.into();
}
--
2.51.2
^ permalink raw reply related
* [PATCH v2 20/83] block: rnull: allow specifying the home numa node
From: Andreas Hindborg @ 2026-06-09 19:07 UTC (permalink / raw)
To: Liam R. Howlett, Alice Ryhl, Anna-Maria Behnsen, Benno Lossin,
Björn Roy Baron, Boqun Feng, Danilo Krummrich,
FUJITA Tomonori, Frederic Weisbecker, Gary Guo, Jens Axboe,
John Stultz, Lorenzo Stoakes, Lyude Paul, Miguel Ojeda,
Stephen Boyd, Thomas Gleixner, Trevor Gross, Liam R. Howlett,
Boqun Feng, Lorenzo Stoakes
Cc: Andreas Hindborg, linux-block, linux-kernel, linux-mm,
rust-for-linux
In-Reply-To: <20260609-rnull-v6-19-rc5-send-v2-0-82c7404542e2@kernel.org>
Add a configfs attribute and a module parameter, both named `home_node`,
to specify the NUMA node for rnull tag set and CPU map allocations. This
allows testing NUMA-aware block device behavior and optimizing memory
placement for specific hardware configurations.
Signed-off-by: Andreas Hindborg <a.hindborg@kernel.org>
---
drivers/block/rnull/configfs.rs | 19 +++++++++++++++++++
drivers/block/rnull/rnull.rs | 30 ++++++++++++++++++++++--------
2 files changed, 41 insertions(+), 8 deletions(-)
diff --git a/drivers/block/rnull/configfs.rs b/drivers/block/rnull/configfs.rs
index 71b38373be33..2f3fa81ea121 100644
--- a/drivers/block/rnull/configfs.rs
+++ b/drivers/block/rnull/configfs.rs
@@ -5,6 +5,7 @@
THIS_MODULE, //
};
use kernel::{
+ bindings,
block::mq::gen_disk::{
GenDisk,
GenDiskBuilder, //
@@ -91,6 +92,7 @@ fn make_group(
memory_backed: 6,
submit_queues: 7,
use_per_node_hctx: 8,
+ home_node: 9,
],
};
@@ -110,6 +112,7 @@ fn make_group(
name: name.try_into()?,
memory_backed: false,
submit_queues: 1,
+ home_node: bindings::NUMA_NO_NODE,
}),
}),
core::iter::empty(),
@@ -176,6 +179,7 @@ struct DeviceConfigInner {
disk: Option<GenDisk<NullBlkDevice>>,
memory_backed: bool,
submit_queues: u32,
+ home_node: i32,
}
#[vtable]
@@ -208,6 +212,7 @@ fn store(this: &DeviceConfig, page: &[u8]) -> Result {
completion_time: guard.completion_time,
memory_backed: guard.memory_backed,
submit_queues: guard.submit_queues,
+ home_node: guard.home_node,
})?);
guard.powered = true;
} else if guard.powered && !power_op {
@@ -288,3 +293,17 @@ fn store(this: &DeviceConfig, page: &[u8]) -> Result {
Ok(())
})
);
+
+configfs_simple_field!(
+ DeviceConfig,
+ 9,
+ home_node,
+ i32,
+ check(|value| {
+ if value == 0 || value >= kernel::numa::num_online_nodes().try_into()? {
+ Err(kernel::error::code::EINVAL)
+ } else {
+ Ok(())
+ }
+ })
+);
diff --git a/drivers/block/rnull/rnull.rs b/drivers/block/rnull/rnull.rs
index 6323327d4a5a..1d0faf524f5c 100644
--- a/drivers/block/rnull/rnull.rs
+++ b/drivers/block/rnull/rnull.rs
@@ -20,7 +20,10 @@
TagSet, //
},
},
- error::Result,
+ error::{
+ code,
+ Result, //
+ },
memalloc_scope,
new_mutex,
new_xarray,
@@ -93,6 +96,10 @@
default: false,
description: "Use per-node allocation for hardware context queues.",
},
+ home_node: i32 {
+ default: -1,
+ description: "Home node for the device. Default: -1 (no node)",
+ },
},
}
@@ -129,6 +136,7 @@ fn init(_module: &'static ThisModule) -> impl PinInit<Self, Error> {
completion_time: Delta::from_nanos(completion_time),
memory_backed: module_parameters::memory_backed.value(),
submit_queues,
+ home_node: module_parameters::home_node.value(),
})?;
disks.push(disk, GFP_KERNEL)?;
}
@@ -152,6 +160,7 @@ struct NullBlkOptions<'a> {
completion_time: Delta,
memory_backed: bool,
submit_queues: u32,
+ home_node: i32,
}
struct NullBlkDevice;
@@ -166,6 +175,7 @@ fn new(options: NullBlkOptions<'_>) -> Result<GenDisk<Self>> {
completion_time,
memory_backed,
submit_queues,
+ home_node,
} = options;
let flags = if memory_backed {
@@ -174,14 +184,18 @@ fn new(options: NullBlkOptions<'_>) -> Result<GenDisk<Self>> {
mq::tag_set::Flags::default()
};
+ if home_node > kernel::numa::num_online_nodes().try_into()? {
+ return Err(code::EINVAL);
+ }
+
+ let numa_node = if home_node == -1 {
+ kernel::alloc::NumaNode::NO_NODE
+ } else {
+ kernel::alloc::NumaNode::new(home_node)?
+ };
+
let tagset = Arc::pin_init(
- TagSet::new(
- submit_queues,
- 256,
- 1,
- kernel::alloc::NumaNode::NO_NODE,
- flags,
- ),
+ TagSet::new(submit_queues, 256, 1, numa_node, flags),
GFP_KERNEL,
)?;
--
2.51.2
^ permalink raw reply related
* [PATCH v2 14/83] block: rust: mq: use GFP_KERNEL from prelude
From: Andreas Hindborg @ 2026-06-09 19:07 UTC (permalink / raw)
To: Liam R. Howlett, Alice Ryhl, Anna-Maria Behnsen, Benno Lossin,
Björn Roy Baron, Boqun Feng, Danilo Krummrich,
FUJITA Tomonori, Frederic Weisbecker, Gary Guo, Jens Axboe,
John Stultz, Lorenzo Stoakes, Lyude Paul, Miguel Ojeda,
Stephen Boyd, Thomas Gleixner, Trevor Gross, Liam R. Howlett,
Boqun Feng, Lorenzo Stoakes
Cc: Andreas Hindborg, linux-block, linux-kernel, linux-mm,
rust-for-linux
In-Reply-To: <20260609-rnull-v6-19-rc5-send-v2-0-82c7404542e2@kernel.org>
Remove the explicit import of kernel::alloc::flags and use GFP_KERNEL
directly from the prelude in the module documentation example.
This simplifies the import list and follows the pattern of using
commonly used constants from the prelude.
Reviewed-by: Alice Ryhl <aliceryhl@google.com>
Signed-off-by: Andreas Hindborg <a.hindborg@kernel.org>
---
rust/kernel/block/mq.rs | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/rust/kernel/block/mq.rs b/rust/kernel/block/mq.rs
index a03d46d274a5..23660817df29 100644
--- a/rust/kernel/block/mq.rs
+++ b/rust/kernel/block/mq.rs
@@ -57,7 +57,6 @@
//!
//! ```rust
//! use kernel::{
-//! alloc::flags,
//! block::mq::*,
//! new_mutex,
//! prelude::*,
@@ -93,7 +92,7 @@
//! }
//!
//! let tagset: Arc<TagSet<MyBlkDevice>> =
-//! Arc::pin_init(TagSet::new(1, 256, 1), flags::GFP_KERNEL)?;
+//! Arc::pin_init(TagSet::new(1, 256, 1), GFP_KERNEL)?;
//! let mut disk = gen_disk::GenDiskBuilder::new()
//! .capacity_sectors(4096)
//! .build(fmt!("myblk"), tagset, ())?;
--
2.51.2
^ permalink raw reply related
* [PATCH v2 70/83] block: rnull: support queue_rqs
From: Andreas Hindborg @ 2026-06-09 19:08 UTC (permalink / raw)
To: Liam R. Howlett, Alice Ryhl, Anna-Maria Behnsen, Benno Lossin,
Björn Roy Baron, Boqun Feng, Danilo Krummrich,
FUJITA Tomonori, Frederic Weisbecker, Gary Guo, Jens Axboe,
John Stultz, Lorenzo Stoakes, Lyude Paul, Miguel Ojeda,
Stephen Boyd, Thomas Gleixner, Trevor Gross, Liam R. Howlett,
Boqun Feng, Lorenzo Stoakes
Cc: Andreas Hindborg, linux-block, linux-kernel, linux-mm,
rust-for-linux
In-Reply-To: <20260609-rnull-v6-19-rc5-send-v2-0-82c7404542e2@kernel.org>
Implement the `queue_rqs` callback for rnull, allowing the block layer
to submit multiple requests in a single call. This improves performance
by reducing per-request overhead and enabling batch processing.
The implementation processes requests from the list one at a time.
Requests that are queued successfully are removed from the list, while
requests that cannot be queued are left on it.
Signed-off-by: Andreas Hindborg <a.hindborg@kernel.org>
---
drivers/block/rnull/disk_storage.rs | 36 ++++----
drivers/block/rnull/rnull.rs | 180 +++++++++++++++++++++++-------------
2 files changed, 133 insertions(+), 83 deletions(-)
diff --git a/drivers/block/rnull/disk_storage.rs b/drivers/block/rnull/disk_storage.rs
index 4a9bf480221f..6797b7996da3 100644
--- a/drivers/block/rnull/disk_storage.rs
+++ b/drivers/block/rnull/disk_storage.rs
@@ -86,7 +86,7 @@ pub(crate) fn discard(
}
}
- pub(crate) fn flush(&self, hw_data: &Pin<&SpinLock<HwQueueContext>>) -> Result {
+ pub(crate) fn flush(&self, hw_data: &Pin<&SpinLock<HwQueueContext>>) {
let mut tree_guard = self.lock();
let mut hw_data_guard = hw_data.lock();
let mut access = self.access(&mut tree_guard, &mut hw_data_guard, None);
@@ -131,7 +131,7 @@ fn to_sector(index: usize) -> u64 {
(index << block::PAGE_SECTORS_SHIFT) as u64
}
- fn extract_cache_page(&mut self) -> Result<Option<KBox<NullBlockPage>>> {
+ fn extract_cache_page(&mut self) -> Option<KBox<NullBlockPage>> {
Self::extract_cache_page_inner(
&mut self.cache_guard,
&mut self.disk_guard,
@@ -147,16 +147,10 @@ fn extract_cache_page_inner<'g>(
disk_storage: &DiskStorage,
hw_data: &mut HwQueueContext,
sheaf: Option<&mut XArraySheaf<'_>>,
- ) -> Result<Option<KBox<NullBlockPage>>> {
+ ) -> Option<KBox<NullBlockPage>> {
let cache_entry = cache_guard.find_next_entry_circular(
disk_storage.next_flush_sector.load(ordering::Relaxed) as usize,
- );
-
- let cache_entry = if let Some(entry) = cache_entry {
- entry
- } else {
- return Ok(None);
- };
+ )?;
let index = cache_entry.index();
@@ -183,11 +177,14 @@ fn extract_cache_page_inner<'g>(
let mut src = cache_entry;
let mut offset = 0;
for _ in 0..PAGE_SECTORS {
- src.page_mut().as_pin_mut().copy_to_page(
- disk_entry.page_mut().as_pin_mut(),
- offset,
- block::SECTOR_SIZE as usize,
- )?;
+ src.page_mut()
+ .as_pin_mut()
+ .copy_to_page(
+ disk_entry.page_mut().as_pin_mut(),
+ offset,
+ block::SECTOR_SIZE as usize,
+ )
+ .expect("Write to succeed");
offset += block::SECTOR_SIZE as usize;
}
src.remove()
@@ -197,16 +194,15 @@ fn extract_cache_page_inner<'g>(
}
};
- Ok(Some(page))
+ Some(page)
}
- fn flush(&mut self) -> Result {
+ fn flush(&mut self) {
if self.disk_storage.cache_size > 0 {
- while let Some(page) = self.extract_cache_page()? {
+ while let Some(page) = self.extract_cache_page() {
drop(page);
}
}
- Ok(())
}
fn get_or_alloc_cache_page(&mut self, sector: u64) -> Result<&mut NullBlockPage> {
@@ -230,7 +226,7 @@ fn get_or_alloc_cache_page(&mut self, sector: u64) -> Result<&mut NullBlockPage>
self.disk_storage,
self.hw_data_guard,
self.sheaf.as_mut(),
- )?
+ )
.expect("Expected to find a page in the cache")
};
let xarray::Entry::Vacant(vacant_entry) = cache_guard.entry(index) else {
diff --git a/drivers/block/rnull/rnull.rs b/drivers/block/rnull/rnull.rs
index 6653db5c069b..32af69bbf8f0 100644
--- a/drivers/block/rnull/rnull.rs
+++ b/drivers/block/rnull/rnull.rs
@@ -28,7 +28,7 @@
BadBlocks, //
},
bio::Segment,
- error::BlkResult,
+ error::{BlkError, BlkResult},
mq::{
self,
gen_disk::{
@@ -36,8 +36,10 @@
GenDisk,
GenDiskRef, //
},
+ IdleRequest,
IoCompletionBatch,
Operations,
+ RequestList,
TagSet, //
},
SECTOR_SHIFT,
@@ -773,6 +775,104 @@ fn complete_request(&self, rq: Owned<mq::Request<Self>>) {
}
}
}
+
+ #[inline(always)]
+ fn queue_rq_internal(
+ hw_data: Pin<&SpinLock<HwQueueContext>>,
+ this: ArcBorrow<'_, Self>,
+ rq: Owned<mq::IdleRequest<Self>>,
+ _is_last: bool,
+ ) -> Result<(), QueueRequestError> {
+ if this.bandwidth_limit != 0 {
+ if !this.bandwidth_timer.active() {
+ drop(this.bandwidth_timer_handle.lock().take());
+ let arc: Arc<_> = this.into();
+ *this.bandwidth_timer_handle.lock() =
+ Some(arc.start(Self::BANDWIDTH_TIMER_INTERVAL));
+ }
+
+ if this
+ .bandwidth_bytes
+ .fetch_add(u64::from(rq.bytes()), ordering::Relaxed)
+ + u64::from(rq.bytes())
+ > this.bandwidth_limit
+ {
+ rq.queue().stop_hw_queues();
+ if this.bandwidth_bytes.load(ordering::Relaxed) <= this.bandwidth_limit {
+ rq.queue().start_stopped_hw_queues_async();
+ }
+
+ return Err(QueueRequestError { request: rq });
+ }
+ }
+
+ let mut rq = rq.start();
+
+ if rq.command() == mq::Command::Flush {
+ if this.memory_backed {
+ this.storage.flush(&hw_data);
+ }
+ this.complete_request(rq);
+
+ return Ok(());
+ }
+
+ let status = (|| -> Result {
+ #[cfg(CONFIG_BLK_DEV_ZONED)]
+ if this.zoned.enabled {
+ this.handle_zoned_command(&hw_data, &mut rq)?;
+ } else {
+ this.handle_regular_command(&hw_data, &mut rq)?;
+ }
+
+ #[cfg(not(CONFIG_BLK_DEV_ZONED))]
+ this.handle_regular_command(&hw_data, &mut rq)?;
+
+ Ok(())
+ })();
+
+ if let Err(e) = status {
+ // Do not overwrite existing error. We do not care whether this write fails.
+ let _ = rq
+ .data_ref()
+ .error
+ .cmpxchg(0, e.to_errno(), ordering::Relaxed);
+ }
+
+ if rq.is_poll() {
+ // NOTE: We lack the ability to insert `Owned<Request>` into a
+ // `kernel::list::List`, so we use a `RingBuffer` instead. The
+ // drawback of this is that we have to allocate the space for the
+ // ring buffer during drive initialization, and we have to hold the
+ // lock protecting the list until we have processed all the requests
+ // in the list. Change to a linked list when the kernel gets this
+ // ability.
+
+ // NOTE: We are processing requests during submit rather than during
+ // poll. This is different from C driver. C driver does processing
+ // during poll.
+
+ hw_data
+ .lock()
+ .poll_queue
+ .push_head(rq)
+ .expect("Buffer is sized to hold all in flight requests");
+ } else {
+ this.complete_request(rq);
+ }
+
+ Ok(())
+ }
+}
+
+struct QueueRequestError {
+ request: Owned<IdleRequest<NullBlkDevice>>,
+}
+
+impl From<QueueRequestError> for BlkError {
+ fn from(_value: QueueRequestError) -> Self {
+ kernel::block::error::code::BLK_STS_IOERR
+ }
}
impl_has_hr_timer! {
@@ -814,7 +914,7 @@ struct HwQueueContext {
struct Pdu {
#[pin]
timer: HrTimer<Self>,
- error: Atomic<u32>,
+ error: Atomic<i32>,
}
impl HrTimerCallback for Pdu {
@@ -855,76 +955,31 @@ fn new_request_data() -> impl PinInit<Self::RequestData> {
})
}
- #[inline(always)]
fn queue_rq(
hw_data: Pin<&SpinLock<HwQueueContext>>,
this: ArcBorrow<'_, Self>,
rq: Owned<mq::IdleRequest<Self>>,
- _is_last: bool,
- is_poll: bool,
+ is_last: bool,
+ _is_poll: bool,
) -> BlkResult {
- if this.bandwidth_limit != 0 {
- if !this.bandwidth_timer.active() {
- drop(this.bandwidth_timer_handle.lock().take());
- let arc: Arc<_> = this.into();
- *this.bandwidth_timer_handle.lock() =
- Some(arc.start(Self::BANDWIDTH_TIMER_INTERVAL));
- }
+ Ok(Self::queue_rq_internal(hw_data, this, rq, is_last)?)
+ }
- if this
- .bandwidth_bytes
- .fetch_add(u64::from(rq.bytes()), ordering::Relaxed)
- + u64::from(rq.bytes())
- > this.bandwidth_limit
+ fn queue_rqs(
+ hw_data: Pin<&SpinLock<HwQueueContext>>,
+ this: ArcBorrow<'_, Self>,
+ requests: &mut RequestList<Self>,
+ ) {
+ let mut requeue = RequestList::new();
+ while let Some(request) = requests.pop() {
+ if let Err(QueueRequestError { request }) =
+ Self::queue_rq_internal(hw_data, this, request, false)
{
- rq.queue().stop_hw_queues();
- if this.bandwidth_bytes.load(ordering::Relaxed) <= this.bandwidth_limit {
- rq.queue().start_stopped_hw_queues_async();
- }
-
- return Err(kernel::block::error::code::BLK_STS_DEV_RESOURCE);
+ requeue.push_tail(request);
}
}
- let mut rq = rq.start();
-
- if rq.command() == mq::Command::Flush {
- if this.memory_backed {
- this.storage.flush(&hw_data)?;
- }
- this.complete_request(rq);
-
- return Ok(());
- }
-
- #[cfg(CONFIG_BLK_DEV_ZONED)]
- if this.zoned.enabled {
- this.handle_zoned_command(&hw_data, &mut rq)?;
- } else {
- this.handle_regular_command(&hw_data, &mut rq)?;
- }
-
- #[cfg(not(CONFIG_BLK_DEV_ZONED))]
- this.handle_regular_command(&hw_data, &mut rq)?;
-
- if is_poll {
- // NOTE: We lack the ability to insert `Owned<Request>` into a
- // `kernel::list::List`, so we use a `RingBuffer` instead. The
- // drawback of this is that we have to allocate the space for the
- // ring buffer during drive initialization, and we have to hold the
- // lock protecting the list until we have processed all the requests
- // in the list. Change to a linked list when the kernel gets this
- // ability.
-
- // NOTE: We are processing requests during submit rather than during
- // poll. This is different from C driver. C driver does processing
- // during poll.
-
- hw_data.lock().poll_queue.push_head(rq)?;
- } else {
- this.complete_request(rq);
- }
- Ok(())
+ drop(core::mem::replace(requests, requeue));
}
fn commit_rqs(_hw_data: Pin<&SpinLock<HwQueueContext>>, _queue_data: ArcBorrow<'_, Self>) {}
@@ -941,7 +996,6 @@ fn poll(
let status = rq.data_ref().error.load(ordering::Relaxed);
rq.data_ref().error.store(0, ordering::Relaxed);
- // TODO: check error handling via status
if let Err(rq) = batch.add_request(rq, status != 0) {
Self::end_request(rq);
}
--
2.51.2
^ permalink raw reply related
* [PATCH v2 45/83] block: rnull: add blocking queue mode
From: Andreas Hindborg @ 2026-06-09 19:08 UTC (permalink / raw)
To: Liam R. Howlett, Alice Ryhl, Anna-Maria Behnsen, Benno Lossin,
Björn Roy Baron, Boqun Feng, Danilo Krummrich,
FUJITA Tomonori, Frederic Weisbecker, Gary Guo, Jens Axboe,
John Stultz, Lorenzo Stoakes, Lyude Paul, Miguel Ojeda,
Stephen Boyd, Thomas Gleixner, Trevor Gross, Liam R. Howlett,
Boqun Feng, Lorenzo Stoakes
Cc: Andreas Hindborg, linux-block, linux-kernel, linux-mm,
rust-for-linux
In-Reply-To: <20260609-rnull-v6-19-rc5-send-v2-0-82c7404542e2@kernel.org>
Add support for blocking queue mode via the `blocking` configfs
attribute and the `blocking` module parameter. When enabled, the tag set
is created with the `BLK_MQ_F_BLOCKING` flag.
Signed-off-by: Andreas Hindborg <a.hindborg@kernel.org>
---
drivers/block/rnull/configfs.rs | 7 ++++++-
drivers/block/rnull/rnull.rs | 9 ++++++++-
2 files changed, 14 insertions(+), 2 deletions(-)
diff --git a/drivers/block/rnull/configfs.rs b/drivers/block/rnull/configfs.rs
index 59217d75f46b..5e6bcf9d31d8 100644
--- a/drivers/block/rnull/configfs.rs
+++ b/drivers/block/rnull/configfs.rs
@@ -69,7 +69,7 @@ impl AttributeOperations<0> for Config {
let mut writer = kernel::str::Formatter::new(page);
writer.write_str(
"blocksize,size,rotational,irqmode,completion_nsec,memory_backed,\
- submit_queues,use_per_node_hctx\n",
+ submit_queues,use_per_node_hctx,discard,blocking\n",
)?;
Ok(writer.bytes_written())
}
@@ -105,6 +105,7 @@ fn make_group(
badblocks_partial_io: 14,
cache_size_mib: 15,
mbps: 16,
+ blocking: 17,
],
};
@@ -137,6 +138,7 @@ fn make_group(
)?,
cache_size_mib: 0,
mbps: 0,
+ blocking: false,
}),
}),
core::iter::empty(),
@@ -212,6 +214,7 @@ struct DeviceConfigInner {
cache_size_mib: u64,
disk_storage: Arc<DiskStorage>,
mbps: u32,
+ blocking: bool,
}
#[vtable]
@@ -252,6 +255,7 @@ fn store(this: &DeviceConfig, page: &[u8]) -> Result {
bad_blocks_partial_io: guard.bad_blocks_partial_io,
storage: guard.disk_storage.clone(),
bandwidth_limit: u64::from(guard.mbps) * 2u64.pow(20),
+ blocking: guard.blocking,
})?);
guard.powered = true;
} else if guard.powered && !power_op {
@@ -422,3 +426,4 @@ fn store(this: &DeviceConfig, page: &[u8]) -> Result {
);
configfs_simple_field!(DeviceConfig, 16, mbps, u32);
+configfs_simple_bool_field!(DeviceConfig, 17, blocking);
diff --git a/drivers/block/rnull/rnull.rs b/drivers/block/rnull/rnull.rs
index 1dda8d717b95..181fce551a91 100644
--- a/drivers/block/rnull/rnull.rs
+++ b/drivers/block/rnull/rnull.rs
@@ -139,6 +139,10 @@
default: 0,
description: "Max bandwidth in MiB/s. 0 means no limit.",
},
+ blocking: bool {
+ default: false,
+ description: "Register as a blocking blk-mq driver device",
+ },
},
}
@@ -185,6 +189,7 @@ fn init(_module: &'static ThisModule) -> impl PinInit<Self, Error> {
bad_blocks_partial_io: false,
storage: Arc::pin_init(DiskStorage::new(0, block_size as usize), GFP_KERNEL)?,
bandwidth_limit: u64::from(module_parameters::mbps.value()) * 2u64.pow(20),
+ blocking: module_parameters::blocking.value(),
})?;
disks.push(disk, GFP_KERNEL)?;
}
@@ -216,6 +221,7 @@ struct NullBlkOptions<'a> {
bad_blocks_partial_io: bool,
storage: Arc<DiskStorage>,
bandwidth_limit: u64,
+ blocking: bool,
}
#[pin_data]
@@ -258,11 +264,12 @@ fn new(options: NullBlkOptions<'_>) -> Result<Arc<GenDisk<Self>>> {
bad_blocks_partial_io,
storage,
bandwidth_limit,
+ blocking,
} = options;
let mut flags = mq::tag_set::Flags::default();
- if memory_backed {
+ if blocking || memory_backed {
flags |= mq::tag_set::Flag::Blocking;
}
--
2.51.2
^ permalink raw reply related
* [PATCH v2 43/83] block: rust: add `GenDisk::queue_data`
From: Andreas Hindborg @ 2026-06-09 19:08 UTC (permalink / raw)
To: Liam R. Howlett, Alice Ryhl, Anna-Maria Behnsen, Benno Lossin,
Björn Roy Baron, Boqun Feng, Danilo Krummrich,
FUJITA Tomonori, Frederic Weisbecker, Gary Guo, Jens Axboe,
John Stultz, Lorenzo Stoakes, Lyude Paul, Miguel Ojeda,
Stephen Boyd, Thomas Gleixner, Trevor Gross, Liam R. Howlett,
Boqun Feng, Lorenzo Stoakes
Cc: Andreas Hindborg, linux-block, linux-kernel, linux-mm,
rust-for-linux
In-Reply-To: <20260609-rnull-v6-19-rc5-send-v2-0-82c7404542e2@kernel.org>
Add a method to borrow the private queue data of the queue a `GenDisk` is
associated with.
Signed-off-by: Andreas Hindborg <a.hindborg@kernel.org>
---
rust/kernel/block/mq/gen_disk.rs | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/rust/kernel/block/mq/gen_disk.rs b/rust/kernel/block/mq/gen_disk.rs
index 6ba8d88f63a9..49ce5ac4774d 100644
--- a/rust/kernel/block/mq/gen_disk.rs
+++ b/rust/kernel/block/mq/gen_disk.rs
@@ -260,6 +260,12 @@ pub fn queue(&self) -> &RequestQueue<T> {
// SAFETY: By type invariant, self is a valid gendisk.
unsafe { RequestQueue::from_raw((*self.gendisk).queue) }
}
+
+ /// Get the queue data associated with this [`GenDisk`].
+ pub fn queue_data(&self) -> <T::QueueData as ForeignOwnable>::Borrowed<'_> {
+ // SAFETY: By type invariant, self is a valid gendisk.
+ unsafe { T::QueueData::borrow((*(*self.gendisk).queue).queuedata) }
+ }
}
// SAFETY: `GenDisk` is an owned pointer to a `struct gendisk` and an `Arc` to a
--
2.51.2
^ permalink raw reply related
* Re: [PATCH v4] loop: Fix NULL pointer dereference in lo_rw_aio()
From: Al Viro @ 2026-06-09 17:50 UTC (permalink / raw)
To: Tetsuo Handa
Cc: Jens Axboe, Bart Van Assche, Christoph Hellwig, Damien Le Moal,
Ming Lei, linux-block, LKML, Andrew Morton, Linus Torvalds,
linux-btrfs, David Sterba, linux-fsdevel, Christian Brauner,
Hillf Danton
In-Reply-To: <3244d4dd-8254-47c0-9609-b1db53450c7c@I-love.SAKURA.ne.jp>
On Sun, Jun 07, 2026 at 07:54:58PM +0900, Tetsuo Handa wrote:
> syzbot is reporting NULL pointer dereference in lo_rw_aio() [1][2].
> An analysis by the Gemini AI collaborator [3] considers that this problem
> is caused by a timing shift primarily exposed by commit 65565ca5f99b
> ("block: unify the synchronous bi_end_io callbacks"), along with helper
> refactorings like commit 92c3737a2473 ("block: add a bio_submit_or_kill
> helper").
>
> But because this race is difficult to reproduce, discussion about what is
> happening and how to fix this problem has stalled. Also, we haven't
> identified how many filesystems are subject to this problem.
>
> Therefore, this patch introduces a grace period for flushing pending I/O
> requests (which should be a good thing from the perspective of defensive
> programming) so that we won't hit the NULL pointer dereference, and it
> also emits a BUG: message to help filesystem developers identify the
> caller of an I/O request that failed to wait for completion, so that
> they can fix that caller to wait for completion.
>
> Note that emitting the BUG: message is enabled only if CONFIG_KCOV=y,
> because this check is a waste of computation resources for almost all users.
Still breaks xfs/259, same as the version in next-20260605...
^ permalink raw reply
* Re: [PATCH v7] block: propagate in_flight to whole disk on partition I/O
From: Jens Axboe @ 2026-06-09 16:13 UTC (permalink / raw)
To: hch, kbusch, Tang Yizhou; +Cc: yukuai, linux-block, linux-kernel, Leon Hwang
In-Reply-To: <20260526021555.359500-1-yizhou.tang@shopee.com>
On Tue, 26 May 2026 10:15:55 +0800, Tang Yizhou wrote:
> Now when I/O is submitted to a partition, the per-CPU in_flight[]
> counter is incremented only on the partition's block_device, not on the
> underlying whole disk. This leads to a problem which can be shown by a
> fio test:
>
> lsblk
> NAME MAJ:MIN RM SIZE RO TYPE MOUNTPOINTS
> mydev 252:1 0 20G 0 disk
> └─mydev1 259:0 0 10G 0 part
>
> [...]
Applied, thanks!
[1/1] block: propagate in_flight to whole disk on partition I/O
commit: 5bdb8ec58b54b0e86672ba1991087611c7e52de5
Best regards,
--
Jens Axboe
^ permalink raw reply
* Re: [PATCH v2] virtio-blk: clamp zone report to the report buffer capacity
From: Jens Axboe @ 2026-06-09 16:02 UTC (permalink / raw)
To: Michael S . Tsirkin, Jason Wang, Stefan Hajnoczi,
Michael Bommarito
Cc: Xuan Zhuo, virtualization, linux-block, linux-kernel
In-Reply-To: <20260607124834.3059944-1-michael.bommarito@gmail.com>
On Sun, 07 Jun 2026 08:48:34 -0400, Michael Bommarito wrote:
> virtblk_report_zones() trusts the device-reported number of zones when
> walking the report buffer:
>
> nz = min_t(u64, virtio64_to_cpu(vblk->vdev, report->nr_zones),
> nr_zones);
> ...
> for (i = 0; i < nz && zone_idx < nr_zones; i++) {
> ret = virtblk_parse_zone(vblk, &report->zones[i], ...);
>
> [...]
Applied, thanks!
[1/1] virtio-blk: clamp zone report to the report buffer capacity
commit: 0fd835f5e9477ebea2439b8ada58f34e1b8cf25a
Best regards,
--
Jens Axboe
^ permalink raw reply
* Re: [PATCH v2] virtio-blk: clamp zone report to the report buffer capacity
From: Michael S. Tsirkin @ 2026-06-09 15:54 UTC (permalink / raw)
To: Michael Bommarito
Cc: Jason Wang, Stefan Hajnoczi, Jens Axboe, Xuan Zhuo,
virtualization, linux-block, linux-kernel
In-Reply-To: <20260607124834.3059944-1-michael.bommarito@gmail.com>
On Sun, Jun 07, 2026 at 08:48:34AM -0400, Michael Bommarito wrote:
> virtblk_report_zones() trusts the device-reported number of zones when
> walking the report buffer:
>
> nz = min_t(u64, virtio64_to_cpu(vblk->vdev, report->nr_zones),
> nr_zones);
> ...
> for (i = 0; i < nz && zone_idx < nr_zones; i++) {
> ret = virtblk_parse_zone(vblk, &report->zones[i], ...);
>
> The buffer is allocated by virtblk_alloc_report_buffer(), whose size is
> capped by the queue's max hardware sectors and max segments and can
> therefore hold fewer descriptors than nr_zones. nz is bounded only by
> the device-supplied report->nr_zones and the requested nr_zones, never
> by the buffer's descriptor capacity. At probe time the request count is
> unbounded (blk_revalidate_disk_zones() calls report_zones() with
> nr_zones == UINT_MAX), so the device-supplied report->nr_zones is the
> sole gate: a device that reports more zones than fit in the buffer
> drives the loop to read report->zones[i] past the end of the allocation.
>
> A malicious or buggy virtio-blk device that reports an inflated nr_zones
> triggers this during zone revalidation at probe. KASAN reports a
> vmalloc-out-of-bounds read in virtblk_report_zones() against the report
> buffer allocated a few lines earlier.
>
> Clamp nz to the number of descriptors that actually fit in the report
> buffer.
>
> Fixes: 95bfec41bd3d ("virtio-blk: add support for zoned block devices")
> Assisted-by: Claude:claude-opus-4-8
> Signed-off-by: Michael Bommarito <michael.bommarito@gmail.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
> ---
> v2: drop the explanatory comment per Michael S. Tsirkin's review; the
> clamp itself is unchanged.
>
> drivers/block/virtio_blk.c | 2 ++
> 1 file changed, 2 insertions(+)
>
> diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
> index b1c9a27..32bf3ba 100644
> --- a/drivers/block/virtio_blk.c
> +++ b/drivers/block/virtio_blk.c
> @@ -689,6 +689,8 @@ static int virtblk_report_zones(struct gendisk *disk, sector_t sector,
>
> nz = min_t(u64, virtio64_to_cpu(vblk->vdev, report->nr_zones),
> nr_zones);
> + nz = min_t(u64, nz,
> + (buflen - sizeof(*report)) / sizeof(report->zones[0]));
> if (!nz)
> break;
>
>
> base-commit: 5200f5f493f79f14bbdc349e402a40dfb32f23c8
> --
> 2.53.0
^ permalink raw reply
* Re: [PATCH v7] block: propagate in_flight to whole disk on partition I/O
From: Yizhou Tang @ 2026-06-09 15:28 UTC (permalink / raw)
To: Christoph Hellwig
Cc: Tang Yizhou, axboe, kbusch, yukuai, linux-block, linux-kernel,
Leon Hwang
In-Reply-To: <20260527141057.GA13345@lst.de>
On Wed, May 27, 2026 at 10:11 PM Christoph Hellwig <hch@lst.de> wrote:
>
> Looks good:
>
> Reviewed-by: Christoph Hellwig <hch@lst.de>
>
Hi Jens,
This patch has been on the list for over two weeks without any new
review comments. If you have no objections, please consider picking it
up for the next merge window.
Best regards,
Yi
>
^ permalink raw reply
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox