[PATCH v2 58/83] block: rnull: add polled completion support

From: Andreas Hindborg

Date: Tue Jun 09 2026 - 15:31:33 EST


Add support for polled I/O completion in rnull. This feature requires
configuring poll queues via the `poll_queues` configfs attribute or the
`poll_queues` module parameter.

Signed-off-by: Andreas Hindborg <a.hindborg@xxxxxxxxxx>
---
drivers/block/rnull/configfs.rs | 19 +++++-
drivers/block/rnull/rnull.rs | 133 ++++++++++++++++++++++++++++++++++++----
2 files changed, 139 insertions(+), 13 deletions(-)

diff --git a/drivers/block/rnull/configfs.rs b/drivers/block/rnull/configfs.rs
index f866595a263c..0637c1e0ab22 100644
--- a/drivers/block/rnull/configfs.rs
+++ b/drivers/block/rnull/configfs.rs
@@ -81,7 +81,7 @@ impl AttributeOperations<0> for Config {
writer.write_str(
"blocksize,size,rotational,irqmode,completion_nsec,memory_backed,\
submit_queues,use_per_node_hctx,discard,blocking,shared_tags,\
- zoned,zone_size,zone_capacity\n",
+ zoned,zone_size,zone_capacity,poll_queues\n",
)?;
Ok(writer.bytes_written())
}
@@ -127,6 +127,7 @@ fn make_group(
zone_max_open: 24,
zone_max_active: 25,
zone_append_max_sectors: 26,
+ poll_queues: 27,
],
};

@@ -167,6 +168,7 @@ fn make_group(
zone_max_open: 0,
zone_max_active: 0,
zone_append_max_sectors: u32::MAX,
+ poll_queues: 0,
}),
}),
core::iter::empty(),
@@ -253,6 +255,7 @@ struct DeviceConfigInner {
zone_max_open: u32,
zone_max_active: u32,
zone_append_max_sectors: u32,
+ poll_queues: u32,
}

#[vtable]
@@ -305,6 +308,7 @@ fn store(this: &DeviceConfig, page: &[u8]) -> Result {
shared_tag_set: guard.shared_tags.then(|| guard.shared_tag_set.clone()),
tag_set: crate::TagSetOptions {
submit_queues: guard.submit_queues,
+ poll_queues: guard.poll_queues,
home_node: guard.home_node,
blocking: guard.blocking,
memory_backed: guard.memory_backed,
@@ -498,3 +502,16 @@ fn store(this: &DeviceConfig, page: &[u8]) -> Result {
configfs_simple_field!(DeviceConfig, 24, zone_max_open, u32);
configfs_simple_field!(DeviceConfig, 25, zone_max_active, u32);
configfs_simple_field!(DeviceConfig, 26, zone_append_max_sectors, u32);
+configfs_simple_field!(
+ DeviceConfig,
+ 27,
+ poll_queues,
+ u32,
+ check(|value| {
+ if value > kernel::cpu::num_possible_cpus() {
+ Err(kernel::error::code::EINVAL)
+ } else {
+ Ok(())
+ }
+ })
+);
diff --git a/drivers/block/rnull/rnull.rs b/drivers/block/rnull/rnull.rs
index 076493f92516..edb4ef53d6ad 100644
--- a/drivers/block/rnull/rnull.rs
+++ b/drivers/block/rnull/rnull.rs
@@ -33,6 +33,7 @@
GenDisk,
GenDiskRef, //
},
+ IoCompletionBatch,
Operations,
TagSet, //
},
@@ -186,6 +187,10 @@
default: 0,
description: "Maximum size of a zone append command (in 512B sectors). Specify 0 for no zone append.",
},
+ poll_queues: u32 {
+ default: 0,
+ description: "Number of IOPOLL submission queues.",
+ },
},
}

@@ -207,6 +212,7 @@ fn init(_module: &'static ThisModule) -> impl PinInit<Self, Error> {
} else {
module_parameters::submit_queues.value()
};
+ let poll_queues = module_parameters::poll_queues.value();
let home_node = module_parameters::home_node.value();
let blocking = module_parameters::blocking.value();
let memory_backed = module_parameters::memory_backed.value();
@@ -215,6 +221,7 @@ fn init(_module: &'static ThisModule) -> impl PinInit<Self, Error> {

let shared_tag_set = NullBlkDevice::build_tag_set(TagSetOptions {
submit_queues,
+ poll_queues,
home_node,
blocking,
memory_backed,
@@ -246,6 +253,7 @@ fn init(_module: &'static ThisModule) -> impl PinInit<Self, Error> {
.then(|| shared_tag_set.clone()),
tag_set: TagSetOptions {
submit_queues,
+ poll_queues,
home_node,
blocking,
memory_backed,
@@ -325,6 +333,7 @@ struct NullBlkDevice {

struct TagSetOptions {
submit_queues: u32,
+ poll_queues: u32,
home_node: i32,
blocking: bool,
memory_backed: bool,
@@ -338,6 +347,7 @@ impl NullBlkDevice {
fn build_tag_set(options: TagSetOptions) -> Result<Arc<TagSet<Self>>> {
let TagSetOptions {
submit_queues,
+ poll_queues,
home_node,
blocking,
memory_backed,
@@ -364,7 +374,21 @@ fn build_tag_set(options: TagSetOptions) -> Result<Arc<TagSet<Self>>> {
}

Arc::pin_init(
- TagSet::new(submit_queues, (), hw_queue_depth, 1, numa_node, flags),
+ TagSet::new(
+ submit_queues + poll_queues,
+ KBox::new(
+ NullBlkTagsetData {
+ queue_depth: hw_queue_depth,
+ submit_queue_count: submit_queues,
+ poll_queue_count: poll_queues,
+ },
+ GFP_KERNEL,
+ )?,
+ hw_queue_depth,
+ if poll_queues == 0 { 1 } else { 3 },
+ numa_node,
+ flags,
+ ),
GFP_KERNEL,
)
}
@@ -729,6 +753,7 @@ fn run(

struct HwQueueContext {
page: Option<KBox<disk_storage::NullBlockPage>>,
+ poll_queue: kernel::alloc::ringbuffer::KRingBuffer<Owned<mq::Request<NullBlkDevice>>>,
}

#[pin_data]
@@ -757,11 +782,17 @@ impl HasHrTimer<Self> for Pdu {
}
}

+struct NullBlkTagsetData {
+ queue_depth: u32,
+ submit_queue_count: u32,
+ poll_queue_count: u32,
+}
+
#[vtable]
impl Operations for NullBlkDevice {
type QueueData = Arc<Self>;
type RequestData = Pdu;
- type TagSetData = ();
+ type TagSetData = KBox<NullBlkTagsetData>;
type HwData = Pin<KBox<SpinLock<HwQueueContext>>>;

fn new_request_data() -> impl PinInit<Self::RequestData> {
@@ -777,7 +808,7 @@ fn queue_rq(
this: ArcBorrow<'_, Self>,
rq: Owned<mq::IdleRequest<Self>>,
_is_last: bool,
- _is_poll: bool,
+ is_poll: bool,
) -> BlkResult {
if this.bandwidth_limit != 0 {
if !this.bandwidth_timer.active() {
@@ -814,13 +845,29 @@ fn queue_rq(
#[cfg(not(CONFIG_BLK_DEV_ZONED))]
this.handle_regular_command(&hw_data, &mut rq)?;

- match this.irq_mode {
- IRQMode::None => Self::end_request(rq),
- IRQMode::Soft => mq::Request::complete(rq.into()),
- IRQMode::Timer => {
- OwnableRefCounted::into_shared(rq)
- .start(this.completion_time)
- .dismiss();
+ if is_poll {
+ // NOTE: We lack the ability to insert `Owned<Request>` into a
+ // `kernel::list::List`, so we use a `KRingBuffer` instead. The
+ // drawbacks are that we have to allocate the space for the ring
+ // buffer when the hardware queue context is initialized, and that
+ // we have to hold the lock protecting the ring buffer until we have
+ // processed all the requests in it. Change to a linked list when the
+ // kernel gets this ability.
+
+ // NOTE: We are processing requests during submit rather than during
+ // poll. This differs from the C driver, which does its processing
+ // during poll.
+
+ hw_data.lock().poll_queue.push_head(rq)?;
+ } else {
+ match this.irq_mode {
+ IRQMode::None => Self::end_request(rq),
+ IRQMode::Soft => mq::Request::complete(rq.into()),
+ IRQMode::Timer => {
+ OwnableRefCounted::into_shared(rq)
+ .start(this.completion_time)
+ .dismiss();
+ }
}
}
Ok(())
@@ -828,8 +875,40 @@ fn queue_rq(

fn commit_rqs(_hw_data: Pin<&SpinLock<HwQueueContext>>, _queue_data: ArcBorrow<'_, Self>) {}

- fn init_hctx(_tagset_data: (), _hctx_idx: u32) -> Result<Self::HwData> {
- KBox::pin_init(new_spinlock!(HwQueueContext { page: None }), GFP_KERNEL)
+ fn poll(
+ hw_data: Pin<&SpinLock<HwQueueContext>>,
+ _this: ArcBorrow<'_, Self>,
+ batch: &mut IoCompletionBatch<Self>,
+ ) -> Result<bool> {
+ let mut guard = hw_data.lock();
+ let mut completed = false;
+
+ while let Some(rq) = guard.poll_queue.pop_tail() {
+ let status = rq.data_ref().error.load(ordering::Relaxed);
+ rq.data_ref().error.store(0, ordering::Relaxed);
+
+ // TODO: check error handling via status
+ if let Err(rq) = batch.add_request(rq, status != 0) {
+ Self::end_request(rq);
+ }
+
+ completed = true;
+ }
+
+ Ok(completed)
+ }
+
+ fn init_hctx(tagset_data: &NullBlkTagsetData, _hctx_idx: u32) -> Result<Self::HwData> {
+ KBox::pin_init(
+ new_spinlock!(HwQueueContext {
+ page: None,
+ poll_queue: kernel::alloc::ringbuffer::KRingBuffer::new(
+ tagset_data.queue_depth.try_into()?,
+ GFP_KERNEL,
+ )?,
+ }),
+ GFP_KERNEL,
+ )
}

fn complete(rq: ARef<mq::Request<Self>>) {
@@ -849,4 +928,34 @@ fn report_zones(
) -> Result<u32> {
Self::report_zones_internal(disk, sector, nr_zones, callback)
}
+
+ fn map_queues(tag_set: Pin<&mut TagSet<Self>>) {
+ let mut submit_queue_count = tag_set.data().submit_queue_count;
+ let mut poll_queue_count = tag_set.data().poll_queue_count;
+
+ if tag_set.hw_queue_count() != submit_queue_count + poll_queue_count {
+ pr_warn!(
+ "tag set has unexpected hardware queue count: {}\n",
+ tag_set.hw_queue_count()
+ );
+ submit_queue_count = 1;
+ poll_queue_count = 0;
+ }
+
+ let mut offset = 0;
+ tag_set
+ .update_maps(|mut qmap| {
+ use mq::QueueType::*;
+ let queue_count = match qmap.kind() {
+ Default => submit_queue_count,
+ Read => 0,
+ Poll => poll_queue_count,
+ };
+ qmap.set_queue_count(queue_count);
+ qmap.set_offset(offset);
+ offset += queue_count;
+ qmap.map_queues();
+ })
+ .unwrap()
+ }
}

--
2.51.2