Re: [PATCH RFC v4 1/3] block: add BIO_COMPLETE_IN_TASK for task-context completion
From: Jens Axboe
Date: Wed Mar 25 2026 - 16:15:05 EST
On 3/25/26 12:43 PM, Tal Zussman wrote:
> diff --git a/block/bio.c b/block/bio.c
> index 8203bb7455a9..69ee0d93041f 100644
> --- a/block/bio.c
> +++ b/block/bio.c
> @@ -18,6 +18,7 @@
> #include <linux/highmem.h>
> #include <linux/blk-crypto.h>
> #include <linux/xarray.h>
> +#include <linux/local_lock.h>
>
> #include <trace/events/block.h>
> #include "blk.h"
> @@ -1714,6 +1715,60 @@ void bio_check_pages_dirty(struct bio *bio)
> }
> EXPORT_SYMBOL_GPL(bio_check_pages_dirty);
>
> +struct bio_complete_batch {
> + local_lock_t lock;
> + struct bio_list list;
> + struct work_struct work;
> +};
> +
> +static DEFINE_PER_CPU(struct bio_complete_batch, bio_complete_batch) = {
> + .lock = INIT_LOCAL_LOCK(lock),
> +};
> +
> +static void bio_complete_work_fn(struct work_struct *w)
> +{
> + struct bio_complete_batch *batch;
> + struct bio_list list;
> +
> +again:
> + local_lock_irq(&bio_complete_batch.lock);
> + batch = this_cpu_ptr(&bio_complete_batch);
> + list = batch->list;
> + bio_list_init(&batch->list);
> + local_unlock_irq(&bio_complete_batch.lock);
> +
> + while (!bio_list_empty(&list)) {
> + struct bio *bio = bio_list_pop(&list);
> + bio->bi_end_io(bio);
> + }
> +
> + local_lock_irq(&bio_complete_batch.lock);
> + batch = this_cpu_ptr(&bio_complete_batch);
> + if (!bio_list_empty(&batch->list)) {
> + local_unlock_irq(&bio_complete_batch.lock);
> +
> + if (!need_resched())
> + goto again;
> +
> + schedule_work_on(smp_processor_id(), &batch->work);
> + return;
> + }
> + local_unlock_irq(&bio_complete_batch.lock);
> +}
bool looped = false;
do {
if (looped && need_resched()) {
schedule_work_on(smp_processor_id(), &batch->work);
break;
}
local_lock_irq(&bio_complete_batch.lock);
batch = this_cpu_ptr(&bio_complete_batch);
list = batch->list;
bio_list_init(&batch->list);
local_unlock_irq(&bio_complete_batch.lock);
if (bio_list_empty(&list))
break;
do {
struct bio *bio = bio_list_pop(&list);
bio->bi_end_io(bio);
} while (!bio_list_empty(&list));
looped = true;
} while (1);
would be a lot easier to read, and would avoid duplicating the list
manipulation.
> +static void bio_queue_completion(struct bio *bio)
> +{
> + struct bio_complete_batch *batch;
> + unsigned long flags;
> +
> + local_lock_irqsave(&bio_complete_batch.lock, flags);
> + batch = this_cpu_ptr(&bio_complete_batch);
> + bio_list_add(&batch->list, bio);
> + local_unlock_irqrestore(&bio_complete_batch.lock, flags);
> +
> + schedule_work_on(smp_processor_id(), &batch->work);
> +}
Maybe do something à la:
static void bio_queue_completion(struct bio *bio)
{
struct bio_complete_batch *batch;
unsigned long flags;
bool was_empty;
local_lock_irqsave(&bio_complete_batch.lock, flags);
batch = this_cpu_ptr(&bio_complete_batch);
was_empty = bio_list_empty(&batch->list);
bio_list_add(&batch->list, bio);
local_unlock_irqrestore(&bio_complete_batch.lock, flags);
if (was_empty)
schedule_work_on(smp_processor_id(), &batch->work);
}
Outside of these mostly nits, I like this approach. It avoids my main
worry with this, which was contention on the list locks. And on the
io_uring side, we'll never hit the !in_task() path anyway, as the
completions are run from the task always. The bio flag makes sense for
this.
--
Jens Axboe