Re: [RFC PATCH 1/1] dmaengine: introduce dmaengine_bh_wq and bh helpers

From: Allen

Date: Thu Jan 08 2026 - 14:23:22 EST


> > Create a dedicated dmaengine bottom-half workqueue (WQ_BH | WQ_PERCPU)
> > and provide helper APIs for queue/flush/cancel of BH work items. Add
> > per-channel BH helpers in dma_chan so drivers can schedule a BH callback
> > without maintaining their own tasklets.
> >
> > Convert virt-dma to use the new per-channel BH helpers and remove the
> > per-channel tasklet. Update existing drivers that only need tasklet
> > teardown to use dma_chan_kill_bh().
> >
> > This provides a common BH execution path for dmaengine and establishes
> > the base for converting remaining DMA tasklets to workqueue-based BHs.
> >
> > Signed-off-by: Allen Pais <allen.lkml@xxxxxxxxx>
>
> Hi Allen,
>
> I agree that the dmaengine code should stop using tasklets here,
> but I think the last time we discussed this, we ended up not
> going with work queues as a replacement because of the inherent
> scheduling overhead.
>
> The use of this tasklet is to invoke the dmaengine_desc_callback(),
> which at the moment clearly expects to be called from tasklet
> context, but in most cases probably should just be called from
> hardirq context, e.g. when all it does is to call complete()
> or wake_up(). In particular, I assume this breaks any console
> driver that tries to use DMA to send output to a UART.
>
> It may make sense to take the portions of your patch that
> abstract the dmaengine drivers away from tasklet and have them
> interact with shared functions, but I don't think we should
> introduce a workqueue at all here, at least not until we
> have identified dmaengine users that want workqueue behavior.
>
> If your goal is to reduce the number of tasklet uses in the
> kernel, I would suggest taking this on at one level higher up
> the stack: assume that dma_async_tx_descriptor->callback()
> is always called at tasklet context, and introduce an
> alternative mechanism that is called from hardirq context,
> then change over each user of dma_async_tx_descriptor to
> use the hardirq method instead of the tasklet method, if
> at all possible.

Arnd,

Thanks for the feedback. My intent with WQ_BH was to keep callbacks in
softirq/BH context, but I agree the scheduling overhead and existing tasklet
assumptions are valid concerns.

I can re-spin the RFC to drop the workqueue entirely and keep
tasklet semantics,
while still abstracting tasklet handling into dmaengine helpers so drivers no
longer directly manipulate tasklets. That keeps
dmaengine_desc_callback_invoke()
in tasklet context and avoids breaking DMA users that rely on that behavior.

A hardirq callback path feels like a larger API change, so I'd prefer to
handle that as a separate follow-up (e.g. an explicit hardirq callback/flag
plus user migration where safe). Thoughts?

Here’s a diff on top of v6.19-rc4 that does exactly this.

diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c
index ca13cd39330b..611047763b35 100644
--- a/drivers/dma/dmaengine.c
+++ b/drivers/dma/dmaengine.c
@@ -1425,6 +1425,48 @@ static void dmaengine_destroy_unmap_pool(void)
}
}

+static void dma_chan_bh_entry(struct tasklet_struct *t)
+{
+ struct dma_chan *chan = from_tasklet(chan, t, bh_tasklet);
+ dma_chan_bh_work_fn fn = READ_ONCE(chan->bh_work_fn);
+
+ if (fn)
+ fn(chan);
+}
+
+void dma_chan_init_bh(struct dma_chan *chan, dma_chan_bh_work_fn fn)
+{
+ if (WARN_ON(!fn))
+ return;
+
+ if (WARN_ON(chan->bh_work_initialized))
+ return;
+
+ chan->bh_work_fn = fn;
+ tasklet_setup(&chan->bh_tasklet, dma_chan_bh_entry);
+ chan->bh_work_initialized = true;
+}
+EXPORT_SYMBOL_GPL(dma_chan_init_bh);
+
+bool dma_chan_schedule_bh(struct dma_chan *chan)
+{
+ if (WARN_ON(!chan->bh_work_initialized))
+ return false;
+
+ tasklet_schedule(&chan->bh_tasklet);
+ return true;
+}
+EXPORT_SYMBOL_GPL(dma_chan_schedule_bh);
+
+void dma_chan_kill_bh(struct dma_chan *chan)
+{
+ if (!chan->bh_work_initialized)
+ return;
+
+ tasklet_kill(&chan->bh_tasklet);
+}
+EXPORT_SYMBOL_GPL(dma_chan_kill_bh);
+
static int __init dmaengine_init_unmap_pool(void)
{
int i;
diff --git a/drivers/dma/virt-dma.c b/drivers/dma/virt-dma.c
index 7961172a780d..3d3fadb81f8d 100644
--- a/drivers/dma/virt-dma.c
+++ b/drivers/dma/virt-dma.c
@@ -77,12 +77,12 @@ struct virt_dma_desc *vchan_find_desc(struct virt_dma_chan *vc,
EXPORT_SYMBOL_GPL(vchan_find_desc);

/*
- * This tasklet handles the completion of a DMA descriptor by
- * calling its callback and freeing it.
+ * This bottom-half handler completes a DMA descriptor by invoking its
+ * callback and freeing it.
*/
-static void vchan_complete(struct tasklet_struct *t)
+static void vchan_complete(struct dma_chan *chan)
{
- struct virt_dma_chan *vc = from_tasklet(vc, t, task);
+ struct virt_dma_chan *vc = to_virt_chan(chan);
struct virt_dma_desc *vd, *_vd;
struct dmaengine_desc_callback cb;
LIST_HEAD(head);
@@ -131,7 +131,7 @@ void vchan_init(struct virt_dma_chan *vc, struct dma_device *dmadev)
INIT_LIST_HEAD(&vc->desc_completed);
INIT_LIST_HEAD(&vc->desc_terminated);

- tasklet_setup(&vc->task, vchan_complete);
+ dma_chan_init_bh(&vc->chan, vchan_complete);

vc->chan.device = dmadev;
list_add_tail(&vc->chan.device_node, &dmadev->channels);
diff --git a/drivers/dma/virt-dma.h b/drivers/dma/virt-dma.h
index 59d9eabc8b67..75c6fee2f70e 100644
--- a/drivers/dma/virt-dma.h
+++ b/drivers/dma/virt-dma.h
@@ -8,8 +8,6 @@
#define VIRT_DMA_H

#include <linux/dmaengine.h>
-#include <linux/interrupt.h>
-
#include "dmaengine.h"

struct virt_dma_desc {
@@ -21,7 +19,6 @@ struct virt_dma_desc {

struct virt_dma_chan {
struct dma_chan chan;
- struct tasklet_struct task;
void (*desc_free)(struct virt_dma_desc *);

spinlock_t lock;
@@ -106,7 +103,7 @@ static inline void vchan_cookie_complete(struct virt_dma_desc *vd)
vd, cookie);
list_add_tail(&vd->node, &vc->desc_completed);

- tasklet_schedule(&vc->task);
+ dma_chan_schedule_bh(&vc->chan);
}
@@ -137,7 +134,7 @@ static inline void vchan_cyclic_callback(struct virt_dma_desc *vd)
struct virt_dma_chan *vc = to_virt_chan(vd->tx.chan);

vc->cyclic = vd;
- tasklet_schedule(&vc->task);
+ dma_chan_schedule_bh(&vc->chan);
}
@@ -223,7 +220,7 @@ static inline void vchan_synchronize(struct virt_dma_chan *vc)
LIST_HEAD(head);
unsigned long flags;

- tasklet_kill(&vc->task);
+ dma_chan_kill_bh(&vc->chan);

spin_lock_irqsave(&vc->lock, flags);
diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
index 99efe2b9b4ea..2d2c8ab3764d 100644
--- a/include/linux/dmaengine.h
+++ b/include/linux/dmaengine.h
@@ -12,6 +12,7 @@
#include <linux/scatterlist.h>
#include <linux/bitmap.h>
#include <linux/types.h>
+#include <linux/interrupt.h>
#include <asm/page.h>
@@ -295,6 +296,10 @@ enum dma_desc_metadata_mode {
DESC_METADATA_ENGINE = BIT(1),
};

+struct dma_chan;
+
+typedef void (*dma_chan_bh_work_fn)(struct dma_chan *chan);
+
@@ -334,6 +339,9 @@ struct dma_router {
* @router: pointer to the DMA router structure
* @route_data: channel specific data for the router
* @private: private data for certain client-channel associations
+ * @bh_tasklet: bottom-half tasklet stored per-channel
+ * @bh_work_fn: callback executed when @bh_tasklet runs
+ * @bh_work_initialized: indicates whether @bh_tasklet has been initialized
*/
struct dma_chan {
@@ -359,6 +367,9 @@ struct dma_chan {
void *route_data;

void *private;
+ struct tasklet_struct bh_tasklet;
+ dma_chan_bh_work_fn bh_work_fn;
+ bool bh_work_initialized;
};
@@ -1528,6 +1539,9 @@ struct dma_chan *devm_dma_request_chan(struct device *dev, const char *name);

void dma_release_channel(struct dma_chan *chan);
int dma_get_slave_caps(struct dma_chan *chan, struct dma_slave_caps *caps);
+void dma_chan_init_bh(struct dma_chan *chan, dma_chan_bh_work_fn fn);
+bool dma_chan_schedule_bh(struct dma_chan *chan);
+void dma_chan_kill_bh(struct dma_chan *chan);
#else
@@ -1575,6 +1589,20 @@ static inline int dma_get_slave_caps(struct dma_chan *chan,
{
return -ENXIO;
}
+
+static inline void dma_chan_init_bh(struct dma_chan *chan,
+ dma_chan_bh_work_fn fn)
+{
+}
+
+static inline bool dma_chan_schedule_bh(struct dma_chan *chan)
+{
+ return false;
+}
+
+static inline void dma_chan_kill_bh(struct dma_chan *chan)
+{
+}
#endif


>
> Arnd



--
- Allen