[PATCH v20 06/14] dmaengine: qcom: bam_dma: add support for BAM locking

From: Bartosz Golaszewski

Date: Mon Jun 29 2026 - 06:10:32 EST

Add support for BAM pipe locking. To that end: when starting DMA on an RX
channel - prepend the existing queue of issued descriptors with an
additional "dummy" command descriptor with the LOCK bit set. Once the
transaction is done (no more issued descriptors), issue one more dummy
descriptor with the UNLOCK bit.

We *must* wait until the transaction is signalled as done because we
must not perform any writes into config registers while the engine is
busy.

The dummy writes must be issued into a scratchpad register of the client
so provide a mechanism to communicate the right address via descriptor
metadata.

Reviewed-by: Manivannan Sadhasivam <mani@xxxxxxxxxx>
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@xxxxxxxxxxxxxxxx>
---
drivers/dma/qcom/bam_dma.c | 189 +++++++++++++++++++++++++++++++++++++--
include/linux/dma/qcom_bam_dma.h | 14 +++
2 files changed, 196 insertions(+), 7 deletions(-)

diff --git a/drivers/dma/qcom/bam_dma.c b/drivers/dma/qcom/bam_dma.c
index f3e713a5259c2c7c24cfdcec094814eb1202971a..f4f258994264a234f60debd3e66e31a6b35d1dc5 100644
--- a/drivers/dma/qcom/bam_dma.c
+++ b/drivers/dma/qcom/bam_dma.c
@@ -28,11 +28,13 @@
#include <linux/clk.h>
#include <linux/device.h>
#include <linux/dma-mapping.h>
+#include <linux/dma/qcom_bam_dma.h>
#include <linux/dmaengine.h>
#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/io.h>
#include <linux/kernel.h>
+#include <linux/lockdep.h>
#include <linux/module.h>
#include <linux/of_address.h>
#include <linux/of_dma.h>
@@ -60,6 +62,8 @@ struct bam_desc_hw {
#define DESC_FLAG_EOB BIT(13)
#define DESC_FLAG_NWD BIT(12)
#define DESC_FLAG_CMD BIT(11)
+#define DESC_FLAG_LOCK BIT(10)
+#define DESC_FLAG_UNLOCK BIT(9)

struct bam_async_desc {
struct virt_dma_desc vd;
@@ -72,6 +76,10 @@ struct bam_async_desc {

struct bam_desc_hw *curr_desc;

+ /* BAM locking infrastructure */
+ struct scatterlist lock_sg;
+ struct bam_cmd_element lock_ce;
+
/* list node for the desc in the bam_chan list of descriptors */
struct list_head desc_node;
enum dma_transfer_direction dir;
@@ -425,6 +433,11 @@ struct bam_chan {
struct list_head desc_list;

struct list_head node;
+
+ /* BAM locking infrastructure */
+ phys_addr_t scratchpad_addr;
+ enum dma_transfer_direction direction;
+ bool bam_locked;
};

static inline struct bam_chan *to_bam_chan(struct dma_chan *common)
@@ -638,8 +651,10 @@ static void bam_free_chan(struct dma_chan *chan)
goto err;
}

- scoped_guard(spinlock_irqsave, &bchan->vc.lock)
+ scoped_guard(spinlock_irqsave, &bchan->vc.lock) {
bam_reset_channel(bchan);
+ bchan->bam_locked = false;
+ }

dma_free_wc(bdev->dev, BAM_DESC_FIFO_SIZE, bchan->fifo_virt,
bchan->fifo_phys);
@@ -686,6 +701,35 @@ static int bam_slave_config(struct dma_chan *chan,
return 0;
}

+static int bam_metadata_attach(struct dma_async_tx_descriptor *desc, void *data, size_t len)
+{
+ struct bam_chan *bchan = to_bam_chan(desc->chan);
+ const struct bam_device_data *bdata = bchan->bdev->dev_data;
+ struct bam_desc_metadata *metadata = data;
+
+ if (!data)
+ return -EINVAL;
+
+ if (!bdata->pipe_lock_supported)
+ /*
+ * The client wants to use locking but this BAM version doesn't
+ * support it. Don't return an error here as this will stop the
+ * client from using DMA at all for no reason.
+ */
+ return 0;
+
+ guard(spinlock_irqsave)(&bchan->vc.lock);
+
+ bchan->scratchpad_addr = metadata->scratchpad_addr;
+ bchan->direction = metadata->direction;
+
+ return 0;
+}
+
+static const struct dma_descriptor_metadata_ops bam_metadata_ops = {
+ .attach = bam_metadata_attach,
+};
+
/**
* bam_prep_slave_sg - Prep slave sg transaction
*
@@ -702,6 +746,7 @@ static struct dma_async_tx_descriptor *bam_prep_slave_sg(struct dma_chan *chan,
void *context)
{
struct bam_chan *bchan = to_bam_chan(chan);
+ struct dma_async_tx_descriptor *tx_desc;
struct bam_device *bdev = bchan->bdev;
struct bam_async_desc *async_desc;
struct scatterlist *sg;
@@ -757,7 +802,10 @@ static struct dma_async_tx_descriptor *bam_prep_slave_sg(struct dma_chan *chan,
} while (remainder > 0);
}

- return vchan_tx_prep(&bchan->vc, &async_desc->vd, flags);
+ tx_desc = vchan_tx_prep(&bchan->vc, &async_desc->vd, flags);
+ tx_desc->metadata_ops = &bam_metadata_ops;
+
+ return tx_desc;
}

/**
@@ -802,6 +850,7 @@ static int bam_dma_terminate_all(struct dma_chan *chan)
}

vchan_get_all_descriptors(&bchan->vc, &head);
+ bchan->bam_locked = false;
}

vchan_dma_desc_free_list(&bchan->vc, &head);
@@ -859,6 +908,15 @@ static int bam_resume(struct dma_chan *chan)
return 0;
}

+static void bam_dma_free_lock_desc(struct virt_dma_desc *vd)
+{
+ struct bam_async_desc *async_desc = container_of(vd, struct bam_async_desc, vd);
+ struct dma_chan *chan = vd->tx.chan;
+
+ dma_unmap_sg(chan->slave, &async_desc->lock_sg, 1, DMA_TO_DEVICE);
+ kfree(async_desc);
+}
+
/**
* process_channel_irqs - processes the channel interrupts
* @bdev: bam controller
@@ -919,13 +977,23 @@ static u32 process_channel_irqs(struct bam_device *bdev)
* push back to front of desc_issued so that
* it gets restarted by the work queue.
*/
+
+ list_del(&async_desc->desc_node);
if (!async_desc->num_desc) {
- vchan_cookie_complete(&async_desc->vd);
+ struct bam_desc_hw *hdesc = async_desc->desc;
+ u16 flags = le16_to_cpu(hdesc->flags);
+
+ if (flags & (DESC_FLAG_LOCK | DESC_FLAG_UNLOCK)) {
+ if (flags & DESC_FLAG_UNLOCK)
+ bchan->bam_locked = false;
+ bam_dma_free_lock_desc(&async_desc->vd);
+ } else {
+ vchan_cookie_complete(&async_desc->vd);
+ }
} else {
list_add(&async_desc->vd.node,
&bchan->vc.desc_issued);
}
- list_del(&async_desc->desc_node);
}
}

@@ -1046,13 +1114,101 @@ static void bam_apply_new_config(struct bam_chan *bchan,
bchan->reconfigure = 0;
}

+static struct bam_async_desc *
+bam_make_lock_desc(struct bam_chan *bchan, unsigned long flag)
+{
+ struct dma_chan *chan = &bchan->vc.chan;
+ struct bam_async_desc *async_desc;
+ struct bam_desc_hw *desc;
+ struct virt_dma_desc *vd;
+ struct virt_dma_chan *vc;
+ unsigned int mapped;
+
+ async_desc = kzalloc_flex(*async_desc, desc, 1, GFP_NOWAIT);
+ if (!async_desc) {
+ dev_err(bchan->bdev->dev, "failed to allocate the BAM lock descriptor\n");
+ return ERR_PTR(-ENOMEM);
+ }
+
+ sg_init_table(&async_desc->lock_sg, 1);
+
+ async_desc->num_desc = 1;
+ async_desc->curr_desc = async_desc->desc;
+ async_desc->dir = DMA_MEM_TO_DEV;
+
+ desc = async_desc->desc;
+
+ bam_prep_ce_le32(&async_desc->lock_ce, bchan->scratchpad_addr, BAM_WRITE_COMMAND, 0);
+ sg_set_buf(&async_desc->lock_sg, &async_desc->lock_ce, sizeof(async_desc->lock_ce));
+
+ mapped = dma_map_sg(chan->slave, &async_desc->lock_sg, 1, DMA_TO_DEVICE);
+ if (!mapped) {
+ kfree(async_desc);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ desc->flags |= cpu_to_le16(DESC_FLAG_CMD | flag);
+ desc->addr = sg_dma_address(&async_desc->lock_sg);
+ desc->size = cpu_to_le16(sizeof(struct bam_cmd_element));
+
+ vc = &bchan->vc;
+ vd = &async_desc->vd;
+
+ dma_async_tx_descriptor_init(&vd->tx, &vc->chan);
+ vd->tx.flags = DMA_PREP_CMD;
+ vd->tx_result.result = DMA_TRANS_NOERROR;
+ vd->tx_result.residue = 0;
+
+ return async_desc;
+}
+
+static int bam_setup_pipe_lock(struct bam_chan *bchan)
+{
+ const struct bam_device_data *bdata = bchan->bdev->dev_data;
+ struct bam_async_desc *lock_desc, *unlock_desc;
+
+ lockdep_assert_held(&bchan->vc.lock);
+
+ if (!bdata->pipe_lock_supported || !bchan->scratchpad_addr ||
+ bchan->direction != DMA_MEM_TO_DEV)
+ return 0;
+
+ /*
+ * Allocate both the LOCK and the UNLOCK descriptors up-front so the
+ * operation is all-or-nothing: if either allocation fails we free both
+ * and run the sequence unlocked rather than leave the pipe locked with
+ * no matching UNLOCK.
+ *
+ * Both are queued in-band around the currently issued work: the LOCK is
+ * prepended so it enters the FIFO first, the UNLOCK is appended so it is
+ * the last descriptor of the sequence. They are loaded together with the
+ * payload in a single operation so the engine executes LOCK, the work
+ * and UNLOCK as one ordered batch.
+ */
+ lock_desc = bam_make_lock_desc(bchan, DESC_FLAG_LOCK);
+ if (IS_ERR(lock_desc))
+ return PTR_ERR(lock_desc);
+
+ unlock_desc = bam_make_lock_desc(bchan, DESC_FLAG_UNLOCK);
+ if (IS_ERR(unlock_desc)) {
+ bam_dma_free_lock_desc(&lock_desc->vd);
+ return PTR_ERR(unlock_desc);
+ }
+
+ list_add(&lock_desc->vd.node, &bchan->vc.desc_issued);
+ list_add_tail(&unlock_desc->vd.node, &bchan->vc.desc_issued);
+ bchan->bam_locked = true;
+
+ return 0;
+}
+
/**
* bam_start_dma - start next transaction
* @bchan: bam dma channel
*/
static void bam_start_dma(struct bam_chan *bchan)
{
- struct virt_dma_desc *vd = vchan_next_desc(&bchan->vc);
+ struct virt_dma_desc *vd;
struct bam_device *bdev = bchan->bdev;
struct bam_async_desc *async_desc = NULL;
struct bam_desc_hw *desc;
@@ -1064,9 +1220,23 @@ static void bam_start_dma(struct bam_chan *bchan)

lockdep_assert_held(&bchan->vc.lock);

+ vd = vchan_next_desc(&bchan->vc);
if (!vd)
return;

+ /*
+ * Wrap the issued work with a LOCK/UNLOCK pair exactly once, at the
+ * start of a fresh sequence and only when there is real work to lock
+ * around. On a re-entry after a full FIFO, we see the BAM is locked
+ * and must not add another pair we simply continue loading the
+ * remainder of the same locked sequence.
+ */
+ if (!bchan->bam_locked) {
+ ret = bam_setup_pipe_lock(bchan);
+ if (ret == 0 && bchan->bam_locked)
+ vd = vchan_next_desc(&bchan->vc);
+ }
+
ret = pm_runtime_get_sync(bdev->dev);
if (ret < 0)
return;
@@ -1191,8 +1361,12 @@ static void bam_issue_pending(struct dma_chan *chan)
*/
static void bam_dma_free_desc(struct virt_dma_desc *vd)
{
- struct bam_async_desc *async_desc = container_of(vd,
- struct bam_async_desc, vd);
+ struct bam_async_desc *async_desc = container_of(vd, struct bam_async_desc, vd);
+ struct bam_desc_hw *desc = async_desc->desc;
+ struct dma_chan *chan = vd->tx.chan;
+
+ if (le16_to_cpu(desc->flags) & (DESC_FLAG_LOCK | DESC_FLAG_UNLOCK))
+ dma_unmap_sg(chan->slave, &async_desc->lock_sg, 1, DMA_TO_DEVICE);

kfree(async_desc);
}
@@ -1384,6 +1558,7 @@ static int bam_dma_probe(struct platform_device *pdev)
bdev->common.device_terminate_all = bam_dma_terminate_all;
bdev->common.device_issue_pending = bam_issue_pending;
bdev->common.device_tx_status = bam_tx_status;
+ bdev->common.desc_metadata_modes = DESC_METADATA_CLIENT;
bdev->common.dev = bdev->dev;

ret = dma_async_device_register(&bdev->common);
diff --git a/include/linux/dma/qcom_bam_dma.h b/include/linux/dma/qcom_bam_dma.h
index 68fc0e643b1b97fe4520d5878daa322b81f4f559..a2594264b0f58c4b2b1c85e243cad0d5669c26dc 100644
--- a/include/linux/dma/qcom_bam_dma.h
+++ b/include/linux/dma/qcom_bam_dma.h
@@ -6,6 +6,8 @@
#ifndef _QCOM_BAM_DMA_H
#define _QCOM_BAM_DMA_H

+#include <linux/dmaengine.h>
+
#include <asm/byteorder.h>

/*
@@ -34,6 +36,18 @@ enum bam_command_type {
BAM_READ_COMMAND,
};

+/**
+ * struct bam_desc_metadata - DMA descriptor metadata specific to the BAM driver.
+ *
+ * @scratchpad_addr: Physical address to use for dummy write operations when
+ * queuing command descriptors with LOCK/UNLOCK bits set.
+ * @direction: Transfer direction of this channel.
+ */
+struct bam_desc_metadata {
+ phys_addr_t scratchpad_addr;
+ enum dma_transfer_direction direction;
+};
+
/*
* prep_bam_ce_le32 - Wrapper function to prepare a single BAM command
* element with the data already in le32 format.

--
2.47.3