[PATCH 1/5] media: hws: program video DMA through remap windows

From: Ben Hoff

Date: Sun Jun 21 2026 - 19:50:30 EST


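Video capture DMA setup uses the BAR remap table plus per-channel device
buffer offsets. Drop the stale direct writes to the legacy DMA address
register, along with the now-unused hws_set_dma_doorbell() helper and the
HWS_REG_VBUF1_ADDR/HWS_REG_DMA_ADDR definitions, and share the remap slot
offset macro between probe-time seeding and runtime programming.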
---
drivers/media/pci/hws/hws_irq.c | 6 ++----
drivers/media/pci/hws/hws_pci.c | 11 +++++------
drivers/media/pci/hws/hws_reg.h | 11 ++++++-----
drivers/media/pci/hws/hws_video.c | 31 +++++++------------------------
drivers/media/pci/hws/hws_video.h | 2 --
5 files changed, 20 insertions(+), 41 deletions(-)

diff --git a/drivers/media/pci/hws/hws_irq.c b/drivers/media/pci/hws/hws_irq.c
index eebb4b8a5cd5..a79cc10720d7 100644
--- a/drivers/media/pci/hws/hws_irq.c
+++ b/drivers/media/pci/hws/hws_irq.c
@@ -60,7 +60,7 @@ static int hws_arm_next(struct hws_pcie_dev *hws, u32 ch)
dev_dbg(&hws->pdev->dev, "arm_next(ch=%u): picked buffer %p\n", ch,
buf);

- /* Publish descriptor(s) before doorbell/MMIO kicks. */
+ /* Publish descriptor(s) before MMIO capture updates. */
wmb();

/* Avoid MMIO during suspend */
@@ -79,13 +79,11 @@ static int hws_arm_next(struct hws_pcie_dev *hws, u32 ch)
return -EBUSY;
}

- /* Also program the DMA address register directly */
+ /* Program the video DMA window for the selected buffer. */
{
dma_addr_t dma_addr =
vb2_dma_contig_plane_dma_addr(&buf->vb.vb2_buf, 0);
hws_program_dma_for_addr(hws, ch, dma_addr);
- iowrite32(lower_32_bits(dma_addr),
- hws->bar0_base + HWS_REG_DMA_ADDR(ch));
}

dev_dbg(&hws->pdev->dev, "arm_next(ch=%u): programmed buffer %p\n", ch,
diff --git a/drivers/media/pci/hws/hws_pci.c b/drivers/media/pci/hws/hws_pci.c
index 30bb7d34465b..10af6c30566a 100644
--- a/drivers/media/pci/hws/hws_pci.c
+++ b/drivers/media/pci/hws/hws_pci.c
@@ -315,12 +315,11 @@ static void hws_seed_channel(struct hws_pcie_dev *hws, int ch)

lo &= PCI_E_BAR_ADD_MASK;

- /* Program 64-bit BAR remap entry for this channel (table @ 0x208 + ch * 8) */
- writel_relaxed(hi, hws->bar0_base +
- PCI_ADDR_TABLE_BASE + 0x208 + ch * 8);
- writel_relaxed(lo, hws->bar0_base +
- PCI_ADDR_TABLE_BASE + 0x208 + ch * 8 +
- PCIE_BARADDROFSIZE);
+ /* Program 64-bit BAR remap entry for this channel. */
+ writel_relaxed(hi, hws->bar0_base + PCI_ADDR_TABLE_BASE +
+ HWS_VIDEO_REMAP_SLOT_OFF(ch));
+ writel_relaxed(lo, hws->bar0_base + PCI_ADDR_TABLE_BASE +
+ HWS_VIDEO_REMAP_SLOT_OFF(ch) + PCIE_BARADDROFSIZE);

/* Program capture engine per-channel base/half */
writel_relaxed((ch + 1) * PCIEBAR_AXI_BASE + pci_addr,
diff --git a/drivers/media/pci/hws/hws_reg.h b/drivers/media/pci/hws/hws_reg.h
index e4fb4af44434..344cb8d011a9 100644
--- a/drivers/media/pci/hws/hws_reg.h
+++ b/drivers/media/pci/hws/hws_reg.h
@@ -87,11 +87,12 @@
#define HWS_REG_HDCP_STATUS (CVBS_IN_BASE + 8 * PCIE_BARADDROFSIZE)
#define HWS_REG_DMA_MAX_SIZE (CVBS_IN_BASE + 9 * PCIE_BARADDROFSIZE)

-/* Buffer addresses (written once during init/reset). */
-/* Base of host-visible buffer. */
-#define HWS_REG_VBUF1_ADDR (CVBS_IN_BASE + 25 * PCIE_BARADDROFSIZE)
-/* Per-channel DMA address. */
-#define HWS_REG_DMA_ADDR(ch) (CVBS_IN_BASE + (26 + (ch)) * PCIE_BARADDROFSIZE)
+/*
+ * Video DMA setup uses one BAR remap-table slot per capture channel. The slot
+ * supplies the host DMA page, while CVBS_IN_BUF_BASE + ch * PCIE_BARADDROFSIZE
+ * supplies the device-side buffer offset within that page.
+ */
+#define HWS_VIDEO_REMAP_SLOT_OFF(ch) (0x208 + ((ch) * 8))

/* Per-channel live buffer toggles (read-only). */
#define HWS_REG_VBUF_TOGGLE(ch) (CVBS_IN_BASE + (32 + (ch)) * PCIE_BARADDROFSIZE)
diff --git a/drivers/media/pci/hws/hws_video.c b/drivers/media/pci/hws/hws_video.c
index 18e4bc6901d3..b1af81d1368a 100644
--- a/drivers/media/pci/hws/hws_video.c
+++ b/drivers/media/pci/hws/hws_video.c
@@ -26,7 +26,6 @@
#include "hws_irq.h"
#include "hws_v4l2_ioctl.h"

-#define HWS_REMAP_SLOT_OFF(ch) (0x208 + (ch) * 8) /* one 64-bit slot per ch */
#define HWS_BUF_BASE_OFF(ch) (CVBS_IN_BUF_BASE + (ch) * PCIE_BARADDROFSIZE)
#define HWS_HALF_SZ_OFF(ch) (CVBS_IN_BUF_BASE2 + (ch) * PCIE_BARADDROFSIZE)

@@ -59,21 +58,13 @@ module_param_named(dma_window_verify, dma_window_verify, bool, 0644);
MODULE_PARM_DESC(dma_window_verify,
"Read back DMA window registers after programming (debug)");

-void hws_set_dma_doorbell(struct hws_pcie_dev *hws, unsigned int ch,
- dma_addr_t dma, const char *tag)
-{
- iowrite32(lower_32_bits(dma), hws->bar0_base + HWS_REG_DMA_ADDR(ch));
- dev_dbg(&hws->pdev->dev, "dma_doorbell ch%u: dma=0x%llx tag=%s\n", ch,
- (u64)dma, tag ? tag : "");
-}
-
static void hws_program_dma_window(struct hws_video *vid, dma_addr_t dma)
{
const u32 addr_mask = PCI_E_BAR_ADD_MASK;
const u32 addr_low_mask = PCI_E_BAR_ADD_LOWMASK;
struct hws_pcie_dev *hws = vid->parent;
unsigned int ch = vid->channel_index;
- u32 table_off = HWS_REMAP_SLOT_OFF(ch);
+ u32 table_off = HWS_VIDEO_REMAP_SLOT_OFF(ch);
u32 lo = lower_32_bits(dma);
u32 hi = upper_32_bits(dma);
u32 pci_addr = lo & addr_low_mask;
@@ -170,8 +161,6 @@ void hws_prime_next_locked(struct hws_video *vid)
vid->next_prepared = next;
dma = vb2_dma_contig_plane_dma_addr(&next->vb.vb2_buf, 0);
hws_program_dma_for_addr(hws, vid->channel_index, dma);
- iowrite32(lower_32_bits(dma),
- hws->bar0_base + HWS_REG_DMA_ADDR(vid->channel_index));
dev_dbg(&hws->pdev->dev,
"ch%u pre-armed next buffer %p dma=0x%llx\n",
vid->channel_index, next, (u64)dma);
@@ -183,7 +172,7 @@ static bool hws_force_no_signal_frame(struct hws_video *v, const char *tag)
unsigned long flags;
struct hwsvideo_buffer *buf = NULL, *next = NULL;
bool have_next = false;
- bool doorbell = false;
+ bool programmed = false;

if (!v)
return false;
@@ -239,12 +228,11 @@ static bool hws_force_no_signal_frame(struct hws_video *v, const char *tag)
if (have_next && next) {
dma_addr_t dma =
vb2_dma_contig_plane_dma_addr(&next->vb.vb2_buf, 0);
+
hws_program_dma_for_addr(hws, v->channel_index, dma);
- hws_set_dma_doorbell(hws, v->channel_index, dma,
- tag ? tag : "nosignal_zero");
- doorbell = true;
+ programmed = true;
}
- if (doorbell) {
+ if (programmed) {
wmb(); /* ensure descriptors visible before enabling capture */
hws_enable_video_capture(hws, v->channel_index, true);
}
@@ -534,7 +522,6 @@ static void hws_seed_dma_windows(struct hws_pcie_dev *hws)
{
const u32 addr_mask = PCI_E_BAR_ADD_MASK;
const u32 addr_low_mask = PCI_E_BAR_ADD_LOWMASK;
- u32 table = 0x208; /* one 64-bit entry per channel */
unsigned int ch;

if (!hws || !hws->bar0_base)
@@ -544,7 +531,7 @@ static void hws_seed_dma_windows(struct hws_pcie_dev *hws)
if (!hws->cur_max_video_ch || hws->cur_max_video_ch > hws->max_channels)
hws->cur_max_video_ch = hws->max_channels;

- for (ch = 0; ch < hws->cur_max_video_ch; ch++, table += 8) {
+ for (ch = 0; ch < hws->cur_max_video_ch; ch++) {
if (!hws->scratch_vid[ch].cpu)
continue;

@@ -554,6 +541,7 @@ static void hws_seed_dma_windows(struct hws_pcie_dev *hws)
u32 lo = lower_32_bits(p) & addr_mask;
u32 hi = upper_32_bits(p);
u32 pci_addr_low = lower_32_bits(p) & addr_low_mask;
+ u32 table = HWS_VIDEO_REMAP_SLOT_OFF(ch);

writel_relaxed(hi,
hws->bar0_base + PCI_ADDR_TABLE_BASE +
@@ -1128,8 +1116,6 @@ static void hws_buffer_queue(struct vb2_buffer *vb)
dma_addr = vb2_dma_contig_plane_dma_addr(&buf->vb.vb2_buf, 0);
hws_program_dma_for_addr(vid->parent, vid->channel_index,
dma_addr);
- iowrite32(lower_32_bits(dma_addr),
- hws->bar0_base + HWS_REG_DMA_ADDR(vid->channel_index));

wmb(); /* ensure descriptors visible before enabling capture */
hws_enable_video_capture(hws, vid->channel_index, true);
@@ -1214,9 +1200,6 @@ static int hws_start_streaming(struct vb2_queue *q, unsigned int count)

dma_addr = vb2_dma_contig_plane_dma_addr(prog_vb2, 0);
hws_program_dma_for_addr(hws, v->channel_index, dma_addr);
- iowrite32(lower_32_bits(dma_addr),
- hws->bar0_base +
- HWS_REG_DMA_ADDR(v->channel_index));
dev_dbg(&hws->pdev->dev,
"start_streaming: ch=%u programmed buffer %p dma=0x%08x\n",
v->channel_index, to_program,
diff --git a/drivers/media/pci/hws/hws_video.h b/drivers/media/pci/hws/hws_video.h
index 4feaf5b2f5a9..fcd3eca0b0c3 100644
--- a/drivers/media/pci/hws/hws_video.h
+++ b/drivers/media/pci/hws/hws_video.h
@@ -20,8 +20,6 @@ void hws_init_video_sys(struct hws_pcie_dev *hws, bool enable);
void hws_program_dma_for_addr(struct hws_pcie_dev *hws,
unsigned int ch,
dma_addr_t dma);
-void hws_set_dma_doorbell(struct hws_pcie_dev *hws, unsigned int ch,
- dma_addr_t dma, const char *tag);

int hws_video_quiesce(struct hws_pcie_dev *hws, const char *reason);
void hws_video_pm_resume(struct hws_pcie_dev *hws);
--
2.54.0