Re: [PATCH V1] accel/amdxdna: Add carveout memory support for non-IOMMU systems

From: Lizhi Hou

Date: Mon Apr 20 2026 - 13:34:01 EST



On 4/19/26 11:13, Mario Limonciello wrote:


On 4/17/26 16:06, Lizhi Hou wrote:
From: Max Zhen <max.zhen@xxxxxxx>

Add support for allocating buffers from reserved carveout memory when
IOMMU is not available. This is useful during debugging or bring-up.

In this configuration, the device uses physical addresses and does
not support scatter-gather lists, requiring physically contiguous
buffers.

Implement carveout-backed allocation and integrate it into buffer
management to support operation in physical address mode.

Running with IOMMU disabled is unlikely to be a common production scenario.  At first I was thinking this is OK, but considering Greg's strong comments about driver-specific module parameters I /wonder/ if this should be gated behind a Kconfig option to be used at bring-up?

The Kconfig option could be something like XDNA_CARVEOUT and default to 0.  Then it can be set to any value necessary for debugging.

You could go a step further and only let XDNA_CARVEOUT do the reservation when IOMMU wasn't found.

Thanks for your comments. It would be more useful to be able to change the carveout setting without recompiling the driver. Because this is a debug feature, we will implement a debugfs interface to configure the carveout address and size.


Lizhi



Signed-off-by: Max Zhen <max.zhen@xxxxxxx>
Signed-off-by: Lizhi Hou <lizhi.hou@xxxxxxx>
---
  drivers/accel/amdxdna/Makefile          |   1 +
  drivers/accel/amdxdna/amdxdna_cbuf.c    | 249 ++++++++++++++++++++++++
  drivers/accel/amdxdna/amdxdna_cbuf.h    |  16 ++
  drivers/accel/amdxdna/amdxdna_gem.c     |  95 +++++++--
  drivers/accel/amdxdna/amdxdna_iommu.c   |  77 +++++---
  drivers/accel/amdxdna/amdxdna_pci_drv.c |  91 ++++++---
  drivers/accel/amdxdna/amdxdna_pci_drv.h |   4 +-
  7 files changed, 454 insertions(+), 79 deletions(-)
  create mode 100644 drivers/accel/amdxdna/amdxdna_cbuf.c
  create mode 100644 drivers/accel/amdxdna/amdxdna_cbuf.h

diff --git a/drivers/accel/amdxdna/Makefile b/drivers/accel/amdxdna/Makefile
index 79369e497540..a055aea36971 100644
--- a/drivers/accel/amdxdna/Makefile
+++ b/drivers/accel/amdxdna/Makefile
@@ -12,6 +12,7 @@ amdxdna-y := \
      aie2_solver.o \
      aie4_message.o \
      aie4_pci.o \
+    amdxdna_cbuf.o \
      amdxdna_ctx.o \
      amdxdna_gem.o \
      amdxdna_iommu.o \
diff --git a/drivers/accel/amdxdna/amdxdna_cbuf.c b/drivers/accel/amdxdna/amdxdna_cbuf.c
new file mode 100644
index 000000000000..4a556199a461
--- /dev/null
+++ b/drivers/accel/amdxdna/amdxdna_cbuf.c
@@ -0,0 +1,249 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2026, Advanced Micro Devices, Inc.
+ */
+
+#include <drm/drm_mm.h>
+#include <drm/drm_prime.h>
+
+#include "amdxdna_cbuf.h"
+#include "amdxdna_pci_drv.h"
+
+/*
+ * This is a platform debug/bringup feature.
+ *
+ * Carveout memory is a chunk of memory which is physically contiguous and
+ * is reserved during early boot time. There is only one chunk of such memory
+ * per system. Once available, all BOs accessible from device should be
+ * allocated from this memory.
+ */
+u64 carveout_addr;
+module_param(carveout_addr, ullong, 0400);
+MODULE_PARM_DESC(carveout_addr, "Physical memory address for reserved memory chunk");
+
+u64 carveout_size;
+module_param(carveout_size, ullong, 0400);
+MODULE_PARM_DESC(carveout_size, "Physical memory size for reserved memory chunk");
+
+struct amdxdna_carveout {
+    struct drm_mm    mm;
+    struct mutex    lock; /* protect mm */
+} carveout;
+
+bool amdxdna_use_carveout(void)
+{
+    return !!carveout_size;
+}
+
+void amdxdna_carveout_init(void)
+{
+    if (!amdxdna_use_carveout())
+        return;
+    mutex_init(&carveout.lock);
+    drm_mm_init(&carveout.mm, carveout_addr, carveout_size);
+    pr_info("Use carveout mem, addr=0x%llx, size=0x%llx\n", carveout_addr, carveout_size);
+}
+
+void amdxdna_carveout_fini(void)
+{
+    if (!amdxdna_use_carveout())
+        return;
+    drm_mm_takedown(&carveout.mm);
+    mutex_destroy(&carveout.lock);
+}
+
+struct amdxdna_cbuf_priv {
+    struct drm_mm_node node;
+};
+
+static struct sg_table *amdxdna_cbuf_map(struct dma_buf_attachment *attach,
+                     enum dma_data_direction direction)
+{
+    struct amdxdna_cbuf_priv *cbuf = attach->dmabuf->priv;
+    struct device *dev = attach->dev;
+    struct scatterlist *sgl, *sg;
+    int ret, n_entries, i;
+    struct sg_table *sgt;
+    dma_addr_t dma_addr;
+    size_t dma_size;
+    size_t max_seg;
+
+    sgt = kzalloc_obj(*sgt);
+    if (!sgt)
+        return ERR_PTR(-ENOMEM);
+
+    max_seg = min_t(size_t, UINT_MAX, dma_max_mapping_size(dev));
+    n_entries = (cbuf->node.size + max_seg - 1) / max_seg;
+    sgl = kzalloc_objs(*sg, n_entries);
+    if (!sgl) {
+        ret = -ENOMEM;
+        goto free_sgt;
+    }
+    sg_init_table(sgl, n_entries);
+    sgt->orig_nents = n_entries;
+    sgt->nents = n_entries;
+    sgt->sgl = sgl;
+
+    dma_size = cbuf->node.size;
+    dma_addr = dma_map_resource(dev, cbuf->node.start, dma_size,
+                    direction, DMA_ATTR_SKIP_CPU_SYNC);
+    ret = dma_mapping_error(dev, dma_addr);
+    if (ret) {
+        pr_err("Failed to dma_map_resource carveout dma buf, ret %d\n", ret);
+        goto free_sgl;
+    }
+
+    for_each_sgtable_dma_sg(sgt, sg, i) {
+        size_t len = min_t(size_t, max_seg, dma_size);
+
+        sg_dma_address(sg) = dma_addr;
+        sg_dma_len(sg) = len;
+        dma_addr += len;
+        dma_size -= len;
+    }
+
+    return sgt;
+
+free_sgl:
+    kfree(sgl);
+free_sgt:
+    kfree(sgt);
+    return ERR_PTR(ret);
+}
+
+static void amdxdna_cbuf_unmap(struct dma_buf_attachment *attach,
+                   struct sg_table *sgt,
+                   enum dma_data_direction direction)
+{
+    dma_unmap_resource(attach->dev, sg_dma_address(sgt->sgl),
+               drm_prime_get_contiguous_size(sgt), direction,
+               DMA_ATTR_SKIP_CPU_SYNC);
+    sg_free_table(sgt);
+    kfree(sgt);
+}
+
+static void amdxdna_cbuf_release(struct dma_buf *dbuf)
+{
+    struct amdxdna_cbuf_priv *cbuf = dbuf->priv;
+
+    mutex_lock(&carveout.lock);
+    drm_mm_remove_node(&cbuf->node);
+    mutex_unlock(&carveout.lock);
+
+    kfree(cbuf);
+}
+
+static vm_fault_t amdxdna_cbuf_vm_fault(struct vm_fault *vmf)
+{
+    struct vm_area_struct *vma = vmf->vma;
+    struct amdxdna_cbuf_priv *cbuf;
+    unsigned long pfn;
+    pgoff_t pgoff;
+
+    cbuf = vma->vm_private_data;
+    pgoff = (vmf->address - vma->vm_start) >> PAGE_SHIFT;
+    pfn = (cbuf->node.start >> PAGE_SHIFT) + pgoff;
+
+    return vmf_insert_pfn(vma, vmf->address, pfn);
+}
+
+static const struct vm_operations_struct amdxdna_cbuf_vm_ops = {
+    .fault = amdxdna_cbuf_vm_fault,
+};
+
+static int amdxdna_cbuf_mmap(struct dma_buf *dbuf, struct vm_area_struct *vma)
+{
+    struct amdxdna_cbuf_priv *cbuf = dbuf->priv;
+
+    vma->vm_ops = &amdxdna_cbuf_vm_ops;
+    vma->vm_private_data = cbuf;
+    vm_flags_set(vma, VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP);
+
+    return 0;
+}
+
+static int amdxdna_cbuf_vmap(struct dma_buf *dbuf, struct iosys_map *map)
+{
+    struct amdxdna_cbuf_priv *cbuf = dbuf->priv;
+    void *kva;
+
+    kva = memremap(cbuf->node.start, cbuf->node.size, MEMREMAP_WB);
+    if (!kva) {
+        pr_err("Failed to vmap carveout dma buf\n");
+        return -ENOMEM;
+    }
+
+    iosys_map_set_vaddr(map, kva);
+    return 0;
+}
+
+static void amdxdna_cbuf_vunmap(struct dma_buf *dbuf, struct iosys_map *map)
+{
+    memunmap(map->vaddr);
+}
+
+static const struct dma_buf_ops amdxdna_cbuf_dmabuf_ops = {
+    .map_dma_buf = amdxdna_cbuf_map,
+    .unmap_dma_buf = amdxdna_cbuf_unmap,
+    .release = amdxdna_cbuf_release,
+    .mmap = amdxdna_cbuf_mmap,
+    .vmap = amdxdna_cbuf_vmap,
+    .vunmap = amdxdna_cbuf_vunmap,
+};
+
+static int amdxdna_cbuf_clear(struct dma_buf *dbuf)
+{
+    struct iosys_map vmap = IOSYS_MAP_INIT_VADDR(NULL);
+
+    dma_buf_vmap(dbuf, &vmap);
+    if (!vmap.vaddr)
+        return -EFAULT;
+
+    memset(vmap.vaddr, 0, dbuf->size);
+    dma_buf_vunmap(dbuf, &vmap);
+
+    return 0;
+}
+
+struct dma_buf *amdxdna_get_cbuf(struct drm_device *dev, size_t size, u64 alignment)
+{
+    DEFINE_DMA_BUF_EXPORT_INFO(exp_info);
+    struct amdxdna_cbuf_priv *cbuf;
+    struct dma_buf *dbuf;
+    int ret;
+
+    cbuf = kzalloc_obj(*cbuf);
+    if (!cbuf)
+        return ERR_PTR(-ENOMEM);
+
+    mutex_lock(&carveout.lock);
+    ret = drm_mm_insert_node_generic(&carveout.mm, &cbuf->node, size,
+                     alignment, 0, DRM_MM_INSERT_BEST);
+    mutex_unlock(&carveout.lock);
+    if (ret)
+        goto free_cbuf;
+
+    exp_info.size = size;
+    exp_info.ops = &amdxdna_cbuf_dmabuf_ops;
+    exp_info.priv = cbuf;
+    exp_info.flags = O_RDWR;
+    dbuf = dma_buf_export(&exp_info);
+    if (IS_ERR(dbuf)) {
+        ret = PTR_ERR(dbuf);
+        goto remove_node;
+    }
+
+    ret = amdxdna_cbuf_clear(dbuf);
+    if (ret) {
+        dma_buf_put(dbuf);
+        goto out;
+    }
+    return dbuf;
+
+remove_node:
+    drm_mm_remove_node(&cbuf->node);
+free_cbuf:
+    kfree(cbuf);
+out:
+    return ERR_PTR(ret);
+}
diff --git a/drivers/accel/amdxdna/amdxdna_cbuf.h b/drivers/accel/amdxdna/amdxdna_cbuf.h
new file mode 100644
index 000000000000..15e189ce779e
--- /dev/null
+++ b/drivers/accel/amdxdna/amdxdna_cbuf.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2026, Advanced Micro Devices, Inc.
+ */
+#ifndef _AMDXDNA_CBUF_H_
+#define _AMDXDNA_CBUF_H_
+
+#include <drm/drm_device.h>
+#include <linux/dma-buf.h>
+
+bool amdxdna_use_carveout(void);
+void amdxdna_carveout_init(void);
+void amdxdna_carveout_fini(void);
+struct dma_buf *amdxdna_get_cbuf(struct drm_device *dev, size_t size, u64 alignment);
+
+#endif
diff --git a/drivers/accel/amdxdna/amdxdna_gem.c b/drivers/accel/amdxdna/amdxdna_gem.c
index 238ee244d4a6..905514ec183c 100644
--- a/drivers/accel/amdxdna/amdxdna_gem.c
+++ b/drivers/accel/amdxdna/amdxdna_gem.c
@@ -16,6 +16,7 @@
  #include <linux/pagemap.h>
  #include <linux/vmalloc.h>
  +#include "amdxdna_cbuf.h"
  #include "amdxdna_ctx.h"
  #include "amdxdna_gem.h"
  #include "amdxdna_pci_drv.h"
@@ -516,10 +517,6 @@ static void amdxdna_imported_obj_free(struct amdxdna_gem_obj *abo)
  static inline bool
  amdxdna_gem_skip_bo_usage(struct amdxdna_gem_obj *abo)
  {
-    /* Do not count imported BOs since the buffer is not allocated by us. */
-    if (is_import_bo(abo))
-        return true;
-
      /* Already counted as part of HEAP BO */
      if (abo->type == AMDXDNA_BO_DEV)
          return true;
@@ -571,9 +568,7 @@ static void amdxdna_gem_obj_free(struct drm_gem_object *gobj)
      if (abo->type == AMDXDNA_BO_DEV_HEAP)
          drm_mm_takedown(&abo->mm);
  -    if (amdxdna_iova_on(xdna))
-        amdxdna_iommu_unmap_bo(xdna, abo);
-
+    amdxdna_dma_unmap_bo(xdna, abo);
      amdxdna_gem_vunmap(abo);
      mutex_destroy(&abo->lock);
  @@ -591,18 +586,20 @@ static int amdxdna_gem_obj_open(struct drm_gem_object *gobj, struct drm_file *fi
        guard(mutex)(&abo->lock);
      abo->open_ref++;
+    if (abo->open_ref > 1)
+        return 0;
  -    if (abo->open_ref == 1) {
-        /* Attached to the client when first opened by it. */
-        abo->client = filp->driver_priv;
-        amdxdna_gem_add_bo_usage(abo);
-    }
-    if (amdxdna_iova_on(xdna)) {
-        ret = amdxdna_iommu_map_bo(xdna, abo);
+    /* Attached to the client when first opened by it. */
+    abo->client = filp->driver_priv;
+
+    /* No need to set up dma addr mapping in PASID mode. */
+    if (!amdxdna_pasid_on(abo->client)) {
+        ret = amdxdna_dma_map_bo(xdna, abo);
          if (ret)
              return ret;
      }
  +    amdxdna_gem_add_bo_usage(abo);
      return 0;
  }
  @@ -620,6 +617,39 @@ static void amdxdna_gem_obj_close(struct drm_gem_object *gobj, struct drm_file *
      }
  }
  +static int amdxdna_gem_obj_vmap(struct drm_gem_object *obj, struct iosys_map *map)
+{
+    struct amdxdna_gem_obj *abo = to_xdna_obj(obj);
+    int ret;
+
+    iosys_map_clear(map);
+
+    dma_resv_assert_held(obj->resv);
+
+    if (is_import_bo(abo))
+        ret = dma_buf_vmap(abo->dma_buf, map);
+    else
+        ret = drm_gem_shmem_object_vmap(obj, map);
+    if (ret)
+        return ret;
+    if (!map->vaddr)
+        return -ENOMEM;
+
+    return 0;
+}
+
+static void amdxdna_gem_obj_vunmap(struct drm_gem_object *obj, struct iosys_map *map)
+{
+    struct amdxdna_gem_obj *abo = to_xdna_obj(obj);
+
+    dma_resv_assert_held(obj->resv);
+
+    if (is_import_bo(abo))
+        dma_buf_vunmap(abo->dma_buf, map);
+    else
+        drm_gem_shmem_object_vunmap(obj, map);
+}
+
  static int amdxdna_gem_dev_obj_vmap(struct drm_gem_object *obj, struct iosys_map *map)
  {
      struct amdxdna_gem_obj *abo = to_xdna_obj(obj);
@@ -645,8 +675,8 @@ static const struct drm_gem_object_funcs amdxdna_gem_shmem_funcs = {
      .pin = drm_gem_shmem_object_pin,
      .unpin = drm_gem_shmem_object_unpin,
      .get_sg_table = drm_gem_shmem_object_get_sg_table,
-    .vmap = drm_gem_shmem_object_vmap,
-    .vunmap = drm_gem_shmem_object_vunmap,
+    .vmap = amdxdna_gem_obj_vmap,
+    .vunmap = amdxdna_gem_obj_vunmap,
      .mmap = amdxdna_gem_obj_mmap,
      .vm_ops = &drm_gem_shmem_vm_ops,
      .export = amdxdna_gem_prime_export,
@@ -714,6 +744,36 @@ amdxdna_gem_create_ubuf_object(struct drm_device *dev, struct amdxdna_drm_create
      return to_xdna_obj(gobj);
  }
  +static struct amdxdna_gem_obj *
+amdxdna_gem_create_cbuf_object(struct drm_device *dev, struct amdxdna_drm_create_bo *args)
+{
+    struct amdxdna_dev *xdna = to_xdna_dev(dev);
+    size_t size = PAGE_ALIGN(args->size);
+    struct drm_gem_object *gobj;
+    struct amdxdna_gem_obj *ret;
+    struct dma_buf *dma_buf;
+    u64 align;
+
+    if (!size) {
+        XDNA_ERR(xdna, "Invalid BO size 0x%llx", args->size);
+        return ERR_PTR(-EINVAL);
+    }
+
+    align = (args->type == AMDXDNA_BO_DEV_HEAP) ? xdna->dev_info->dev_mem_size : 0;
+    dma_buf = amdxdna_get_cbuf(dev, size, align);
+    if (IS_ERR(dma_buf))
+        return ERR_CAST(dma_buf);
+
+    gobj = amdxdna_gem_prime_import(dev, dma_buf);
+    if (IS_ERR(gobj))
+        ret = ERR_CAST(gobj);
+    else
+        ret = to_xdna_obj(gobj);
+
+    dma_buf_put(dma_buf);
+    return ret;
+}
+
  struct drm_gem_object *
  amdxdna_gem_prime_import(struct drm_device *dev, struct dma_buf *dma_buf)
  {
@@ -769,6 +829,8 @@ amdxdna_drm_create_share_bo(struct drm_device *dev,
        if (args->vaddr)
          abo = amdxdna_gem_create_ubuf_object(dev, args);
+    else if (amdxdna_use_carveout())
+        abo = amdxdna_gem_create_cbuf_object(dev, args);
      else
          abo = amdxdna_gem_create_shmem_object(dev, args);
      if (IS_ERR(abo))
@@ -884,7 +946,6 @@ int amdxdna_drm_create_bo_ioctl(struct drm_device *dev, void *data, struct drm_f
           args->type, args->vaddr, args->size, args->flags);
      switch (args->type) {
      case AMDXDNA_BO_CMD:
-        fallthrough;
      case AMDXDNA_BO_SHARE:
          abo = amdxdna_drm_create_share_bo(dev, args, filp);
          break;
diff --git a/drivers/accel/amdxdna/amdxdna_iommu.c b/drivers/accel/amdxdna/amdxdna_iommu.c
index 5a9f06183487..eff00131d0f8 100644
--- a/drivers/accel/amdxdna/amdxdna_iommu.c
+++ b/drivers/accel/amdxdna/amdxdna_iommu.c
@@ -35,14 +35,15 @@ static struct iova *amdxdna_iommu_alloc_iova(struct amdxdna_dev *xdna,
      return iova;
  }
  -int amdxdna_iommu_map_bo(struct amdxdna_dev *xdna, struct amdxdna_gem_obj *abo)
+int amdxdna_dma_map_bo(struct amdxdna_dev *xdna, struct amdxdna_gem_obj *abo)
  {
+    unsigned long contig_sz;
      struct sg_table *sgt;
      dma_addr_t dma_addr;
      struct iova *iova;
      ssize_t size;
  -    if (abo->type != AMDXDNA_BO_DEV_HEAP && abo->type != AMDXDNA_BO_SHMEM)
+    if (abo->type != AMDXDNA_BO_DEV_HEAP && abo->type != AMDXDNA_BO_SHARE)
          return 0;
        sgt = drm_gem_shmem_get_pages_sgt(&abo->base);
@@ -51,47 +52,63 @@ int amdxdna_iommu_map_bo(struct amdxdna_dev *xdna, struct amdxdna_gem_obj *abo)
          return PTR_ERR(sgt);
      }
  -    if (!sgt->orig_nents || !sg_page(sgt->sgl)) {
-        XDNA_ERR(xdna, "sgl is zero length or not page backed");
+    if (!sgt->orig_nents) {
+        XDNA_ERR(xdna, "sgl is zero length");
          return -EOPNOTSUPP;
      }
  -    iova = amdxdna_iommu_alloc_iova(xdna, abo->mem.size, &dma_addr,
-                    (abo->type == AMDXDNA_BO_DEV_HEAP));
-    if (IS_ERR(iova)) {
-        XDNA_ERR(xdna, "Alloc iova failed, ret %ld", PTR_ERR(iova));
-        return PTR_ERR(iova);
+    if (amdxdna_iova_on(xdna)) {
+        if (!sg_page(sgt->sgl)) {
+            XDNA_ERR(xdna, "sgl is not page backed");
+            return -EOPNOTSUPP;
+        }
+
+        iova = amdxdna_iommu_alloc_iova(xdna, abo->mem.size, &dma_addr,
+                        (abo->type == AMDXDNA_BO_DEV_HEAP));
+        if (IS_ERR(iova)) {
+            XDNA_ERR(xdna, "Alloc iova failed, ret %ld", PTR_ERR(iova));
+            return PTR_ERR(iova);
+        }
+
+        size = iommu_map_sgtable(xdna->domain, dma_addr, sgt,
+                     IOMMU_READ | IOMMU_WRITE);
+        if (size < 0) {
+            XDNA_ERR(xdna, "iommu_map_sgtable failed: %zd", size);
+            __free_iova(&xdna->iovad, iova);
+            return size;
+        }
+        if (size < abo->mem.size) {
+            iommu_unmap(xdna->domain, dma_addr, size);
+            __free_iova(&xdna->iovad, iova);
+            return -ENXIO;
+        }
+        abo->mem.dma_addr = dma_addr;
+    } else {
+        /* Device doesn't support scatter/gather list, fail non-contiguous mapping. */
+        contig_sz = drm_prime_get_contiguous_size(sgt);
+        if (contig_sz < abo->mem.size) {
+            XDNA_ERR(xdna,
+                 "noncontiguous dma addr, contig size:%ld, expected size:%ld",
+                 contig_sz, abo->mem.size);
+            return -EINVAL;
+        }
+        abo->mem.dma_addr = sg_dma_address(sgt->sgl);
      }
-
-    size = iommu_map_sgtable(xdna->domain, dma_addr, sgt,
-                 IOMMU_READ | IOMMU_WRITE);
-    if (size < 0) {
-        XDNA_ERR(xdna, "iommu_map_sgtable failed: %zd", size);
-        __free_iova(&xdna->iovad, iova);
-        return size;
-    }
-
-    if (size < abo->mem.size) {
-        iommu_unmap(xdna->domain, dma_addr, size);
-        __free_iova(&xdna->iovad, iova);
-        return -ENXIO;
-    }
-
-    abo->mem.dma_addr = dma_addr;
-
      return 0;
  }
  -void amdxdna_iommu_unmap_bo(struct amdxdna_dev *xdna, struct amdxdna_gem_obj *abo)
+void amdxdna_dma_unmap_bo(struct amdxdna_dev *xdna, struct amdxdna_gem_obj *abo)
  {
      size_t size;
        if (abo->mem.dma_addr == AMDXDNA_INVALID_ADDR)
          return;
  -    size = iova_align(&xdna->iovad, abo->mem.size);
-    iommu_unmap(xdna->domain, abo->mem.dma_addr, size);
-    free_iova(&xdna->iovad, iova_pfn(&xdna->iovad, abo->mem.dma_addr));
+    if (amdxdna_iova_on(xdna)) {
+        size = iova_align(&xdna->iovad, abo->mem.size);
+        iommu_unmap(xdna->domain, abo->mem.dma_addr, size);
+        free_iova(&xdna->iovad, iova_pfn(&xdna->iovad, abo->mem.dma_addr));
+    }
      abo->mem.dma_addr = AMDXDNA_INVALID_ADDR;
  }
  diff --git a/drivers/accel/amdxdna/amdxdna_pci_drv.c b/drivers/accel/amdxdna/amdxdna_pci_drv.c
index 21eddfc538d0..b8c5dbc12489 100644
--- a/drivers/accel/amdxdna/amdxdna_pci_drv.c
+++ b/drivers/accel/amdxdna/amdxdna_pci_drv.c
@@ -14,6 +14,7 @@
  #include <linux/iommu.h>
  #include <linux/pci.h>
  +#include "amdxdna_cbuf.h"
  #include "amdxdna_ctx.h"
  #include "amdxdna_gem.h"
  #include "amdxdna_pci_drv.h"
@@ -67,11 +68,40 @@ static const struct amdxdna_device_id amdxdna_ids[] = {
      {0}
  };
  +static int amdxdna_sva_init(struct amdxdna_client *client)
+{
+    struct amdxdna_dev *xdna = client->xdna;
+
+    client->sva = iommu_sva_bind_device(xdna->ddev.dev, client->mm);
+    if (IS_ERR(client->sva)) {
+        XDNA_ERR(xdna, "SVA bind device failed, ret %ld", PTR_ERR(client->sva));
+        return PTR_ERR(client->sva);
+    }
+
+    client->pasid = iommu_sva_get_pasid(client->sva);
+    if (client->pasid == IOMMU_PASID_INVALID) {
+        iommu_sva_unbind_device(client->sva);
+        XDNA_ERR(xdna, "SVA get pasid failed");
+        return -ENODEV;
+    }
+
+    return 0;
+}
+
+static void amdxdna_sva_fini(struct amdxdna_client *client)
+{
+    if (IS_ERR_OR_NULL(client->sva))
+        return;
+
+    iommu_sva_unbind_device(client->sva);
+    client->sva = NULL;
+    client->pasid = IOMMU_PASID_INVALID;
+}
+
  static int amdxdna_drm_open(struct drm_device *ddev, struct drm_file *filp)
  {
      struct amdxdna_dev *xdna = to_xdna_dev(ddev);
      struct amdxdna_client *client;
-    int ret;
        client = kzalloc_obj(*client);
      if (!client)
@@ -80,22 +110,13 @@ static int amdxdna_drm_open(struct drm_device *ddev, struct drm_file *filp)
      client->pid = pid_nr(rcu_access_pointer(filp->pid));
      client->xdna = xdna;
      client->pasid = IOMMU_PASID_INVALID;
+    client->mm = current->mm;
        if (!amdxdna_iova_on(xdna)) {
-        client->sva = iommu_sva_bind_device(xdna->ddev.dev, current->mm);
-        if (IS_ERR(client->sva)) {
-            ret = PTR_ERR(client->sva);
-            XDNA_ERR(xdna, "SVA bind device failed, ret %d", ret);
-            goto failed;
-        }
-        client->pasid = iommu_sva_get_pasid(client->sva);
-        if (client->pasid == IOMMU_PASID_INVALID) {
-            XDNA_ERR(xdna, "SVA get pasid failed");
-            ret = -ENODEV;
-            goto unbind_sva;
-        }
+        /* No need to fail open since user may use pa + carveout later. */
+        if (amdxdna_sva_init(client))
+            XDNA_WARN(xdna, "PASID not available for pid %d", client->pid);
      }
-    client->mm = current->mm;
      mmgrab(client->mm);
      init_srcu_struct(&client->hwctx_srcu);
      xa_init_flags(&client->hwctx_xa, XA_FLAGS_ALLOC);
@@ -110,14 +131,6 @@ static int amdxdna_drm_open(struct drm_device *ddev, struct drm_file *filp)
        XDNA_DBG(xdna, "pid %d opened", client->pid);
      return 0;
-
-unbind_sva:
-    if (!IS_ERR_OR_NULL(client->sva))
-        iommu_sva_unbind_device(client->sva);
-failed:
-    kfree(client);
-
-    return ret;
  }
    static void amdxdna_client_cleanup(struct amdxdna_client *client)
@@ -131,11 +144,8 @@ static void amdxdna_client_cleanup(struct amdxdna_client *client)
          drm_gem_object_put(to_gobj(client->dev_heap));
        mutex_destroy(&client->mm_lock);
-
-    if (!IS_ERR_OR_NULL(client->sva))
-        iommu_sva_unbind_device(client->sva);
      mmdrop(client->mm);
-
+    amdxdna_sva_fini(client);
      kfree(client);
  }
  @@ -242,15 +252,17 @@ static void amdxdna_show_fdinfo(struct drm_printer *p, struct drm_file *filp)
        /*
       * Note for driver specific BO memory usage stat.
-     * Total memory alloc = amdxdna-internal-alloc + amdxdna-external-alloc
+     * Total memory in use = amdxdna-internal-alloc + amdxdna-external-alloc, which
+     * includes both imported and created BOs. To avoid double counts, it includes
+     * HEAP BO, but not DEV BO. DEV BO is counted by amdxdna-heap-alloc.
       */
      drm_fdinfo_print_size(p, drv_name, "heap", "alloc", heap_usage);
      drm_fdinfo_print_size(p, drv_name, "internal", "alloc", internal_usage);
      drm_fdinfo_print_size(p, drv_name, "external", "alloc", external_usage);
      /*
       * Note for DRM standard BO memory stat.
-     * drm-total-memory counts both DEV BO and HEAP BO
-     * drm-shared-memory counts BO imported
+     * drm-total-memory counts both DEV BO and HEAP BO. The DEV BO size is double counted.
+     * drm-shared-memory counts BO shared with other processes/devices.
       */
      drm_show_memory_stats(p, filp);
  }
@@ -420,7 +432,26 @@ static struct pci_driver amdxdna_pci_driver = {
      .sriov_configure = amdxdna_sriov_configure,
  };
  -module_pci_driver(amdxdna_pci_driver);
+static int __init amdxdna_mod_init(void)
+{
+    int ret;
+
+    amdxdna_carveout_init();
+    ret = pci_register_driver(&amdxdna_pci_driver);
+    if (ret)
+        amdxdna_carveout_fini();
+
+    return ret;
+}
+
+static void __exit amdxdna_mod_exit(void)
+{
+    pci_unregister_driver(&amdxdna_pci_driver);
+    amdxdna_carveout_fini();
+}
+
+module_init(amdxdna_mod_init);
+module_exit(amdxdna_mod_exit);
    MODULE_LICENSE("GPL");
  MODULE_IMPORT_NS("AMD_PMF");
diff --git a/drivers/accel/amdxdna/amdxdna_pci_drv.h b/drivers/accel/amdxdna/amdxdna_pci_drv.h
index bdd0dc83f92e..07bd38281452 100644
--- a/drivers/accel/amdxdna/amdxdna_pci_drv.h
+++ b/drivers/accel/amdxdna/amdxdna_pci_drv.h
@@ -172,11 +172,11 @@ void amdxdna_sysfs_fini(struct amdxdna_dev *xdna);
    int amdxdna_iommu_init(struct amdxdna_dev *xdna);
  void amdxdna_iommu_fini(struct amdxdna_dev *xdna);
-int amdxdna_iommu_map_bo(struct amdxdna_dev *xdna, struct amdxdna_gem_obj *abo);
-void amdxdna_iommu_unmap_bo(struct amdxdna_dev *xdna, struct amdxdna_gem_obj *abo);
  void *amdxdna_iommu_alloc(struct amdxdna_dev *xdna, size_t size, dma_addr_t *dma_addr);
  void amdxdna_iommu_free(struct amdxdna_dev *xdna, size_t size,
              void *cpu_addr, dma_addr_t dma_addr);
+int amdxdna_dma_map_bo(struct amdxdna_dev *xdna, struct amdxdna_gem_obj *abo);
+void amdxdna_dma_unmap_bo(struct amdxdna_dev *xdna, struct amdxdna_gem_obj *abo);
    static inline bool amdxdna_iova_on(struct amdxdna_dev *xdna)
  {