Re: [PATCH 1/1] crypto: ccp - Allocate TEE ring and cmd buffer using DMA APIs

From: Christian König
Date: Tue Dec 06 2022 - 07:57:49 EST


Am 06.12.22 um 13:54 schrieb Rijo Thomas:

On 12/6/2022 6:11 PM, Christian König wrote:
Am 06.12.22 um 13:30 schrieb Rijo Thomas:
For AMD Secure Processor (ASP) to map and access TEE ring buffer, the
ring buffer address sent by host to ASP must be a real physical
address and the pages must be physically contiguous.

In a virtualized environment though, when the driver is running in a
guest VM, the pages allocated by __get_free_pages() may not be
contiguous in the host (or machine) physical address space. Guests
will see a guest (or pseudo) physical address and not the actual host
(or machine) physical address. The TEE running on ASP cannot decipher
pseudo physical addresses. It needs host or machine physical address.

To resolve this problem, use DMA APIs for allocating buffers that must
be shared with TEE. This will ensure that the pages are contiguous in
host (or machine) address space. If the DMA handle is an IOVA,
translate it into a physical address before sending it to ASP.

This patch also exports two APIs (one for buffer allocation and
another to free the buffer). This API can be used by AMD-TEE driver to
share buffers with TEE.
Maybe use some other name than dma_buffer for your structure, cause that is usually something completely different in the Linux kernel.

Sure Christian. I shall rename "struct dma_buffer" to "struct shm_buffer" (Shared Memory Buffer) in the file include/linux/psp-tee.h
The functions psp_tee_alloc_dmabuf() and psp_tee_free_dmabuf() shall be renamed to psp_tee_alloc_shmbuf() and psp_tee_free_shmbuf(), respectively.
I shall do correction in next patch revision. I will wait for others to review as well before I post update.

I strongly suggest to use the name psp_buffer or something similar because shm_buffer is usually used for something else as well.

Regards,
Christian.


Thanks,
Rijo

Regards,
Christian.

Fixes: 33960acccfbd ("crypto: ccp - add TEE support for Raven Ridge")
Cc: Tom Lendacky <thomas.lendacky@xxxxxxx>
Cc: stable@xxxxxxxxxxxxxxx
Signed-off-by: Rijo Thomas <Rijo-john.Thomas@xxxxxxx>
Co-developed-by: Jeshwanth <JESHWANTHKUMAR.NK@xxxxxxx>
Signed-off-by: Jeshwanth <JESHWANTHKUMAR.NK@xxxxxxx>
Reviewed-by: Devaraj Rangasamy <Devaraj.Rangasamy@xxxxxxx>
---
  drivers/crypto/ccp/psp-dev.c |   6 +-
  drivers/crypto/ccp/tee-dev.c | 116 ++++++++++++++++++++++-------------
  drivers/crypto/ccp/tee-dev.h |   9 +--
  include/linux/psp-tee.h      |  47 ++++++++++++++
  4 files changed, 127 insertions(+), 51 deletions(-)

diff --git a/drivers/crypto/ccp/psp-dev.c b/drivers/crypto/ccp/psp-dev.c
index c9c741ac8442..2b86158d7435 100644
--- a/drivers/crypto/ccp/psp-dev.c
+++ b/drivers/crypto/ccp/psp-dev.c
@@ -161,13 +161,13 @@ int psp_dev_init(struct sp_device *sp)
          goto e_err;
      }
  +    if (sp->set_psp_master_device)
+        sp->set_psp_master_device(sp);
+
      ret = psp_init(psp);
      if (ret)
          goto e_irq;
  -    if (sp->set_psp_master_device)
-        sp->set_psp_master_device(sp);
-
      /* Enable interrupt */
      iowrite32(-1, psp->io_regs + psp->vdata->inten_reg);
  diff --git a/drivers/crypto/ccp/tee-dev.c b/drivers/crypto/ccp/tee-dev.c
index 5c9d47f3be37..1631d9851e54 100644
--- a/drivers/crypto/ccp/tee-dev.c
+++ b/drivers/crypto/ccp/tee-dev.c
@@ -12,8 +12,9 @@
  #include <linux/mutex.h>
  #include <linux/delay.h>
  #include <linux/slab.h>
+#include <linux/dma-direct.h>
+#include <linux/iommu.h>
  #include <linux/gfp.h>
-#include <linux/psp-sev.h>
  #include <linux/psp-tee.h>
    #include "psp-dev.h"
@@ -21,25 +22,64 @@
    static bool psp_dead;
  +struct dma_buffer *psp_tee_alloc_dmabuf(unsigned long size, gfp_t gfp)
+{
+    struct psp_device *psp = psp_get_master_device();
+    struct dma_buffer *dma_buf;
+    struct iommu_domain *dom;
+
+    if (!psp || !size)
+        return NULL;
+
+    dma_buf = kzalloc(sizeof(*dma_buf), GFP_KERNEL);
+    if (!dma_buf)
+        return NULL;
+
+    dma_buf->vaddr = dma_alloc_coherent(psp->dev, size, &dma_buf->dma, gfp);
+    if (!dma_buf->vaddr || !dma_buf->dma) {
+        kfree(dma_buf);
+        return NULL;
+    }
+
+    dma_buf->size = size;
+
+    dom = iommu_get_domain_for_dev(psp->dev);
+    if (dom)
+        dma_buf->paddr = iommu_iova_to_phys(dom, dma_buf->dma);
+    else
+        dma_buf->paddr = dma_buf->dma;
+
+    return dma_buf;
+}
+EXPORT_SYMBOL(psp_tee_alloc_dmabuf);
+
+void psp_tee_free_dmabuf(struct dma_buffer *dma_buf)
+{
+    struct psp_device *psp = psp_get_master_device();
+
+    if (!psp || !dma_buf)
+        return;
+
+    dma_free_coherent(psp->dev, dma_buf->size,
+              dma_buf->vaddr, dma_buf->dma);
+
+    kfree(dma_buf);
+}
+EXPORT_SYMBOL(psp_tee_free_dmabuf);
+
  static int tee_alloc_ring(struct psp_tee_device *tee, int ring_size)
  {
      struct ring_buf_manager *rb_mgr = &tee->rb_mgr;
-    void *start_addr;
        if (!ring_size)
          return -EINVAL;
  -    /* We need actual physical address instead of DMA address, since
-     * Trusted OS running on AMD Secure Processor will map this region
-     */
-    start_addr = (void *)__get_free_pages(GFP_KERNEL, get_order(ring_size));
-    if (!start_addr)
+    rb_mgr->ring_buf = psp_tee_alloc_dmabuf(ring_size,
+                        GFP_KERNEL | __GFP_ZERO);
+    if (!rb_mgr->ring_buf) {
+        dev_err(tee->dev, "ring allocation failed\n");
          return -ENOMEM;
-
-    memset(start_addr, 0x0, ring_size);
-    rb_mgr->ring_start = start_addr;
-    rb_mgr->ring_size = ring_size;
-    rb_mgr->ring_pa = __psp_pa(start_addr);
+    }
      mutex_init(&rb_mgr->mutex);
        return 0;
@@ -49,15 +89,8 @@ static void tee_free_ring(struct psp_tee_device *tee)
  {
      struct ring_buf_manager *rb_mgr = &tee->rb_mgr;
  -    if (!rb_mgr->ring_start)
-        return;
+    psp_tee_free_dmabuf(rb_mgr->ring_buf);
  -    free_pages((unsigned long)rb_mgr->ring_start,
-           get_order(rb_mgr->ring_size));
-
-    rb_mgr->ring_start = NULL;
-    rb_mgr->ring_size = 0;
-    rb_mgr->ring_pa = 0;
      mutex_destroy(&rb_mgr->mutex);
  }
  @@ -81,35 +114,36 @@ static int tee_wait_cmd_poll(struct psp_tee_device *tee, unsigned int timeout,
      return -ETIMEDOUT;
  }
  -static
-struct tee_init_ring_cmd *tee_alloc_cmd_buffer(struct psp_tee_device *tee)
+struct dma_buffer *tee_alloc_cmd_buffer(struct psp_tee_device *tee)
  {
      struct tee_init_ring_cmd *cmd;
+    struct dma_buffer *cmd_buffer;
  -    cmd = kzalloc(sizeof(*cmd), GFP_KERNEL);
-    if (!cmd)
+    cmd_buffer = psp_tee_alloc_dmabuf(sizeof(*cmd),
+                      GFP_KERNEL | __GFP_ZERO);
+    if (!cmd_buffer)
          return NULL;
  -    cmd->hi_addr = upper_32_bits(tee->rb_mgr.ring_pa);
-    cmd->low_addr = lower_32_bits(tee->rb_mgr.ring_pa);
-    cmd->size = tee->rb_mgr.ring_size;
+    cmd = (struct tee_init_ring_cmd *)cmd_buffer->vaddr;
+    cmd->hi_addr = upper_32_bits(tee->rb_mgr.ring_buf->paddr);
+    cmd->low_addr = lower_32_bits(tee->rb_mgr.ring_buf->paddr);
+    cmd->size = tee->rb_mgr.ring_buf->size;
        dev_dbg(tee->dev, "tee: ring address: high = 0x%x low = 0x%x size = %u\n",
          cmd->hi_addr, cmd->low_addr, cmd->size);
  -    return cmd;
+    return cmd_buffer;
  }
  -static inline void tee_free_cmd_buffer(struct tee_init_ring_cmd *cmd)
+static inline void tee_free_cmd_buffer(struct dma_buffer *cmd_buffer)
  {
-    kfree(cmd);
+    psp_tee_free_dmabuf(cmd_buffer);
  }
    static int tee_init_ring(struct psp_tee_device *tee)
  {
      int ring_size = MAX_RING_BUFFER_ENTRIES * sizeof(struct tee_ring_cmd);
-    struct tee_init_ring_cmd *cmd;
-    phys_addr_t cmd_buffer;
+    struct dma_buffer *cmd_buffer;
      unsigned int reg;
      int ret;
  @@ -123,21 +157,19 @@ static int tee_init_ring(struct psp_tee_device *tee)
        tee->rb_mgr.wptr = 0;
  -    cmd = tee_alloc_cmd_buffer(tee);
-    if (!cmd) {
+    cmd_buffer = tee_alloc_cmd_buffer(tee);
+    if (!cmd_buffer) {
          tee_free_ring(tee);
          return -ENOMEM;
      }
  -    cmd_buffer = __psp_pa((void *)cmd);
-
      /* Send command buffer details to Trusted OS by writing to
       * CPU-PSP message registers
       */
  -    iowrite32(lower_32_bits(cmd_buffer),
+    iowrite32(lower_32_bits(cmd_buffer->paddr),
            tee->io_regs + tee->vdata->cmdbuff_addr_lo_reg);
-    iowrite32(upper_32_bits(cmd_buffer),
+    iowrite32(upper_32_bits(cmd_buffer->paddr),
            tee->io_regs + tee->vdata->cmdbuff_addr_hi_reg);
      iowrite32(TEE_RING_INIT_CMD,
            tee->io_regs + tee->vdata->cmdresp_reg);
@@ -157,7 +189,7 @@ static int tee_init_ring(struct psp_tee_device *tee)
      }
    free_buf:
-    tee_free_cmd_buffer(cmd);
+    tee_free_cmd_buffer(cmd_buffer);
        return ret;
  }
@@ -167,7 +199,7 @@ static void tee_destroy_ring(struct psp_tee_device *tee)
      unsigned int reg;
      int ret;
  -    if (!tee->rb_mgr.ring_start)
+    if (!tee->rb_mgr.ring_buf->vaddr)
          return;
        if (psp_dead)
@@ -256,7 +288,7 @@ static int tee_submit_cmd(struct psp_tee_device *tee, enum tee_cmd_id cmd_id,
      do {
          /* Get pointer to ring buffer command entry */
          cmd = (struct tee_ring_cmd *)
-            (tee->rb_mgr.ring_start + tee->rb_mgr.wptr);
+            (tee->rb_mgr.ring_buf->vaddr + tee->rb_mgr.wptr);
            rptr = ioread32(tee->io_regs + tee->vdata->ring_rptr_reg);
  @@ -305,7 +337,7 @@ static int tee_submit_cmd(struct psp_tee_device *tee, enum tee_cmd_id cmd_id,
        /* Update local copy of write pointer */
      tee->rb_mgr.wptr += sizeof(struct tee_ring_cmd);
-    if (tee->rb_mgr.wptr >= tee->rb_mgr.ring_size)
+    if (tee->rb_mgr.wptr >= tee->rb_mgr.ring_buf->size)
          tee->rb_mgr.wptr = 0;
        /* Trigger interrupt to Trusted OS */
diff --git a/drivers/crypto/ccp/tee-dev.h b/drivers/crypto/ccp/tee-dev.h
index 49d26158b71e..9238487ee8bf 100644
--- a/drivers/crypto/ccp/tee-dev.h
+++ b/drivers/crypto/ccp/tee-dev.h
@@ -16,6 +16,7 @@
    #include <linux/device.h>
  #include <linux/mutex.h>
+#include <linux/psp-tee.h>
    #define TEE_DEFAULT_TIMEOUT        10
  #define MAX_BUFFER_SIZE            988
@@ -48,17 +49,13 @@ struct tee_init_ring_cmd {
    /**
   * struct ring_buf_manager - Helper structure to manage ring buffer.
- * @ring_start:  starting address of ring buffer
- * @ring_size:   size of ring buffer in bytes
- * @ring_pa:     physical address of ring buffer
   * @wptr:        index to the last written entry in ring buffer
+ * @ring_buf:    ring buffer allocated using DMA api
   */
  struct ring_buf_manager {
      struct mutex mutex;    /* synchronizes access to ring buffer */
-    void *ring_start;
-    u32 ring_size;
-    phys_addr_t ring_pa;
      u32 wptr;
+    struct dma_buffer *ring_buf;
  };
    struct psp_tee_device {
diff --git a/include/linux/psp-tee.h b/include/linux/psp-tee.h
index cb0c95d6d76b..c0fa922f24d4 100644
--- a/include/linux/psp-tee.h
+++ b/include/linux/psp-tee.h
@@ -13,6 +13,7 @@
    #include <linux/types.h>
  #include <linux/errno.h>
+#include <linux/dma-mapping.h>
    /* This file defines the Trusted Execution Environment (TEE) interface commands
   * and the API exported by AMD Secure Processor driver to communicate with
@@ -40,6 +41,20 @@ enum tee_cmd_id {
      TEE_CMD_ID_UNMAP_SHARED_MEM,
  };
  +/**
+ * struct dma_buffer - Structure for a DMA buffer.
+ * @dma:    DMA buffer address
+ * @paddr:  Physical address of DMA buffer
+ * @vaddr:  CPU virtual address of DMA buffer
+ * @size:   Size of DMA buffer in bytes
+ */
+struct dma_buffer {
+    dma_addr_t dma;
+    phys_addr_t paddr;
+    void *vaddr;
+    unsigned long size;
+};
+
  #ifdef CONFIG_CRYPTO_DEV_SP_PSP
  /**
   * psp_tee_process_cmd() - Process command in Trusted Execution Environment
@@ -75,6 +90,28 @@ int psp_tee_process_cmd(enum tee_cmd_id cmd_id, void *buf, size_t len,
   */
  int psp_check_tee_status(void);
  +/**
+ * psp_tee_alloc_dmabuf() - Allocates memory of requested size and flags using
+ * dma_alloc_coherent() API.
+ *
+ * This function can be used to allocate a shared memory region between the
+ * host and PSP TEE.
+ *
+ * Returns:
+ * non-NULL   a valid pointer to struct dma_buffer
+ * NULL       on failure
+ */
+struct dma_buffer *psp_tee_alloc_dmabuf(unsigned long size, gfp_t gfp);
+
+/**
+ * psp_tee_free_dmabuf() - Deallocates memory using dma_free_coherent() API.
+ *
+ * This function can be used to release shared memory region between host
+ * and PSP TEE.
+ *
+ */
+void psp_tee_free_dmabuf(struct dma_buffer *dma_buffer);
+
  #else /* !CONFIG_CRYPTO_DEV_SP_PSP */
    static inline int psp_tee_process_cmd(enum tee_cmd_id cmd_id, void *buf,
@@ -87,5 +124,15 @@ static inline int psp_check_tee_status(void)
  {
      return -ENODEV;
  }
+
+static inline
+struct dma_buffer *psp_tee_alloc_dmabuf(unsigned long size, gfp_t gfp)
+{
+    return NULL;
+}
+
+static inline void psp_tee_free_dmabuf(struct dma_buffer *dma_buffer)
+{
+}
  #endif /* CONFIG_CRYPTO_DEV_SP_PSP */
  #endif /* __PSP_TEE_H_ */