Re: [PATCH 1/1] crypto: ccp - Allocate TEE ring and cmd buffer using DMA APIs
From: Rijo Thomas
Date: Tue Dec 06 2022 - 08:16:44 EST
On 12/6/2022 6:26 PM, Christian König wrote:
> Am 06.12.22 um 13:54 schrieb Rijo Thomas:
>>
>> On 12/6/2022 6:11 PM, Christian König wrote:
>>> Am 06.12.22 um 13:30 schrieb Rijo Thomas:
>>>> For AMD Secure Processor (ASP) to map and access TEE ring buffer, the
>>>> ring buffer address sent by host to ASP must be a real physical
>>>> address and the pages must be physically contiguous.
>>>>
>>>> In a virtualized environment though, when the driver is running in a
>>>> guest VM, the pages allocated by __get_free_pages() may not be
>>>> contiguous in the host (or machine) physical address space. Guests
>>>> will see a guest (or pseudo) physical address and not the actual host
>>>> (or machine) physical address. The TEE running on ASP cannot decipher
>>>> pseudo physical addresses. It needs host or machine physical address.
>>>>
>>>> To resolve this problem, use DMA APIs for allocating buffers that must
>>>> be shared with TEE. This will ensure that the pages are contiguous in
>>>> host (or machine) address space. If the DMA handle is an IOVA,
>>>> translate it into a physical address before sending it to ASP.
>>>>
>>>> This patch also exports two APIs (one for buffer allocation and
>>>> another to free the buffer). This API can be used by AMD-TEE driver to
>>>> share buffers with TEE.
>>> Maybe use some other name than dma_buffer for your structure, cause that is usually something completely different in the Linux kernel.
>>>
>> Sure Christian. I shall rename "struct dma_buffer" to "struct shm_buffer" (Shared Memory Buffer) in the file include/linux/psp-tee.h
>> The functions psp_tee_alloc_dmabuf() and psp_tee_free_dmabuf() shall be renamed to psp_tee_alloc_shmbuf() and psp_tee_free_shmbuf(), respectively.
>> I shall do correction in next patch revision. I will wait for others to review as well before I post update.
>
> I strongly suggest to use the name psp_buffer or something similar because shm_buffer is usually used for something else as well.
I see that the TEE subsystem defines "struct tee_shm".
Okay, I will name the newly added structure as "struct psp_tee_buffer" and the APIs as psp_tee_alloc_buffer() and psp_tee_free_buffer().
Complete function prototype:
struct psp_tee_buffer *psp_tee_alloc_buffer(unsigned long size, gfp_t gfp);
void psp_tee_free_buffer(struct psp_tee_buffer *buffer);
Does this look okay?
Thanks,
Rijo
>
> Regards,
> Christian.
>
>>
>> Thanks,
>> Rijo
>>
>>> Regards,
>>> Christian.
>>>
>>>> Fixes: 33960acccfbd ("crypto: ccp - add TEE support for Raven Ridge")
>>>> Cc: Tom Lendacky <thomas.lendacky@xxxxxxx>
>>>> Cc: stable@xxxxxxxxxxxxxxx
>>>> Signed-off-by: Rijo Thomas <Rijo-john.Thomas@xxxxxxx>
>>>> Co-developed-by: Jeshwanth <JESHWANTHKUMAR.NK@xxxxxxx>
>>>> Signed-off-by: Jeshwanth <JESHWANTHKUMAR.NK@xxxxxxx>
>>>> Reviewed-by: Devaraj Rangasamy <Devaraj.Rangasamy@xxxxxxx>
>>>> ---
>>>> drivers/crypto/ccp/psp-dev.c | 6 +-
>>>> drivers/crypto/ccp/tee-dev.c | 116 ++++++++++++++++++++++-------------
>>>> drivers/crypto/ccp/tee-dev.h | 9 +--
>>>> include/linux/psp-tee.h | 47 ++++++++++++++
>>>> 4 files changed, 127 insertions(+), 51 deletions(-)
>>>>
>>>> diff --git a/drivers/crypto/ccp/psp-dev.c b/drivers/crypto/ccp/psp-dev.c
>>>> index c9c741ac8442..2b86158d7435 100644
>>>> --- a/drivers/crypto/ccp/psp-dev.c
>>>> +++ b/drivers/crypto/ccp/psp-dev.c
>>>> @@ -161,13 +161,13 @@ int psp_dev_init(struct sp_device *sp)
>>>> goto e_err;
>>>> }
>>>> + if (sp->set_psp_master_device)
>>>> + sp->set_psp_master_device(sp);
>>>> +
>>>> ret = psp_init(psp);
>>>> if (ret)
>>>> goto e_irq;
>>>> - if (sp->set_psp_master_device)
>>>> - sp->set_psp_master_device(sp);
>>>> -
>>>> /* Enable interrupt */
>>>> iowrite32(-1, psp->io_regs + psp->vdata->inten_reg);
>>>> diff --git a/drivers/crypto/ccp/tee-dev.c b/drivers/crypto/ccp/tee-dev.c
>>>> index 5c9d47f3be37..1631d9851e54 100644
>>>> --- a/drivers/crypto/ccp/tee-dev.c
>>>> +++ b/drivers/crypto/ccp/tee-dev.c
>>>> @@ -12,8 +12,9 @@
>>>> #include <linux/mutex.h>
>>>> #include <linux/delay.h>
>>>> #include <linux/slab.h>
>>>> +#include <linux/dma-direct.h>
>>>> +#include <linux/iommu.h>
>>>> #include <linux/gfp.h>
>>>> -#include <linux/psp-sev.h>
>>>> #include <linux/psp-tee.h>
>>>> #include "psp-dev.h"
>>>> @@ -21,25 +22,64 @@
>>>> static bool psp_dead;
>>>> +struct dma_buffer *psp_tee_alloc_dmabuf(unsigned long size, gfp_t gfp)
>>>> +{
>>>> + struct psp_device *psp = psp_get_master_device();
>>>> + struct dma_buffer *dma_buf;
>>>> + struct iommu_domain *dom;
>>>> +
>>>> + if (!psp || !size)
>>>> + return NULL;
>>>> +
>>>> + dma_buf = kzalloc(sizeof(*dma_buf), GFP_KERNEL);
>>>> + if (!dma_buf)
>>>> + return NULL;
>>>> +
>>>> + dma_buf->vaddr = dma_alloc_coherent(psp->dev, size, &dma_buf->dma, gfp);
>>>> + if (!dma_buf->vaddr || !dma_buf->dma) {
>>>> + kfree(dma_buf);
>>>> + return NULL;
>>>> + }
>>>> +
>>>> + dma_buf->size = size;
>>>> +
>>>> + dom = iommu_get_domain_for_dev(psp->dev);
>>>> + if (dom)
>>>> + dma_buf->paddr = iommu_iova_to_phys(dom, dma_buf->dma);
>>>> + else
>>>> + dma_buf->paddr = dma_buf->dma;
>>>> +
>>>> + return dma_buf;
>>>> +}
>>>> +EXPORT_SYMBOL(psp_tee_alloc_dmabuf);
>>>> +
>>>> +void psp_tee_free_dmabuf(struct dma_buffer *dma_buf)
>>>> +{
>>>> + struct psp_device *psp = psp_get_master_device();
>>>> +
>>>> + if (!psp || !dma_buf)
>>>> + return;
>>>> +
>>>> + dma_free_coherent(psp->dev, dma_buf->size,
>>>> + dma_buf->vaddr, dma_buf->dma);
>>>> +
>>>> + kfree(dma_buf);
>>>> +}
>>>> +EXPORT_SYMBOL(psp_tee_free_dmabuf);
>>>> +
>>>> static int tee_alloc_ring(struct psp_tee_device *tee, int ring_size)
>>>> {
>>>> struct ring_buf_manager *rb_mgr = &tee->rb_mgr;
>>>> - void *start_addr;
>>>> if (!ring_size)
>>>> return -EINVAL;
>>>> - /* We need actual physical address instead of DMA address, since
>>>> - * Trusted OS running on AMD Secure Processor will map this region
>>>> - */
>>>> - start_addr = (void *)__get_free_pages(GFP_KERNEL, get_order(ring_size));
>>>> - if (!start_addr)
>>>> + rb_mgr->ring_buf = psp_tee_alloc_dmabuf(ring_size,
>>>> + GFP_KERNEL | __GFP_ZERO);
>>>> + if (!rb_mgr->ring_buf) {
>>>> + dev_err(tee->dev, "ring allocation failed\n");
>>>> return -ENOMEM;
>>>> -
>>>> - memset(start_addr, 0x0, ring_size);
>>>> - rb_mgr->ring_start = start_addr;
>>>> - rb_mgr->ring_size = ring_size;
>>>> - rb_mgr->ring_pa = __psp_pa(start_addr);
>>>> + }
>>>> mutex_init(&rb_mgr->mutex);
>>>> return 0;
>>>> @@ -49,15 +89,8 @@ static void tee_free_ring(struct psp_tee_device *tee)
>>>> {
>>>> struct ring_buf_manager *rb_mgr = &tee->rb_mgr;
>>>> - if (!rb_mgr->ring_start)
>>>> - return;
>>>> + psp_tee_free_dmabuf(rb_mgr->ring_buf);
>>>> - free_pages((unsigned long)rb_mgr->ring_start,
>>>> - get_order(rb_mgr->ring_size));
>>>> -
>>>> - rb_mgr->ring_start = NULL;
>>>> - rb_mgr->ring_size = 0;
>>>> - rb_mgr->ring_pa = 0;
>>>> mutex_destroy(&rb_mgr->mutex);
>>>> }
>>>> @@ -81,35 +114,36 @@ static int tee_wait_cmd_poll(struct psp_tee_device *tee, unsigned int timeout,
>>>> return -ETIMEDOUT;
>>>> }
>>>> -static
>>>> -struct tee_init_ring_cmd *tee_alloc_cmd_buffer(struct psp_tee_device *tee)
>>>> +struct dma_buffer *tee_alloc_cmd_buffer(struct psp_tee_device *tee)
>>>> {
>>>> struct tee_init_ring_cmd *cmd;
>>>> + struct dma_buffer *cmd_buffer;
>>>> - cmd = kzalloc(sizeof(*cmd), GFP_KERNEL);
>>>> - if (!cmd)
>>>> + cmd_buffer = psp_tee_alloc_dmabuf(sizeof(*cmd),
>>>> + GFP_KERNEL | __GFP_ZERO);
>>>> + if (!cmd_buffer)
>>>> return NULL;
>>>> - cmd->hi_addr = upper_32_bits(tee->rb_mgr.ring_pa);
>>>> - cmd->low_addr = lower_32_bits(tee->rb_mgr.ring_pa);
>>>> - cmd->size = tee->rb_mgr.ring_size;
>>>> + cmd = (struct tee_init_ring_cmd *)cmd_buffer->vaddr;
>>>> + cmd->hi_addr = upper_32_bits(tee->rb_mgr.ring_buf->paddr);
>>>> + cmd->low_addr = lower_32_bits(tee->rb_mgr.ring_buf->paddr);
>>>> + cmd->size = tee->rb_mgr.ring_buf->size;
>>>> dev_dbg(tee->dev, "tee: ring address: high = 0x%x low = 0x%x size = %u\n",
>>>> cmd->hi_addr, cmd->low_addr, cmd->size);
>>>> - return cmd;
>>>> + return cmd_buffer;
>>>> }
>>>> -static inline void tee_free_cmd_buffer(struct tee_init_ring_cmd *cmd)
>>>> +static inline void tee_free_cmd_buffer(struct dma_buffer *cmd_buffer)
>>>> {
>>>> - kfree(cmd);
>>>> + psp_tee_free_dmabuf(cmd_buffer);
>>>> }
>>>> static int tee_init_ring(struct psp_tee_device *tee)
>>>> {
>>>> int ring_size = MAX_RING_BUFFER_ENTRIES * sizeof(struct tee_ring_cmd);
>>>> - struct tee_init_ring_cmd *cmd;
>>>> - phys_addr_t cmd_buffer;
>>>> + struct dma_buffer *cmd_buffer;
>>>> unsigned int reg;
>>>> int ret;
>>>> @@ -123,21 +157,19 @@ static int tee_init_ring(struct psp_tee_device *tee)
>>>> tee->rb_mgr.wptr = 0;
>>>> - cmd = tee_alloc_cmd_buffer(tee);
>>>> - if (!cmd) {
>>>> + cmd_buffer = tee_alloc_cmd_buffer(tee);
>>>> + if (!cmd_buffer) {
>>>> tee_free_ring(tee);
>>>> return -ENOMEM;
>>>> }
>>>> - cmd_buffer = __psp_pa((void *)cmd);
>>>> -
>>>> /* Send command buffer details to Trusted OS by writing to
>>>> * CPU-PSP message registers
>>>> */
>>>> - iowrite32(lower_32_bits(cmd_buffer),
>>>> + iowrite32(lower_32_bits(cmd_buffer->paddr),
>>>> tee->io_regs + tee->vdata->cmdbuff_addr_lo_reg);
>>>> - iowrite32(upper_32_bits(cmd_buffer),
>>>> + iowrite32(upper_32_bits(cmd_buffer->paddr),
>>>> tee->io_regs + tee->vdata->cmdbuff_addr_hi_reg);
>>>> iowrite32(TEE_RING_INIT_CMD,
>>>> tee->io_regs + tee->vdata->cmdresp_reg);
>>>> @@ -157,7 +189,7 @@ static int tee_init_ring(struct psp_tee_device *tee)
>>>> }
>>>> free_buf:
>>>> - tee_free_cmd_buffer(cmd);
>>>> + tee_free_cmd_buffer(cmd_buffer);
>>>> return ret;
>>>> }
>>>> @@ -167,7 +199,7 @@ static void tee_destroy_ring(struct psp_tee_device *tee)
>>>> unsigned int reg;
>>>> int ret;
>>>> - if (!tee->rb_mgr.ring_start)
>>>> + if (!tee->rb_mgr.ring_buf->vaddr)
>>>> return;
>>>> if (psp_dead)
>>>> @@ -256,7 +288,7 @@ static int tee_submit_cmd(struct psp_tee_device *tee, enum tee_cmd_id cmd_id,
>>>> do {
>>>> /* Get pointer to ring buffer command entry */
>>>> cmd = (struct tee_ring_cmd *)
>>>> - (tee->rb_mgr.ring_start + tee->rb_mgr.wptr);
>>>> + (tee->rb_mgr.ring_buf->vaddr + tee->rb_mgr.wptr);
>>>> rptr = ioread32(tee->io_regs + tee->vdata->ring_rptr_reg);
>>>> @@ -305,7 +337,7 @@ static int tee_submit_cmd(struct psp_tee_device *tee, enum tee_cmd_id cmd_id,
>>>> /* Update local copy of write pointer */
>>>> tee->rb_mgr.wptr += sizeof(struct tee_ring_cmd);
>>>> - if (tee->rb_mgr.wptr >= tee->rb_mgr.ring_size)
>>>> + if (tee->rb_mgr.wptr >= tee->rb_mgr.ring_buf->size)
>>>> tee->rb_mgr.wptr = 0;
>>>> /* Trigger interrupt to Trusted OS */
>>>> diff --git a/drivers/crypto/ccp/tee-dev.h b/drivers/crypto/ccp/tee-dev.h
>>>> index 49d26158b71e..9238487ee8bf 100644
>>>> --- a/drivers/crypto/ccp/tee-dev.h
>>>> +++ b/drivers/crypto/ccp/tee-dev.h
>>>> @@ -16,6 +16,7 @@
>>>> #include <linux/device.h>
>>>> #include <linux/mutex.h>
>>>> +#include <linux/psp-tee.h>
>>>> #define TEE_DEFAULT_TIMEOUT 10
>>>> #define MAX_BUFFER_SIZE 988
>>>> @@ -48,17 +49,13 @@ struct tee_init_ring_cmd {
>>>> /**
>>>> * struct ring_buf_manager - Helper structure to manage ring buffer.
>>>> - * @ring_start: starting address of ring buffer
>>>> - * @ring_size: size of ring buffer in bytes
>>>> - * @ring_pa: physical address of ring buffer
>>>> * @wptr: index to the last written entry in ring buffer
>>>> + * @ring_buf: ring buffer allocated using DMA api
>>>> */
>>>> struct ring_buf_manager {
>>>> struct mutex mutex; /* synchronizes access to ring buffer */
>>>> - void *ring_start;
>>>> - u32 ring_size;
>>>> - phys_addr_t ring_pa;
>>>> u32 wptr;
>>>> + struct dma_buffer *ring_buf;
>>>> };
>>>> struct psp_tee_device {
>>>> diff --git a/include/linux/psp-tee.h b/include/linux/psp-tee.h
>>>> index cb0c95d6d76b..c0fa922f24d4 100644
>>>> --- a/include/linux/psp-tee.h
>>>> +++ b/include/linux/psp-tee.h
>>>> @@ -13,6 +13,7 @@
>>>> #include <linux/types.h>
>>>> #include <linux/errno.h>
>>>> +#include <linux/dma-mapping.h>
>>>> /* This file defines the Trusted Execution Environment (TEE) interface commands
>>>> * and the API exported by AMD Secure Processor driver to communicate with
>>>> @@ -40,6 +41,20 @@ enum tee_cmd_id {
>>>> TEE_CMD_ID_UNMAP_SHARED_MEM,
>>>> };
>>>> +/**
>>>> + * struct dma_buffer - Structure for a DMA buffer.
>>>> + * @dma: DMA buffer address
>>>> + * @paddr: Physical address of DMA buffer
>>>> + * @vaddr: CPU virtual address of DMA buffer
>>>> + * @size: Size of DMA buffer in bytes
>>>> + */
>>>> +struct dma_buffer {
>>>> + dma_addr_t dma;
>>>> + phys_addr_t paddr;
>>>> + void *vaddr;
>>>> + unsigned long size;
>>>> +};
>>>> +
>>>> #ifdef CONFIG_CRYPTO_DEV_SP_PSP
>>>> /**
>>>> * psp_tee_process_cmd() - Process command in Trusted Execution Environment
>>>> @@ -75,6 +90,28 @@ int psp_tee_process_cmd(enum tee_cmd_id cmd_id, void *buf, size_t len,
>>>> */
>>>> int psp_check_tee_status(void);
>>>> +/**
>>>> + * psp_tee_alloc_dmabuf() - Allocates memory of requested size and flags using
>>>> + * dma_alloc_coherent() API.
>>>> + *
>>>> + * This function can be used to allocate a shared memory region between the
>>>> + * host and PSP TEE.
>>>> + *
>>>> + * Returns:
>>>> + * non-NULL a valid pointer to struct dma_buffer
>>>> + * NULL on failure
>>>> + */
>>>> +struct dma_buffer *psp_tee_alloc_dmabuf(unsigned long size, gfp_t gfp);
>>>> +
>>>> +/**
>>>> + * psp_tee_free_dmabuf() - Deallocates memory using dma_free_coherent() API.
>>>> + *
>>>> + * This function can be used to release shared memory region between host
>>>> + * and PSP TEE.
>>>> + *
>>>> + */
>>>> +void psp_tee_free_dmabuf(struct dma_buffer *dma_buffer);
>>>> +
>>>> #else /* !CONFIG_CRYPTO_DEV_SP_PSP */
>>>> static inline int psp_tee_process_cmd(enum tee_cmd_id cmd_id, void *buf,
>>>> @@ -87,5 +124,15 @@ static inline int psp_check_tee_status(void)
>>>> {
>>>> return -ENODEV;
>>>> }
>>>> +
>>>> +static inline
>>>> +struct dma_buffer *psp_tee_alloc_dmabuf(unsigned long size, gfp_t gfp)
>>>> +{
>>>> + return NULL;
>>>> +}
>>>> +
>>>> +static inline void psp_tee_free_dmabuf(struct dma_buffer *dma_buffer)
>>>> +{
>>>> +}
>>>> #endif /* CONFIG_CRYPTO_DEV_SP_PSP */
>>>> #endif /* __PSP_TEE_H_ */
>