Re: [RFC v2 12/20] dma-iommu: Implement NESTED_MSI cookie

From: Auger Eric
Date: Wed Oct 24 2018 - 14:44:18 EST


Hi Robin,

On 10/24/18 8:02 PM, Robin Murphy wrote:
> Hi Eric,
>
> On 2018-09-18 3:24 pm, Eric Auger wrote:
>> Up to now, when the type was UNMANAGED, we used to
>> allocate IOVA pages within a range provided by the user.
>> This does not work in nested mode.
>>
>> If both the host and the guest are exposed with SMMUs, each
>> would allocate an IOVA. The guest allocates an IOVA (gIOVA)
>> to map onto the guest MSI doorbell (gDB). The Host allocates
>> another IOVA (hIOVA) to map onto the physical doorbell (hDB).
>>
>> So we end up with 2 unrelated mappings, at S1 and S2:
>>           S1             S2
>> gIOVA    ->     gDB
>>                 hIOVA    ->    hDB
>>
>> The PCI device would be programmed with hIOVA.
>>
>> iommu_dma_bind_doorbell allows to pass gIOVA/gDB to the host
>> so that gIOVA can be used by the host instead of re-allocating
>> a new IOVA. That way the host can create the following nested
>> mapping:
>>
>>           S1           S2
>> gIOVA    ->    gDB    ->    hDB
>>
>> this time, the PCI device will be programmed with the gIOVA MSI
>> doorbell which is correctly mapped through the 2 stages.
>
> If I'm understanding things correctly, this plus a couple of the
> preceding patches all add up to a rather involved way of coercing an
> automatic allocator to only "allocate" predetermined addresses in an
> entirely known-ahead-of-time manner.
agreed
> Given that the guy calling
> iommu_dma_bind_doorbell() could seemingly just as easily call
> iommu_map() at that point and not bother with an allocator cookie and
> all this machinery at all, what am I missing?
Well iommu_dma_map_msi_msg() gets called and is part of this existing
MSI mapping machinery. If we do not do anything this function allocates
an hIOVA that is not involved in any nested setup. So either we coerce
the allocator in place (which is what this series does) or we unplug the
allocator and replace it with a simple S2 mapping, as you
suggest, i.e. iommu_map(gDB, hDB). Assuming we unplug the allocator, the
guy who actually calls iommu_dma_bind_doorbell() knows gDB but does not
know hDB. So I don't really get how we can simplify things.

Thanks

Eric

>
> Robin.
>
>>
>> Signed-off-by: Eric Auger <eric.auger@xxxxxxxxxx>
>>
>> ---
>>
>> v1 -> v2:
>> - unmap stage2 on put()
>> ---
>> Â drivers/iommu/dma-iommu.c | 97 +++++++++++++++++++++++++++++++++++++--
>> Â include/linux/dma-iommu.h | 11 +++++
>> Â 2 files changed, 105 insertions(+), 3 deletions(-)
>>
>> diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
>> index 511ff9a1d6d9..53444c3e8f2f 100644
>> --- a/drivers/iommu/dma-iommu.c
>> +++ b/drivers/iommu/dma-iommu.c
>> @@ -37,12 +37,14 @@
>> Â struct iommu_dma_msi_page {
>> ÂÂÂÂÂ struct list_headÂÂÂ list;
>> ÂÂÂÂÂ dma_addr_tÂÂÂÂÂÂÂ iova;
>> +ÂÂÂ dma_addr_tÂÂÂÂÂÂÂ ipa;
>> ÂÂÂÂÂ phys_addr_tÂÂÂÂÂÂÂ phys;
>> Â };
>> Â Â enum iommu_dma_cookie_type {
>> ÂÂÂÂÂ IOMMU_DMA_IOVA_COOKIE,
>> ÂÂÂÂÂ IOMMU_DMA_MSI_COOKIE,
>> +ÂÂÂ IOMMU_DMA_NESTED_MSI_COOKIE,
>> Â };
>> Â Â struct iommu_dma_cookie {
>> @@ -109,14 +111,17 @@ EXPORT_SYMBOL(iommu_get_dma_cookie);
>> ÂÂ *
>> ÂÂ * Users who manage their own IOVA allocation and do not want DMA
>> API support,
>> ÂÂ * but would still like to take advantage of automatic MSI
>> remapping, can use
>> - * this to initialise their own domain appropriately. Users should
>> reserve a
>> + * this to initialise their own domain appropriately. Users may
>> reserve a
>> ÂÂ * contiguous IOVA region, starting at @base, large enough to
>> accommodate the
>> ÂÂ * number of PAGE_SIZE mappings necessary to cover every MSI
>> doorbell address
>> - * used by the devices attached to @domain.
>> + * used by the devices attached to @domain. The other way round is to
>> provide
>> + * usable iova pages through the iommu_dma_bind_doorbell API (nested
>> stages
>> + * use case)
>> ÂÂ */
>> Â int iommu_get_msi_cookie(struct iommu_domain *domain, dma_addr_t base)
>> Â {
>> ÂÂÂÂÂ struct iommu_dma_cookie *cookie;
>> +ÂÂÂ int nesting, ret;
>> Â ÂÂÂÂÂ if (domain->type != IOMMU_DOMAIN_UNMANAGED)
>> ÂÂÂÂÂÂÂÂÂ return -EINVAL;
>> @@ -124,7 +129,12 @@ int iommu_get_msi_cookie(struct iommu_domain
>> *domain, dma_addr_t base)
>> ÂÂÂÂÂ if (domain->iova_cookie)
>> ÂÂÂÂÂÂÂÂÂ return -EEXIST;
>> Â -ÂÂÂ cookie = cookie_alloc(IOMMU_DMA_MSI_COOKIE);
>> +	ret = iommu_domain_get_attr(domain, DOMAIN_ATTR_NESTING, &nesting);
>> +ÂÂÂ if (!ret && nesting)
>> +ÂÂÂÂÂÂÂ cookie = cookie_alloc(IOMMU_DMA_NESTED_MSI_COOKIE);
>> +ÂÂÂ else
>> +ÂÂÂÂÂÂÂ cookie = cookie_alloc(IOMMU_DMA_MSI_COOKIE);
>> +
>> ÂÂÂÂÂ if (!cookie)
>> ÂÂÂÂÂÂÂÂÂ return -ENOMEM;
>> Â @@ -145,6 +155,7 @@ void iommu_put_dma_cookie(struct iommu_domain
>> *domain)
>> Â {
>> ÂÂÂÂÂ struct iommu_dma_cookie *cookie = domain->iova_cookie;
>> ÂÂÂÂÂ struct iommu_dma_msi_page *msi, *tmp;
>> +ÂÂÂ bool s2_unmap = false;
>> Â ÂÂÂÂÂ if (!cookie)
>> ÂÂÂÂÂÂÂÂÂ return;
>> @@ -152,7 +163,15 @@ void iommu_put_dma_cookie(struct iommu_domain
>> *domain)
>> ÂÂÂÂÂ if (cookie->type == IOMMU_DMA_IOVA_COOKIE && cookie->iovad.granule)
>> ÂÂÂÂÂÂÂÂÂ put_iova_domain(&cookie->iovad);
>> Â +ÂÂÂ if (cookie->type == IOMMU_DMA_NESTED_MSI_COOKIE)
>> +ÂÂÂÂÂÂÂ s2_unmap = true;
>> +
>> ÂÂÂÂÂ list_for_each_entry_safe(msi, tmp, &cookie->msi_page_list, list) {
>> +ÂÂÂÂÂÂÂ if (s2_unmap && msi->phys) {
>> +ÂÂÂÂÂÂÂÂÂÂÂ size_t size = cookie_msi_granule(cookie);
>> +
>> +ÂÂÂÂÂÂÂÂÂÂÂ WARN_ON(iommu_unmap(domain, msi->ipa, size) != size);
>> +ÂÂÂÂÂÂÂ }
>> ÂÂÂÂÂÂÂÂÂ list_del(&msi->list);
>> ÂÂÂÂÂÂÂÂÂ kfree(msi);
>> ÂÂÂÂÂ }
>> @@ -161,6 +180,50 @@ void iommu_put_dma_cookie(struct iommu_domain
>> *domain)
>> Â }
>> Â EXPORT_SYMBOL(iommu_put_dma_cookie);
>> Â +/**
>> + * iommu_dma_bind_doorbell - Allows to provide a usable IOVA page
>> + * @domain: domain handle
>> + * @binding: IOVA/IPA binding
>> + *
>> + * In nested stage use case, the user can provide IOVA/IPA bindings
>> + * corresponding to a guest MSI stage 1 mapping. When the host needs
>> + * to map its own MSI doorbells, it can use the IPA as stage 2 input
>> + * and map it onto the physical MSI doorbell.
>> + */
>> +int iommu_dma_bind_doorbell(struct iommu_domain *domain,
>> +ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ struct iommu_guest_msi_binding *binding)
>> +{
>> +ÂÂÂ struct iommu_dma_cookie *cookie = domain->iova_cookie;
>> +ÂÂÂ struct iommu_dma_msi_page *msi;
>> +ÂÂÂ dma_addr_t ipa, iova;
>> +ÂÂÂ size_t size;
>> +
>> +ÂÂÂ if (!cookie)
>> +ÂÂÂÂÂÂÂ return -EINVAL;
>> +
>> +ÂÂÂ if (cookie->type != IOMMU_DMA_NESTED_MSI_COOKIE)
>> +ÂÂÂÂÂÂÂ return -EINVAL;
>> +
>> +ÂÂÂ size = 1 << binding->granule;
>> +ÂÂÂ iova = binding->iova & ~(phys_addr_t)(size - 1);
>> +ÂÂÂ ipa = binding->gpa & ~(phys_addr_t)(size - 1);
>> +
>> +ÂÂÂ list_for_each_entry(msi, &cookie->msi_page_list, list) {
>> +ÂÂÂÂÂÂÂ if (msi->iova == iova)
>> +ÂÂÂÂÂÂÂÂÂÂÂ return 0; /* this page is already registered */
>> +ÂÂÂ }
>> +
>> +ÂÂÂ msi = kzalloc(sizeof(*msi), GFP_KERNEL);
>> +ÂÂÂ if (!msi)
>> +ÂÂÂÂÂÂÂ return -ENOMEM;
>> +
>> +ÂÂÂ msi->iova = iova;
>> +ÂÂÂ msi->ipa = ipa;
>> +ÂÂÂ list_add(&msi->list, &cookie->msi_page_list);
>> +ÂÂÂ return 0;
>> +}
>> +EXPORT_SYMBOL(iommu_dma_bind_doorbell);
>> +
>> Â /**
>> ÂÂ * iommu_dma_get_resv_regions - Reserved region driver helper
>> ÂÂ * @dev: Device from iommu_get_resv_regions()
>> @@ -846,6 +909,34 @@ static struct iommu_dma_msi_page
>> *iommu_dma_get_msi_page(struct device *dev,
>> ÂÂÂÂÂÂÂÂÂ if (msi_page->phys == msi_addr)
>> ÂÂÂÂÂÂÂÂÂÂÂÂÂ return msi_page;
>> +	/*
>> +	 * In nested stage mode, we do not allocate an MSI page in
>> +	 * a range provided by the user. Instead, IOVA/IPA bindings are
>> +	 * individually provided. We reuse these IOVAs to build the
>> +	 * IOVA -> IPA -> MSI PA nested stage mapping.
>> +	 */
>> +ÂÂÂ if (cookie->type == IOMMU_DMA_NESTED_MSI_COOKIE) {
>> +ÂÂÂÂÂÂÂ list_for_each_entry(msi_page, &cookie->msi_page_list, list)
>> +ÂÂÂÂÂÂÂÂÂÂÂ if (!msi_page->phys) { /* this binding is free to use */
>> +ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ dma_addr_t ipa = msi_page->ipa;
>> +ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ int ret;
>> +
>> +ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ msi_page->phys = msi_addr;
>> +
>> +ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ /* do the stage 2 mapping */
>> +ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ ret = iommu_map(domain, ipa, msi_addr, size,
>> +ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ IOMMU_MMIO | IOMMU_WRITE);
>> +ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ if (ret) {
>> +ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ pr_warn("MSI S2 mapping failed (%d)\n",
>> +ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ ret);
>> +ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ return NULL;
>> +ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ }
>> +ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ return msi_page;
>> +ÂÂÂÂÂÂÂÂÂÂÂ }
>> +ÂÂÂÂÂÂÂ pr_warn("%s no MSI binding found\n", __func__);
>> +ÂÂÂÂÂÂÂ return NULL;
>> +ÂÂÂ }
>> +
>> ÂÂÂÂÂ msi_page = kzalloc(sizeof(*msi_page), GFP_ATOMIC);
>> ÂÂÂÂÂ if (!msi_page)
>> ÂÂÂÂÂÂÂÂÂ return NULL;
>> diff --git a/include/linux/dma-iommu.h b/include/linux/dma-iommu.h
>> index e8ca5e654277..324745eef644 100644
>> --- a/include/linux/dma-iommu.h
>> +++ b/include/linux/dma-iommu.h
>> @@ -24,6 +24,7 @@
>> Â #include <linux/dma-mapping.h>
>> Â #include <linux/iommu.h>
>> Â #include <linux/msi.h>
>> +#include <uapi/linux/iommu.h>
>> Â Â int iommu_dma_init(void);
>> Â @@ -74,12 +75,15 @@ int iommu_dma_mapping_error(struct device *dev,
>> dma_addr_t dma_addr);
>> Â /* The DMA API isn't _quite_ the whole story, though... */
>> Â void iommu_dma_map_msi_msg(int irq, struct msi_msg *msg);
>> Â void iommu_dma_get_resv_regions(struct device *dev, struct list_head
>> *list);
>> +int iommu_dma_bind_doorbell(struct iommu_domain *domain,
>> +ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ struct iommu_guest_msi_binding *binding);
>> Â Â #else
>> Â Â struct iommu_domain;
>> Â struct msi_msg;
>> Â struct device;
>> +struct iommu_guest_msi_binding;
>> Â Â static inline int iommu_dma_init(void)
>> Â {
>> @@ -104,6 +108,13 @@ static inline void iommu_dma_map_msi_msg(int irq,
>> struct msi_msg *msg)
>> Â {
>> Â }
>> Â +static inline int
>> +iommu_dma_bind_doorbell(struct iommu_domain *domain,
>> +ÂÂÂÂÂÂÂÂÂÂÂ struct iommu_guest_msi_binding *binding)
>> +{
>> +ÂÂÂ return -ENODEV;
>> +}
>> +
>> Â static inline void iommu_dma_get_resv_regions(struct device *dev,
>> struct list_head *list)
>> Â {
>> Â }
>>