Re: [PATCH v7 14/23] iommu/smmuv3: Implement cache_invalidate

From: Auger Eric
Date: Mon May 13 2019 - 10:06:40 EST


Hi Robin,

On 5/13/19 4:01 PM, Robin Murphy wrote:
> On 13/05/2019 13:16, Auger Eric wrote:
>> Hi Robin,
>> On 5/8/19 5:01 PM, Robin Murphy wrote:
>>> On 08/04/2019 13:19, Eric Auger wrote:
>>>> Implement domain-selective and page-selective IOTLB invalidations.
>>>>
>>>> Signed-off-by: Eric Auger <eric.auger@xxxxxxxxxx>
>>>>
>>>> ---
>>>> v6 -> v7
>>>> - check the uapi version
>>>>
>>>> v3 -> v4:
>>>> - adapt to changes in the uapi
>>>> - add support for leaf parameter
>>>> - do not use arm_smmu_tlb_inv_range_nosync or arm_smmu_tlb_inv_context
>>>>     anymore
>>>>
>>>> v2 -> v3:
>>>> - replace __arm_smmu_tlb_sync by arm_smmu_cmdq_issue_sync
>>>>
>>>> v1 -> v2:
>>>> - properly pass the asid
>>>> ---
>>>>    drivers/iommu/arm-smmu-v3.c | 60
>>>> +++++++++++++++++++++++++++++++++++++
>>>>    1 file changed, 60 insertions(+)
>>>>
>>>> diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
>>>> index 1486baf53425..4366921d8318 100644
>>>> --- a/drivers/iommu/arm-smmu-v3.c
>>>> +++ b/drivers/iommu/arm-smmu-v3.c
>>>> @@ -2326,6 +2326,65 @@ static void arm_smmu_detach_pasid_table(struct
>>>> iommu_domain *domain)
>>>>        mutex_unlock(&smmu_domain->init_mutex);
>>>>    }
>>>>    +static int
>>>> +arm_smmu_cache_invalidate(struct iommu_domain *domain, struct device
>>>> *dev,
>>>> +              struct iommu_cache_invalidate_info *inv_info)
>>>> +{
>>>> +    struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
>>>> +    struct arm_smmu_device *smmu = smmu_domain->smmu;
>>>> +
>>>> +    if (smmu_domain->stage != ARM_SMMU_DOMAIN_NESTED)
>>>> +        return -EINVAL;
>>>> +
>>>> +    if (!smmu)
>>>> +        return -EINVAL;
>>>> +
>>>> +    if (inv_info->version != IOMMU_CACHE_INVALIDATE_INFO_VERSION_1)
>>>> +        return -EINVAL;
>>>> +
>>>> +    if (inv_info->cache & IOMMU_CACHE_INV_TYPE_IOTLB) {
>>>> +        if (inv_info->granularity == IOMMU_INV_GRANU_PASID) {
>>>> +            struct arm_smmu_cmdq_ent cmd = {
>>>> +                .opcode = CMDQ_OP_TLBI_NH_ASID,
>>>> +                .tlbi = {
>>>> +                    .vmid = smmu_domain->s2_cfg.vmid,
>>>> +                    .asid = inv_info->pasid,
>>>> +                },
>>>> +            };
>>>> +
>>>> +            arm_smmu_cmdq_issue_cmd(smmu, &cmd);
>>>> +            arm_smmu_cmdq_issue_sync(smmu);
>>>
>>> I'd much rather make arm_smmu_tlb_inv_context() understand nested
>>> domains than open-code commands all over the place.
>>
>>
>>>
>>>> +
>>>> +        } else if (inv_info->granularity == IOMMU_INV_GRANU_ADDR) {
>>>> +            struct iommu_inv_addr_info *info = &inv_info->addr_info;
>>>> +            size_t size = info->nb_granules * info->granule_size;
>>>> +            bool leaf = info->flags & IOMMU_INV_ADDR_FLAGS_LEAF;
>>>> +            struct arm_smmu_cmdq_ent cmd = {
>>>> +                .opcode = CMDQ_OP_TLBI_NH_VA,
>>>> +                .tlbi = {
>>>> +                    .addr = info->addr,
>>>> +                    .vmid = smmu_domain->s2_cfg.vmid,
>>>> +                    .asid = info->pasid,
>>>> +                    .leaf = leaf,
>>>> +                },
>>>> +            };
>>>> +
>>>> +            do {
>>>> +                arm_smmu_cmdq_issue_cmd(smmu, &cmd);
>>>> +                cmd.tlbi.addr += info->granule_size;
>>>> +            } while (size -= info->granule_size);
>>>> +            arm_smmu_cmdq_issue_sync(smmu);
>>>
>>> And this in particular I would really like to go all the way through
>>> io_pgtable_tlb_add_flush()/io_pgtable_sync() if at all possible. Hooking
>>> up range-based invalidations is going to be a massive headache if the
>>> abstraction isn't solid.
>>
>> The concern is that the host does not "own" the s1 config asid
>> (smmu_domain->s1_cfg.cd.asid is not set, practically). In our case the
>> asid is only passed by userspace in the CACHE_INVALIDATE ioctl call.
>>
>> arm_smmu_tlb_inv_context and arm_smmu_tlb_inv_range_nosync use this field.
>
> Right, but that's not exactly hard to solve. Even just something like
> the (untested, purely illustrative) refactoring below would be beneficial.
Sure, I can go this way.

Thank you for the detailed explanation.

Eric
>
> Robin.
>
> ----->8-----
> diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
> index d3880010c6cf..31ef703cf671 100644
> --- a/drivers/iommu/arm-smmu-v3.c
> +++ b/drivers/iommu/arm-smmu-v3.c
> @@ -1423,11 +1423,9 @@ static void arm_smmu_tlb_inv_context(void *cookie)
>      arm_smmu_cmdq_issue_sync(smmu);
>  }
>
> -static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size,
> -                      size_t granule, bool leaf, void *cookie)
> +static void __arm_smmu_tlb_inv_range(struct arm_smmu_domain
> *smmu_domain, u16 asid,
> +        unsigned long iova, size_t size, size_t granule, bool leaf)
>  {
> -    struct arm_smmu_domain *smmu_domain = cookie;
> -    struct arm_smmu_device *smmu = smmu_domain->smmu;
>      struct arm_smmu_cmdq_ent cmd = {
>          .tlbi = {
>              .leaf    = leaf,
> @@ -1437,18 +1435,27 @@ static void
> arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size,
>
>      if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
>          cmd.opcode    = CMDQ_OP_TLBI_NH_VA;
> -        cmd.tlbi.asid    = smmu_domain->s1_cfg.cd.asid;
> +        cmd.tlbi.asid    = asid;
>      } else {
>          cmd.opcode    = CMDQ_OP_TLBI_S2_IPA;
>          cmd.tlbi.vmid    = smmu_domain->s2_cfg.vmid;
>      }
>
>      do {
> -        arm_smmu_cmdq_issue_cmd(smmu, &cmd);
> +        arm_smmu_cmdq_issue_cmd(smmu_domain->smmu, &cmd);
>          cmd.tlbi.addr += granule;
>      } while (size -= granule);
>  }
>
> +static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size,
> +                      size_t granule, bool leaf, void *cookie)
> +{
> +    struct arm_smmu_domain *smmu_domain = cookie;
> +
> +    __arm_smmu_tlb_inv_range(smmu_domain, smmu_domain->s1_cfg.cd.asid,
> iova,
> +            size, granule, leaf);
> +}
> +
>  static const struct iommu_gather_ops arm_smmu_gather_ops = {
>      .tlb_flush_all    = arm_smmu_tlb_inv_context,
>      .tlb_add_flush    = arm_smmu_tlb_inv_range_nosync,