RE: [RFC PATCH v2 15/19] vhost-vdpa: support ASID based IOTLB API

From: Gautam Dawar
Date: Mon Mar 07 2022 - 06:01:25 EST


-----Original Message-----
From: Eugenio Perez Martin <eperezma@xxxxxxxxxx>
Sent: Friday, March 4, 2022 11:35 PM
To: Gautam Dawar <gdawar@xxxxxxxxxx>
Cc: Gautam Dawar <gdawar@xxxxxxxxxx>; Martin Petrus Hubertus Habets <martinh@xxxxxxxxxx>; Harpreet Singh Anand <hanand@xxxxxxxxxx>; Tanuj Murlidhar Kamde <tanujk@xxxxxxxxxx>; Jason Wang <jasowang@xxxxxxxxxx>; Michael S. Tsirkin <mst@xxxxxxxxxx>; Zhu Lingshan <lingshan.zhu@xxxxxxxxx>; Stefano Garzarella <sgarzare@xxxxxxxxxx>; Xie Yongji <xieyongji@xxxxxxxxxxxxx>; Eli Cohen <elic@xxxxxxxxxx>; Si-Wei Liu <si-wei.liu@xxxxxxxxxx>; Parav Pandit <parav@xxxxxxxxxx>; Longpeng <longpeng2@xxxxxxxxxx>; virtualization <virtualization@xxxxxxxxxxxxxxxxxxxxxxxxxx>; linux-kernel@xxxxxxxxxxxxxxx; kvm list <kvm@xxxxxxxxxxxxxxx>; netdev@xxxxxxxxxxxxxxx
Subject: Re: [RFC PATCH v2 15/19] vhost-vdpa: support ASID based IOTLB API

On Thu, Feb 24, 2022 at 10:28 PM Gautam Dawar <gautam.dawar@xxxxxxxxxx> wrote:
>
> This patch extends the vhost-vdpa to support ASID based IOTLB API. The
> vhost-vdpa device will allocated multiple IOTLBs for vDPA device that
> supports multiple address spaces. The IOTLBs and vDPA device memory
> mappings is determined and maintained through ASID.
>
> Note that we still don't support vDPA device with more than one
> address spaces that depends on platform IOMMU. This work will be done
> by moving the IOMMU logic from vhost-vDPA to vDPA device driver.
>
> Signed-off-by: Jason Wang <jasowang@xxxxxxxxxx>
> Signed-off-by: Gautam Dawar <gdawar@xxxxxxxxxx>
> ---
> drivers/vhost/vdpa.c | 129 ++++++++++++++++++++++++++++++++----------
> drivers/vhost/vhost.c | 2 +-
> 2 files changed, 100 insertions(+), 31 deletions(-)
>
> diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c index
> 47e6cf9d0881..4bcf824e3b12 100644
> --- a/drivers/vhost/vdpa.c
> +++ b/drivers/vhost/vdpa.c
> @@ -28,7 +28,8 @@
> enum {
> VHOST_VDPA_BACKEND_FEATURES =
> (1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2) |
> - (1ULL << VHOST_BACKEND_F_IOTLB_BATCH),
> + (1ULL << VHOST_BACKEND_F_IOTLB_BATCH) |
> + (1ULL << VHOST_BACKEND_F_IOTLB_ASID),
> };
>
> #define VHOST_VDPA_DEV_MAX (1U << MINORBITS) @@ -57,13 +58,20 @@
> struct vhost_vdpa {
> struct eventfd_ctx *config_ctx;
> int in_batch;
> struct vdpa_iova_range range;
> - int used_as;
> + u32 batch_asid;
> };
>
> static DEFINE_IDA(vhost_vdpa_ida);
>
> static dev_t vhost_vdpa_major;
>
> +static inline u32 iotlb_to_asid(struct vhost_iotlb *iotlb) {
> + struct vhost_vdpa_as *as = container_of(iotlb, struct
> + vhost_vdpa_as, iotlb);
> + return as->id;
> +}
> +
> static struct vhost_vdpa_as *asid_to_as(struct vhost_vdpa *v, u32
> asid) {
> struct hlist_head *head = &v->as[asid %
> VHOST_VDPA_IOTLB_BUCKETS]; @@ -76,6 +84,16 @@ static struct vhost_vdpa_as *asid_to_as(struct vhost_vdpa *v, u32 asid)
> return NULL;
> }
>
> +static struct vhost_iotlb *asid_to_iotlb(struct vhost_vdpa *v, u32
> +asid) {
> + struct vhost_vdpa_as *as = asid_to_as(v, asid);
> +
> + if (!as)
> + return NULL;
> +
> + return &as->iotlb;
> +}
> +
> static struct vhost_vdpa_as *vhost_vdpa_alloc_as(struct vhost_vdpa
> *v, u32 asid) {
> struct hlist_head *head = &v->as[asid %
> VHOST_VDPA_IOTLB_BUCKETS]; @@ -84,6 +102,9 @@ static struct vhost_vdpa_as *vhost_vdpa_alloc_as(struct vhost_vdpa *v, u32 asid)
> if (asid_to_as(v, asid))
> return NULL;
>
> + if (asid >= v->vdpa->nas)
> + return NULL;
> +
> as = kmalloc(sizeof(*as), GFP_KERNEL);
> if (!as)
> return NULL;
> @@ -91,18 +112,24 @@ static struct vhost_vdpa_as *vhost_vdpa_alloc_as(struct vhost_vdpa *v, u32 asid)
> vhost_iotlb_init(&as->iotlb, 0, 0);
> as->id = asid;
> hlist_add_head(&as->hash_link, head);
> - ++v->used_as;
>
> return as;
> }
>
> -static int vhost_vdpa_remove_as(struct vhost_vdpa *v, u32 asid)
> +static struct vhost_vdpa_as *vhost_vdpa_find_alloc_as(struct vhost_vdpa *v,
> + u32 asid)
> {
> struct vhost_vdpa_as *as = asid_to_as(v, asid);
>
> - /* Remove default address space is not allowed */
> - if (asid == 0)
> - return -EINVAL;
> + if (as)
> + return as;
> +
> + return vhost_vdpa_alloc_as(v, asid); }
> +
> +static int vhost_vdpa_remove_as(struct vhost_vdpa *v, u32 asid) {
> + struct vhost_vdpa_as *as = asid_to_as(v, asid);
>
> if (!as)
> return -EINVAL;
> @@ -110,7 +137,6 @@ static int vhost_vdpa_remove_as(struct vhost_vdpa *v, u32 asid)
> hlist_del(&as->hash_link);
> vhost_iotlb_reset(&as->iotlb);
> kfree(as);
> - --v->used_as;
>
> return 0;
> }
> @@ -665,6 +691,7 @@ static int vhost_vdpa_map(struct vhost_vdpa *v, struct vhost_iotlb *iotlb,
> struct vhost_dev *dev = &v->vdev;
> struct vdpa_device *vdpa = v->vdpa;
> const struct vdpa_config_ops *ops = vdpa->config;
> + u32 asid = iotlb_to_asid(iotlb);
> int r = 0;
>
> r = vhost_iotlb_add_range_ctx(iotlb, iova, iova + size - 1, @@
> -673,10 +700,10 @@ static int vhost_vdpa_map(struct vhost_vdpa *v, struct vhost_iotlb *iotlb,
> return r;
>
> if (ops->dma_map) {
> - r = ops->dma_map(vdpa, 0, iova, size, pa, perm, opaque);
> + r = ops->dma_map(vdpa, asid, iova, size, pa, perm,
> + opaque);
> } else if (ops->set_map) {
> if (!v->in_batch)
> - r = ops->set_map(vdpa, 0, iotlb);
> + r = ops->set_map(vdpa, asid, iotlb);
> } else {
> r = iommu_map(v->domain, iova, pa, size,
> perm_to_iommu_flags(perm)); @@ -692,23
> +719,35 @@ static int vhost_vdpa_map(struct vhost_vdpa *v, struct vhost_iotlb *iotlb,
> return 0;
> }
>
> -static void vhost_vdpa_unmap(struct vhost_vdpa *v,
> - struct vhost_iotlb *iotlb,
> - u64 iova, u64 size)
> +static int vhost_vdpa_unmap(struct vhost_vdpa *v,
> + struct vhost_iotlb *iotlb,
> + u64 iova, u64 size)
> {
> struct vdpa_device *vdpa = v->vdpa;
> const struct vdpa_config_ops *ops = vdpa->config;
> + u32 asid = iotlb_to_asid(iotlb);
> +
> + if (!iotlb)
> + return -EINVAL;

I think there is no need of checking for this. Similar functions assume the caller will pass non-null arguments, and vhost_vdpa_process_iotlb_msg does it.

With that into account, I think there is little point in making this function return something different than void.

Apart from that, iotlb is already used before checking for NULL.
[GD>>] Agreed. Will remove the unnecessary check and change the return type to void.

>
> vhost_vdpa_iotlb_unmap(v, iotlb, iova, iova + size - 1);
>
> if (ops->dma_map) {
> - ops->dma_unmap(vdpa, 0, iova, size);
> + ops->dma_unmap(vdpa, asid, iova, size);
> } else if (ops->set_map) {
> if (!v->in_batch)
> - ops->set_map(vdpa, 0, iotlb);
> + ops->set_map(vdpa, asid, iotlb);
> } else {
> iommu_unmap(v->domain, iova, size);
> }
> +
> + /* If we are in the middle of batch processing, delay the free
> + * of AS until BATCH_END.
> + */
> + if (!v->in_batch && !iotlb->nmaps)
> + vhost_vdpa_remove_as(v, asid);
> +
> + return 0;
> }
>
> static int vhost_vdpa_va_map(struct vhost_vdpa *v, @@ -916,33 +955,55
> @@ static int vhost_vdpa_process_iotlb_msg(struct vhost_dev *dev, u32 asid,
> struct vhost_vdpa *v = container_of(dev, struct vhost_vdpa, vdev);
> struct vdpa_device *vdpa = v->vdpa;
> const struct vdpa_config_ops *ops = vdpa->config;
> - struct vhost_vdpa_as *as = asid_to_as(v, 0);
> - struct vhost_iotlb *iotlb = &as->iotlb;
> + struct vhost_iotlb *iotlb = NULL;
> + struct vhost_vdpa_as *as = NULL;
> int r = 0;
>
> mutex_lock(&dev->mutex);
>
> - if (asid != 0)
> - return -EINVAL;
> -
> r = vhost_dev_check_owner(dev);
> if (r)
> goto unlock;
>
> + if (msg->type == VHOST_IOTLB_UPDATE ||
> + msg->type == VHOST_IOTLB_BATCH_BEGIN) {
> + as = vhost_vdpa_find_alloc_as(v, asid);
> + if (!as) {
> + dev_err(&v->dev, "can't find and alloc asid %d\n",
> + asid);
> + return -EINVAL;
> + }
> + iotlb = &as->iotlb;
> + } else
> + iotlb = asid_to_iotlb(v, asid);
> +
> + if ((v->in_batch && v->batch_asid != asid) || !iotlb) {
> + if (v->in_batch && v->batch_asid != asid) {
> + dev_info(&v->dev, "batch id %d asid %d\n",
> + v->batch_asid, asid);
> + }
> + if (!iotlb)
> + dev_err(&v->dev, "no iotlb for asid %d\n", asid);
> + return -EINVAL;
> + }
> +
> switch (msg->type) {
> case VHOST_IOTLB_UPDATE:
> r = vhost_vdpa_process_iotlb_update(v, iotlb, msg);
> break;
> case VHOST_IOTLB_INVALIDATE:
> - vhost_vdpa_unmap(v, iotlb, msg->iova, msg->size);
> + r = vhost_vdpa_unmap(v, iotlb, msg->iova, msg->size);
> break;
> case VHOST_IOTLB_BATCH_BEGIN:
> + v->batch_asid = asid;
> v->in_batch = true;
> break;
> case VHOST_IOTLB_BATCH_END:
> if (v->in_batch && ops->set_map)
> - ops->set_map(vdpa, 0, iotlb);
> + ops->set_map(vdpa, asid, iotlb);
> v->in_batch = false;
> + if (!iotlb->nmaps)
> + vhost_vdpa_remove_as(v, asid);
> break;
> default:
> r = -EINVAL;
> @@ -1030,9 +1091,17 @@ static void vhost_vdpa_set_iova_range(struct
> vhost_vdpa *v)
>
> static void vhost_vdpa_cleanup(struct vhost_vdpa *v) {
> + struct vhost_vdpa_as *as;
> + u32 asid;
> +
> vhost_dev_cleanup(&v->vdev);
> kfree(v->vdev.vqs);
> - vhost_vdpa_remove_as(v, 0);
> +
> + for (asid = 0; asid < v->vdpa->nas; asid++) {
> + as = asid_to_as(v, asid);
> + if (as)
> + vhost_vdpa_remove_as(v, asid);
> + }
> }
>
> static int vhost_vdpa_open(struct inode *inode, struct file *filep)
> @@ -1067,12 +1136,9 @@ static int vhost_vdpa_open(struct inode *inode, struct file *filep)
> vhost_dev_init(dev, vqs, nvqs, 0, 0, 0, false,
> vhost_vdpa_process_iotlb_msg);
>
> - if (!vhost_vdpa_alloc_as(v, 0))
> - goto err_alloc_as;
> -
> r = vhost_vdpa_alloc_domain(v);
> if (r)
> - goto err_alloc_as;
> + goto err_alloc_domain;
>
> vhost_vdpa_set_iova_range(v);
>
> @@ -1080,7 +1146,7 @@ static int vhost_vdpa_open(struct inode *inode,
> struct file *filep)
>
> return 0;
>
> -err_alloc_as:
> +err_alloc_domain:
> vhost_vdpa_cleanup(v);
> err:
> atomic_dec(&v->opened);
> @@ -1205,8 +1271,11 @@ static int vhost_vdpa_probe(struct vdpa_device *vdpa)
> int minor;
> int i, r;
>
> - /* Only support 1 address space and 1 groups */
> - if (vdpa->ngroups != 1 || vdpa->nas != 1)
> + /* We can't support platform IOMMU device with more than 1
> + * group or as
> + */
> + if (!ops->set_map && !ops->dma_map &&
> + (vdpa->ngroups > 1 || vdpa->nas > 1))
> return -EOPNOTSUPP;
>
> v = kzalloc(sizeof(*v), GFP_KERNEL | __GFP_RETRY_MAYFAIL);
> diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index
> 1f514d98f0de..92eeb684c84d 100644
> --- a/drivers/vhost/vhost.c
> +++ b/drivers/vhost/vhost.c
> @@ -1167,7 +1167,7 @@ ssize_t vhost_chr_write_iter(struct vhost_dev *dev,
> ret = -EINVAL;
> goto done;
> }
> - offset = sizeof(__u16);
> + offset = 0;
> } else
> offset = sizeof(__u32);
> break;
> --
> 2.25.0
>