Re: [PATCH v1] vdpa: Do not count the pages that were already pinned in the vhost-vDPA

From: Jason Wang
Date: Mon May 09 2022 - 04:21:13 EST


On Mon, May 9, 2022 at 3:15 PM Cindy Lu <lulu@xxxxxxxxxx> wrote:
>
> We count pinned_vm as follow in vhost-vDPA
>
> lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
> if (npages + atomic64_read(&dev->mm->pinned_vm) > lock_limit) {
> ret = -ENOMEM;
> goto unlock;
> }
> This means if we have two vDPA devices for the same VM the pages would be counted twice
> So we add a tree to save the page that counted and we will not count it again
>
> Signed-off-by: Cindy Lu <lulu@xxxxxxxxxx>
> ---
> drivers/vhost/vdpa.c | 79 ++++++++++++++++++++++++++++++++++++++--
> include/linux/mm_types.h | 2 +
> 2 files changed, 78 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c
> index 05f5fd2af58f..48cb5c8264b5 100644
> --- a/drivers/vhost/vdpa.c
> +++ b/drivers/vhost/vdpa.c
> @@ -24,6 +24,9 @@
> #include <linux/vhost.h>
>
> #include "vhost.h"
> +#include <linux/rbtree.h>
> +#include <linux/interval_tree.h>
> +#include <linux/interval_tree_generic.h>
>
> enum {
> VHOST_VDPA_BACKEND_FEATURES =
> @@ -505,6 +508,50 @@ static long vhost_vdpa_unlocked_ioctl(struct file *filep,
> mutex_unlock(&d->mutex);
> return r;
> }
> +int vhost_vdpa_add_range_ctx(struct rb_root_cached *root, u64 start, u64 last)
> +{
> + struct interval_tree_node *new_node;
> +
> + if (last < start)
> + return -EFAULT;
> +
> + /* If the range being mapped is [0, ULONG_MAX], split it into two entries
> + * otherwise its size would overflow u64.
> + */
> + if (start == 0 && last == ULONG_MAX) {
> + u64 mid = last / 2;
> +
> + vhost_vdpa_add_range_ctx(root, start, mid);
> + start = mid + 1;
> + }
> +
> + new_node = kmalloc(sizeof(struct interval_tree_node), GFP_ATOMIC);
> + if (!new_node)
> + return -ENOMEM;
> +
> + new_node->start = start;
> + new_node->last = last;
> +
> + interval_tree_insert(new_node, root);
> +
> + return 0;
> +}
> +
> +void vhost_vdpa_del_range(struct rb_root_cached *root, u64 start, u64 last)
> +{
> + struct interval_tree_node *new_node;
> +
> + while ((new_node = interval_tree_iter_first(root, start, last))) {
> + interval_tree_remove(new_node, root);
> + kfree(new_node);
> + }
> +}
> +
> +struct interval_tree_node *vhost_vdpa_search_range(struct rb_root_cached *root,
> + u64 start, u64 last)
> +{
> + return interval_tree_iter_first(root, start, last);
> +}
>
> static void vhost_vdpa_pa_unmap(struct vhost_vdpa *v, u64 start, u64 last)
> {
> @@ -513,6 +560,7 @@ static void vhost_vdpa_pa_unmap(struct vhost_vdpa *v, u64 start, u64 last)
> struct vhost_iotlb_map *map;
> struct page *page;
> unsigned long pfn, pinned;
> + struct interval_tree_node *new_node = NULL;
>
> while ((map = vhost_iotlb_itree_first(iotlb, start, last)) != NULL) {
> pinned = PFN_DOWN(map->size);
> @@ -523,7 +571,18 @@ static void vhost_vdpa_pa_unmap(struct vhost_vdpa *v, u64 start, u64 last)
> set_page_dirty_lock(page);
> unpin_user_page(page);
> }
> - atomic64_sub(PFN_DOWN(map->size), &dev->mm->pinned_vm);
> +
> + new_node = vhost_vdpa_search_range(&dev->mm->root_for_vdpa,
> + map->start,
> + map->start + map->size - 1);
> +
> + if (new_node) {
> + vhost_vdpa_del_range(&dev->mm->root_for_vdpa,
> + map->start,
> + map->start + map->size - 1);
> + atomic64_sub(PFN_DOWN(map->size), &dev->mm->pinned_vm);
> + }
> +
> vhost_iotlb_map_free(iotlb, map);
> }
> }
> @@ -591,6 +650,7 @@ static int vhost_vdpa_map(struct vhost_vdpa *v, u64 iova,
> struct vdpa_device *vdpa = v->vdpa;
> const struct vdpa_config_ops *ops = vdpa->config;
> int r = 0;
> + struct interval_tree_node *new_node = NULL;
>
> r = vhost_iotlb_add_range_ctx(dev->iotlb, iova, iova + size - 1,
> pa, perm, opaque);
> @@ -611,9 +671,22 @@ static int vhost_vdpa_map(struct vhost_vdpa *v, u64 iova,
> return r;
> }
>
> - if (!vdpa->use_va)
> - atomic64_add(PFN_DOWN(size), &dev->mm->pinned_vm);
> + if (!vdpa->use_va) {
> + new_node = vhost_vdpa_search_range(&dev->mm->root_for_vdpa,
> + iova, iova + size - 1);
> +
> + if (new_node == 0) {
> + r = vhost_vdpa_add_range_ctx(&dev->mm->root_for_vdpa,
> + iova, iova + size - 1);
> + if (r) {
> + vhost_iotlb_del_range(dev->iotlb, iova,
> + iova + size - 1);
> + return r;
> + }
>
> + atomic64_add(PFN_DOWN(size), &dev->mm->pinned_vm);
> + }

This seems not sufficient, consider:

vhost-vDPA-A: add [A, B)
vhost-vDPA-B: add [A, C) (C>B)

We lose the accounting for [B, C)?

> + }
> return 0;
> }
>
> diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
> index 5140e5feb486..46eaa6d0560b 100644
> --- a/include/linux/mm_types.h
> +++ b/include/linux/mm_types.h
> @@ -634,6 +634,8 @@ struct mm_struct {
> #ifdef CONFIG_IOMMU_SUPPORT
> u32 pasid;
> #endif
> + struct rb_root_cached root_for_vdpa;
> +

Let's avoid touching mm_structure unless it's a must.

We can allocate something like vhost_mm if needed during SET_OWNER.

Thanks

> } __randomize_layout;
>
> /*
> --
> 2.34.1
>