[PATCH v1] vdpa: Do not count the pages that were already pinned in the vhost-vDPA

From: Cindy Lu
Date: Mon May 09 2022 - 03:25:58 EST


We count pinned_vm as follow in vhost-vDPA

lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
if (npages + atomic64_read(&dev->mm->pinned_vm) > lock_limit) {
ret = -ENOMEM;
goto unlock;
}
This means if we have two vDPA devices for the same VM the pages would be counted twice
So we add a tree to save the page that counted and we will not count it again

Signed-off-by: Cindy Lu <lulu@xxxxxxxxxx>
---
drivers/vhost/vdpa.c | 79 ++++++++++++++++++++++++++++++++++++++--
include/linux/mm_types.h | 2 +
2 files changed, 78 insertions(+), 3 deletions(-)

diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c
index 05f5fd2af58f..48cb5c8264b5 100644
--- a/drivers/vhost/vdpa.c
+++ b/drivers/vhost/vdpa.c
@@ -24,6 +24,9 @@
#include <linux/vhost.h>

#include "vhost.h"
+#include <linux/rbtree.h>
+#include <linux/interval_tree.h>
+#include <linux/interval_tree_generic.h>

enum {
VHOST_VDPA_BACKEND_FEATURES =
@@ -505,6 +508,50 @@ static long vhost_vdpa_unlocked_ioctl(struct file *filep,
mutex_unlock(&d->mutex);
return r;
}
+int vhost_vdpa_add_range_ctx(struct rb_root_cached *root, u64 start, u64 last)
+{
+ struct interval_tree_node *new_node;
+
+ if (last < start)
+ return -EFAULT;
+
+ /* If the range being mapped is [0, ULONG_MAX], split it into two entries
+ * otherwise its size would overflow u64.
+ */
+ if (start == 0 && last == ULONG_MAX) {
+ u64 mid = last / 2;
+
+ vhost_vdpa_add_range_ctx(root, start, mid);
+ start = mid + 1;
+ }
+
+ new_node = kmalloc(sizeof(struct interval_tree_node), GFP_ATOMIC);
+ if (!new_node)
+ return -ENOMEM;
+
+ new_node->start = start;
+ new_node->last = last;
+
+ interval_tree_insert(new_node, root);
+
+ return 0;
+}
+
+void vhost_vdpa_del_range(struct rb_root_cached *root, u64 start, u64 last)
+{
+ struct interval_tree_node *new_node;
+
+ while ((new_node = interval_tree_iter_first(root, start, last))) {
+ interval_tree_remove(new_node, root);
+ kfree(new_node);
+ }
+}
+
+struct interval_tree_node *vhost_vdpa_search_range(struct rb_root_cached *root,
+ u64 start, u64 last)
+{
+ return interval_tree_iter_first(root, start, last);
+}

static void vhost_vdpa_pa_unmap(struct vhost_vdpa *v, u64 start, u64 last)
{
@@ -513,6 +560,7 @@ static void vhost_vdpa_pa_unmap(struct vhost_vdpa *v, u64 start, u64 last)
struct vhost_iotlb_map *map;
struct page *page;
unsigned long pfn, pinned;
+ struct interval_tree_node *new_node = NULL;

while ((map = vhost_iotlb_itree_first(iotlb, start, last)) != NULL) {
pinned = PFN_DOWN(map->size);
@@ -523,7 +571,18 @@ static void vhost_vdpa_pa_unmap(struct vhost_vdpa *v, u64 start, u64 last)
set_page_dirty_lock(page);
unpin_user_page(page);
}
- atomic64_sub(PFN_DOWN(map->size), &dev->mm->pinned_vm);
+
+ new_node = vhost_vdpa_search_range(&dev->mm->root_for_vdpa,
+ map->start,
+ map->start + map->size - 1);
+
+ if (new_node) {
+ vhost_vdpa_del_range(&dev->mm->root_for_vdpa,
+ map->start,
+ map->start + map->size - 1);
+ atomic64_sub(PFN_DOWN(map->size), &dev->mm->pinned_vm);
+ }
+
vhost_iotlb_map_free(iotlb, map);
}
}
@@ -591,6 +650,7 @@ static int vhost_vdpa_map(struct vhost_vdpa *v, u64 iova,
struct vdpa_device *vdpa = v->vdpa;
const struct vdpa_config_ops *ops = vdpa->config;
int r = 0;
+ struct interval_tree_node *new_node = NULL;

r = vhost_iotlb_add_range_ctx(dev->iotlb, iova, iova + size - 1,
pa, perm, opaque);
@@ -611,9 +671,22 @@ static int vhost_vdpa_map(struct vhost_vdpa *v, u64 iova,
return r;
}

- if (!vdpa->use_va)
- atomic64_add(PFN_DOWN(size), &dev->mm->pinned_vm);
+ if (!vdpa->use_va) {
+ new_node = vhost_vdpa_search_range(&dev->mm->root_for_vdpa,
+ iova, iova + size - 1);
+
+ if (new_node == 0) {
+ r = vhost_vdpa_add_range_ctx(&dev->mm->root_for_vdpa,
+ iova, iova + size - 1);
+ if (r) {
+ vhost_iotlb_del_range(dev->iotlb, iova,
+ iova + size - 1);
+ return r;
+ }

+ atomic64_add(PFN_DOWN(size), &dev->mm->pinned_vm);
+ }
+ }
return 0;
}

diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 5140e5feb486..46eaa6d0560b 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -634,6 +634,8 @@ struct mm_struct {
#ifdef CONFIG_IOMMU_SUPPORT
u32 pasid;
#endif
+ struct rb_root_cached root_for_vdpa;
+
} __randomize_layout;

/*
--
2.34.1