Re: [PATCH V4 5/5] misc: mlx5ctl: Add umem reg/unreg ioctl

From: Vegard Nossum
Date: Thu Feb 29 2024 - 06:51:47 EST



On 07/02/2024 08:24, Saeed Mahameed wrote:
+static int mlx5ctl_ioctl_umem_unreg(struct file *file,
+ struct mlx5ctl_umem_unreg __user *arg,
+ size_t usize)
+{
+ size_t ksize = sizeof(struct mlx5ctl_umem_unreg);
+ struct mlx5ctl_fd *mfd = file->private_data;
+ struct mlx5ctl_umem_unreg *umem_unreg;
+ int err = 0;
+
+ if (usize < ksize)
+ return -EINVAL;
+
+ umem_unreg = kzalloc(ksize, GFP_KERNEL);
+ if (!umem_unreg)
+ return -ENOMEM;
+
+ if (copy_from_user(umem_unreg, arg, ksize)) {
+ err = -EFAULT;
+ goto out;
+ }
+
+ if (umem_unreg->reserved) {
+ err = -EOPNOTSUPP;
+ goto out;
+ }
+
+ err = mlx5ctl_umem_unreg(mfd->umem_db, umem_unreg->umem_id);
+
+ if (!err && copy_to_user(arg, umem_unreg, ksize))
+ err = -EFAULT;

Why do you copy this back to userspace, does it even change?

+static struct mlx5ctl_umem *mlx5ctl_umem_pin(struct mlx5ctl_umem_db *umem_db,
+ unsigned long addr, size_t size)
+{
+ size_t npages = umem_num_pages(addr, size);
+ struct mlx5_core_dev *mdev = umem_db->mdev;
+ unsigned long endaddr = addr + size;
+ struct mlx5ctl_umem *umem;
+ struct page **page_list;
+ int err = -EINVAL;
+ int pinned = 0;
+
+ dev_dbg(mdev->device, "%s: addr %p size %zu npages %zu\n",
+ __func__, (void __user *)addr, size, npages);
+
+ /* Avoid integer overflow */
+ if (endaddr < addr || PAGE_ALIGN(endaddr) < endaddr)
+ return ERR_PTR(-EINVAL);

There is a check_add_overflow() macro in <linux/overflow.h>, should that
be used instead?

+
+ if (npages == 0 || PAGES_2_MB(npages) > MLX5CTL_UMEM_MAX_MB)
+ return ERR_PTR(-EINVAL);
+
+ page_list = kvmalloc_array(npages, sizeof(struct page *), GFP_KERNEL_ACCOUNT);
+ if (!page_list)
+ return ERR_PTR(-ENOMEM);
+
+ umem = kzalloc(sizeof(*umem), GFP_KERNEL_ACCOUNT);
+ if (!umem) {
+ kvfree(page_list);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ umem->addr = addr;
+ umem->size = size;
+ umem->offset = addr & ~PAGE_MASK;
+ umem->npages = npages;
+
+ umem->page_list = page_list;
+ umem->source_mm = current->mm;
+ umem->source_task = current->group_leader;
+ get_task_struct(current->group_leader);
+ umem->source_user = get_uid(current_user());

I think you could do this, right?

umem->source_task = get_task_struct(current->group_leader);
umem->source_user = get_current_user();

+
+ /* mm and RLIMIT_MEMLOCK user task accounting similar to what is
+ * being done in iopt_alloc_pages() and do_update_pinned()
+ * for IOPT_PAGES_ACCOUNT_USER @drivers/iommu/iommufd/pages.c
+ */
+ mmgrab(umem->source_mm);
+
+ pinned = pin_user_pages_fast(addr, npages, FOLL_WRITE, page_list);
+ if (pinned != npages) {
+ dev_dbg(mdev->device, "pin_user_pages_fast failed %d\n", pinned);
+ err = pinned < 0 ? pinned : -ENOMEM;
+ goto pin_failed;
+ }
+
+ err = inc_user_locked_vm(umem, npages);
+ if (err)
+ goto pin_failed;
+
+ atomic64_add(npages, &umem->source_mm->pinned_vm);
+
+ err = sg_alloc_table_from_pages(&umem->sgt, page_list, npages, 0,
+ npages << PAGE_SHIFT, GFP_KERNEL_ACCOUNT);
+ if (err) {
+ dev_dbg(mdev->device, "sg_alloc_table failed: %d\n", err);
+ goto sgt_failed;

Is this correct? See below...

+ }
+
+ dev_dbg(mdev->device, "\tsgt: size %zu npages %zu sgt.nents (%d)\n",
+ size, npages, umem->sgt.nents);
+
+ err = dma_map_sgtable(mdev->device, &umem->sgt, DMA_BIDIRECTIONAL, 0);
+ if (err) {
+ dev_dbg(mdev->device, "dma_map_sgtable failed: %d\n", err);
+ goto dma_failed;
+ }
+
+ dev_dbg(mdev->device, "\tsgt: dma_nents %d\n", umem->sgt.nents);
+ return umem;
+
+dma_failed:
+sgt_failed:
+ sg_free_table(&umem->sgt);

Not sure about calling sg_free_table() if sg_alloc_table_from_pages()
failed.

+ atomic64_sub(npages, &umem->source_mm->pinned_vm);
+ dec_user_locked_vm(umem, npages);
+pin_failed:
+ if (pinned > 0)
+ unpin_user_pages(page_list, pinned);
+ mmdrop(umem->source_mm);
+ free_uid(umem->source_user);
+ put_task_struct(umem->source_task);
+
+ kfree(umem);
+ kvfree(page_list);
+ return ERR_PTR(err);
+}


Vegard