Re: [PATCH v2 22/22] fpga: intel: afu: add FPGA_PORT_DMA_MAP/UNMAP ioctls support

From: Alan Tull
Date: Mon Jul 31 2017 - 17:42:13 EST


On Sun, Jun 25, 2017 at 8:52 PM, Wu Hao <hao.wu@xxxxxxxxx> wrote:

Hi Hao,

Please run checkpatch.pl --strict on this.

Could you add some kernel-doc function comments here to help the new
user (or reviewer) get a better handle on what this code is doing?

A few mostly minor comments below.

> DMA memory regions are required for Accelerated Function Unit (AFU) usage.
> These two ioctls allow user space applications to map user memory regions
> for dma, and unmap them after use. Iova is returned from driver to user
> space application via FPGA_PORT_DMA_MAP ioctl. Application needs to unmap
> it after use, otherwise, driver will unmap them in device file release
> operation.
>
> All the mapped regions are managed via a rb tree.

Please note that each afu has its own rb tree (this comment sounds
like there's one rb tree for all) to keep track of its DMA regions.

>
> Ioctl interfaces:
> * FPGA_PORT_DMA_MAP
> Do the dma mapping per user_addr and length which provided by user.
> Return iova in provided struct afu_port_dma_map.
>
> * FPGA_PORT_DMA_UNMAP
> Unmap the dma region per iova provided by user.
>
> Signed-off-by: Tim Whisonant <tim.whisonant@xxxxxxxxx>
> Signed-off-by: Enno Luebbers <enno.luebbers@xxxxxxxxx>
> Signed-off-by: Shiva Rao <shiva.rao@xxxxxxxxx>
> Signed-off-by: Christopher Rauer <christopher.rauer@xxxxxxxxx>
> Signed-off-by: Xiao Guangrong <guangrong.xiao@xxxxxxxxxxxxxxx>
> Signed-off-by: Wu Hao <hao.wu@xxxxxxxxx>
> ---
> v2: moved the code to drivers/fpga folder as suggested by Alan Tull.
> switched to GPLv2 license.
> fixed kbuild warnings.
> ---
> drivers/fpga/Makefile | 3 +-
> drivers/fpga/intel-afu-dma-region.c | 372 ++++++++++++++++++++++++++++++++++++
> drivers/fpga/intel-afu-main.c | 61 +++++-
> drivers/fpga/intel-afu.h | 18 ++
> include/uapi/linux/intel-fpga.h | 37 ++++
> 5 files changed, 489 insertions(+), 2 deletions(-)
> create mode 100644 drivers/fpga/intel-afu-dma-region.c
>
> diff --git a/drivers/fpga/Makefile b/drivers/fpga/Makefile
> index 45c0538..339d1f3 100644
> --- a/drivers/fpga/Makefile
> +++ b/drivers/fpga/Makefile
> @@ -38,4 +38,5 @@ obj-$(CONFIG_INTEL_FPGA_AFU) += intel-fpga-afu.o
>
> intel-fpga-pci-objs := intel-pcie.o intel-feature-dev.o
> intel-fpga-fme-objs := intel-fme-main.o intel-fme-pr.o
> -intel-fpga-afu-objs := intel-afu-main.o intel-afu-region.o
> +intel-fpga-afu-objs := intel-afu-main.o intel-afu-region.o \
> + intel-afu-dma-region.o
> diff --git a/drivers/fpga/intel-afu-dma-region.c b/drivers/fpga/intel-afu-dma-region.c
> new file mode 100644
> index 0000000..982a9b5
> --- /dev/null
> +++ b/drivers/fpga/intel-afu-dma-region.c
> @@ -0,0 +1,372 @@
> +/*
> + * Driver for Intel FPGA Accelerated Function Unit (AFU) DMA Region Management
> + *
> + * Copyright (C) 2017 Intel Corporation, Inc.
> + *
> + * Authors:
> + * Wu Hao <hao.wu@xxxxxxxxx>
> + * Xiao Guangrong <guangrong.xiao@xxxxxxxxxxxxxxx>
> + *
> + * This work is licensed under the terms of the GNU GPL version 2. See
> + * the COPYING file in the top-level directory.
> + */
> +
> +#include <linux/sched/signal.h>
> +#include <linux/uaccess.h>
> +
> +#include "intel-afu.h"
> +
> +static void put_all_pages(struct page **pages, int npages)
> +{
> + int i;
> +
> + for (i = 0; i < npages; i++)
> + if (pages[i] != NULL)
> + put_page(pages[i]);
> +}
> +
> +void afu_dma_region_init(struct feature_platform_data *pdata)
> +{
> + struct fpga_afu *afu = fpga_pdata_get_private(pdata);
> +
> + afu->dma_regions = RB_ROOT;
> +}
> +
> +static long afu_dma_adjust_locked_vm(struct device *dev, long npages, bool incr)
> +{
> + unsigned long locked, lock_limit;
> + int ret = 0;
> +
> + /* the task is exiting. */
> + if (!current->mm)
> + return 0;
> +
> + down_write(&current->mm->mmap_sem);
> +
> + if (incr) {
> + locked = current->mm->locked_vm + npages;
> + lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
> +
> + if (locked > lock_limit && !capable(CAP_IPC_LOCK))
> + ret = -ENOMEM;
> + else
> + current->mm->locked_vm += npages;
> + } else {
> +

Don't need to skip a line here.

> + if (WARN_ON_ONCE(npages > current->mm->locked_vm))
> + npages = current->mm->locked_vm;
> + current->mm->locked_vm -= npages;
> + }
> +
> + dev_dbg(dev, "[%d] RLIMIT_MEMLOCK %c%ld %ld/%ld%s\n", current->pid,
> + incr ? '+' : '-',
> + npages << PAGE_SHIFT,
> + current->mm->locked_vm << PAGE_SHIFT,
> + rlimit(RLIMIT_MEMLOCK),
> + ret ? "- execeeded" : "");
> +
> + up_write(&current->mm->mmap_sem);
> +
> + return ret;
> +}
> +
> +static long afu_dma_pin_pages(struct feature_platform_data *pdata,
> + struct fpga_afu_dma_region *region)

Conventionally, return type is usually int in kernel functions that
return 0 or error code.

> +{
> + long npages = region->length >> PAGE_SHIFT;
> + struct device *dev = &pdata->dev->dev;
> + long ret, pinned;
> +
> + ret = afu_dma_adjust_locked_vm(dev, npages, true);
> + if (ret)
> + return ret;
> +
> + region->pages = kcalloc(npages, sizeof(struct page *), GFP_KERNEL);
> + if (!region->pages) {
> + afu_dma_adjust_locked_vm(dev, npages, false);
> + return -ENOMEM;
> + }
> +
> + pinned = get_user_pages_fast(region->user_addr, npages, 1,
> + region->pages);
> + if (pinned < 0) {
> + ret = pinned;

This return value gets all the way to be returned by the ioctl and
isn't used for its value as the number that actually got pinned by the
caller.

> + goto err_put_pages;
> + } else if (pinned != npages) {
> + ret = -EFAULT;
> + goto err;
> + }
> +
> + dev_dbg(dev, "%ld pages pinned\n", pinned);
> +
> + return 0;
> +
> +err_put_pages:
> + put_all_pages(region->pages, pinned);
> +err:
> + kfree(region->pages);
> + afu_dma_adjust_locked_vm(dev, npages, false);
> + return ret;
> +}
> +
> +static void afu_dma_unpin_pages(struct feature_platform_data *pdata,
> + struct fpga_afu_dma_region *region)
> +{
> + long npages = region->length >> PAGE_SHIFT;
> + struct device *dev = &pdata->dev->dev;
> +
> + put_all_pages(region->pages, npages);
> + kfree(region->pages);
> + afu_dma_adjust_locked_vm(dev, npages, false);
> +
> + dev_dbg(dev, "%ld pages unpinned\n", npages);
> +}
> +
> +static bool afu_dma_check_continuous_pages(struct fpga_afu_dma_region *region)
> +{
> + int npages = region->length >> PAGE_SHIFT;
> + int i;
> +
> + for (i = 0; i < npages - 1; i++)
> + if (page_to_pfn(region->pages[i]) + 1 !=
> + page_to_pfn(region->pages[i+1]))
> + return false;
> +
> + return true;
> +}
> +
> +static bool dma_region_check_iova(struct fpga_afu_dma_region *region,
> + u64 iova, u64 size)

This function checks that the area defined by iova and size is fully
contained in the region, right? A comment would help.

> +{
> + if (!size && region->iova != iova)
> + return false;
> +
> + return (region->iova <= iova) &&
> + (region->length + region->iova >= iova + size);
> +}
> +
> +/* Need to be called with pdata->lock held */
> +static int afu_dma_region_add(struct feature_platform_data *pdata,
> + struct fpga_afu_dma_region *region)
> +{
> + struct fpga_afu *afu = fpga_pdata_get_private(pdata);
> + struct rb_node **new, *parent = NULL;
> +
> + dev_dbg(&pdata->dev->dev, "add region (iova = %llx)\n",
> + (unsigned long long)region->iova);
> +
> + new = &(afu->dma_regions.rb_node);
> +
> + while (*new) {
> + struct fpga_afu_dma_region *this;
> +
> + this = container_of(*new, struct fpga_afu_dma_region, node);
> +
> + parent = *new;
> +
> + if (dma_region_check_iova(this, region->iova, region->length))
> + return -EEXIST;
> +
> + if (region->iova < this->iova)
> + new = &((*new)->rb_left);
> + else if (region->iova > this->iova)
> + new = &((*new)->rb_right);
> + else
> + return -EEXIST;
> + }
> +
> + rb_link_node(&region->node, parent, new);
> + rb_insert_color(&region->node, &afu->dma_regions);
> +
> + return 0;
> +}
> +
> +/* Need to be called with pdata->lock held */
> +static void afu_dma_region_remove(struct feature_platform_data *pdata,
> + struct fpga_afu_dma_region *region)
> +{
> + struct fpga_afu *afu;
> +
> + dev_dbg(&pdata->dev->dev, "del region (iova = %llx)\n",
> + (unsigned long long)region->iova);
> +
> + afu = fpga_pdata_get_private(pdata);
> + rb_erase(&region->node, &afu->dma_regions);
> +}
> +
> +/* Need to be called with pdata->lock held */
> +void afu_dma_region_destroy(struct feature_platform_data *pdata)
> +{
> + struct fpga_afu *afu = fpga_pdata_get_private(pdata);
> + struct rb_node *node = rb_first(&afu->dma_regions);
> + struct fpga_afu_dma_region *region;
> +
> + while (node) {
> + region = container_of(node, struct fpga_afu_dma_region, node);
> +
> + dev_dbg(&pdata->dev->dev, "del region (iova = %llx)\n",
> + (unsigned long long)region->iova);
> +
> + rb_erase(node, &afu->dma_regions);
> +
> + if (region->iova)
> + dma_unmap_page(fpga_pdata_to_pcidev(pdata),
> + region->iova, region->length,
> + DMA_BIDIRECTIONAL);
> +
> + if (region->pages)
> + afu_dma_unpin_pages(pdata, region);
> +
> + node = rb_next(node);
> + kfree(region);
> + }
> +}
> +
> +/*
> + * It finds the dma region from the rbtree based on @iova and @size:
> + * - if @size == 0, it finds the dma region which starts from @iova
> + * - otherwise, it finds the dma region which fully contains
> + * [@iova, @iova+size)
> + * If nothing is matched returns NULL.
> + *
> + * Need to be called with pdata->lock held.
> + */
> +struct fpga_afu_dma_region *
> +afu_dma_region_find(struct feature_platform_data *pdata, u64 iova, u64 size)
> +{
> + struct fpga_afu *afu = fpga_pdata_get_private(pdata);
> + struct rb_node *node = afu->dma_regions.rb_node;
> + struct device *dev = &pdata->dev->dev;
> +
> + while (node) {
> + struct fpga_afu_dma_region *region;
> +
> + region = container_of(node, struct fpga_afu_dma_region, node);
> +
> + if (dma_region_check_iova(region, iova, size)) {
> + dev_dbg(dev, "find region (iova = %llx)\n",
> + (unsigned long long)region->iova);
> + return region;
> + }
> +
> + if (iova < region->iova)
> + node = node->rb_left;
> + else if (iova > region->iova)
> + node = node->rb_right;
> + else
> + /* the iova region is not fully covered. */
> + break;
> + }
> +
> + dev_dbg(dev, "region with iova %llx and size %llx is not found\n",
> + (unsigned long long)iova, (unsigned long long)size);
> + return NULL;
> +}
> +
> +static struct fpga_afu_dma_region *
> +afu_dma_region_find_iova(struct feature_platform_data *pdata, u64 iova)
> +{
> + return afu_dma_region_find(pdata, iova, 0);
> +}
> +
> +long afu_dma_map_region(struct feature_platform_data *pdata,
> + u64 user_addr, u64 length, u64 *iova)
> +{
> + struct fpga_afu_dma_region *region;
> + int ret;
> +
> + /*
> + * Check Inputs, only accept page-aligned user memory region with
> + * valid length.
> + */
> + if (!PAGE_ALIGNED(user_addr) || !PAGE_ALIGNED(length) || !length)
> + return -EINVAL;
> +
> + /* Check overflow */
> + if (user_addr + length < user_addr)
> + return -EINVAL;
> +
> + if (!access_ok(VERIFY_WRITE, (void __user *)(unsigned long)user_addr,
> + length))
> + return -EINVAL;
> +
> + region = kzalloc(sizeof(*region), GFP_KERNEL);
> + if (!region)
> + return -ENOMEM;
> +
> + region->user_addr = user_addr;
> + region->length = length;
> +
> + /* Pin the user memory region */
> + ret = afu_dma_pin_pages(pdata, region);
> + if (ret) {
> + dev_err(&pdata->dev->dev, "fail to pin memory region\n");
> + goto free_region;
> + }
> +
> + /* Only accept continuous pages, return error if no */
> + if (!afu_dma_check_continuous_pages(region)) {
> + dev_err(&pdata->dev->dev, "pages are not continuous\n");
> + ret = -EINVAL;
> + goto unpin_pages;
> + }
> +
> + /* As pages are continuous then start to do DMA mapping */
> + region->iova = dma_map_page(fpga_pdata_to_pcidev(pdata),
> + region->pages[0], 0,
> + region->length,
> + DMA_BIDIRECTIONAL);
> + if (dma_mapping_error(&pdata->dev->dev, region->iova)) {
> + dev_err(&pdata->dev->dev, "fail to map dma mapping\n");
> + ret = -EFAULT;
> + goto unpin_pages;
> + }
> +
> + *iova = region->iova;
> +
> + mutex_lock(&pdata->lock);
> + ret = afu_dma_region_add(pdata, region);
> + mutex_unlock(&pdata->lock);
> + if (ret) {
> + dev_err(&pdata->dev->dev, "fail to add dma region\n");
> + goto unmap_dma;
> + }
> +
> + return 0;
> +
> +unmap_dma:
> + dma_unmap_page(fpga_pdata_to_pcidev(pdata),
> + region->iova, region->length, DMA_BIDIRECTIONAL);
> +unpin_pages:
> + afu_dma_unpin_pages(pdata, region);
> +free_region:
> + kfree(region);
> + return ret;
> +}
> +
> +long afu_dma_unmap_region(struct feature_platform_data *pdata, u64 iova)
> +{
> + struct fpga_afu_dma_region *region;
> +
> + mutex_lock(&pdata->lock);
> + region = afu_dma_region_find_iova(pdata, iova);
> + if (!region) {
> + mutex_unlock(&pdata->lock);
> + return -EINVAL;
> + }
> +
> + if (region->in_use) {
> + mutex_unlock(&pdata->lock);
> + return -EBUSY;
> + }
> +
> + afu_dma_region_remove(pdata, region);
> + mutex_unlock(&pdata->lock);
> +
> + dma_unmap_page(fpga_pdata_to_pcidev(pdata),
> + region->iova, region->length, DMA_BIDIRECTIONAL);
> + afu_dma_unpin_pages(pdata, region);
> + kfree(region);
> +
> + return 0;
> +}
> diff --git a/drivers/fpga/intel-afu-main.c b/drivers/fpga/intel-afu-main.c
> index 8c7aa70..d9f1ebf 100644
> --- a/drivers/fpga/intel-afu-main.c
> +++ b/drivers/fpga/intel-afu-main.c
> @@ -175,7 +175,11 @@ static int afu_release(struct inode *inode, struct file *filp)
>
> dev_dbg(&pdev->dev, "Device File Release\n");
>
> - fpga_port_reset(pdev);
> + mutex_lock(&pdata->lock);
> + __fpga_port_reset(pdev);
> + afu_dma_region_destroy(pdata);
> + mutex_unlock(&pdata->lock);
> +
> feature_dev_use_end(pdata);
> return 0;
> }
> @@ -245,6 +249,55 @@ static long afu_ioctl_check_extension(struct feature_platform_data *pdata,
> return 0;
> }
>
> +static long
> +afu_ioctl_dma_map(struct feature_platform_data *pdata, void __user *arg)
> +{
> + struct fpga_port_dma_map map;
> + unsigned long minsz;
> + long ret;
> +
> + minsz = offsetofend(struct fpga_port_dma_map, iova);
> +
> + if (copy_from_user(&map, arg, minsz))

Why are you using offsetofend() instead of sizeof(map)? Is this
struct going to expand beyond iova? Also below in one place.

> + return -EFAULT;
> +
> + if (map.argsz < minsz || map.flags)
> + return -EINVAL;
> +
> + ret = afu_dma_map_region(pdata, map.user_addr, map.length, &map.iova);
> + if (ret)
> + return ret;
> +
> + if (copy_to_user(arg, &map, sizeof(map))) {
> + afu_dma_unmap_region(pdata, map.iova);
> + return -EFAULT;
> + }
> +
> + dev_dbg(&pdata->dev->dev, "dma map: ua=%llx, len=%llx, iova=%llx\n",
> + (unsigned long long)map.user_addr,
> + (unsigned long long)map.length,
> + (unsigned long long)map.iova);
> +
> + return 0;
> +}
> +
> +static long
> +afu_ioctl_dma_unmap(struct feature_platform_data *pdata, void __user *arg)
> +{
> + struct fpga_port_dma_unmap unmap;
> + unsigned long minsz;
> +
> + minsz = offsetofend(struct fpga_port_dma_unmap, iova);

Here as well.

> +
> + if (copy_from_user(&unmap, arg, minsz))
> + return -EFAULT;
> +
> + if (unmap.argsz < minsz || unmap.flags)
> + return -EINVAL;
> +
> + return afu_dma_unmap_region(pdata, unmap.iova);
> +}
> +
> static long afu_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
> {
> struct platform_device *pdev = filp->private_data;
> @@ -263,6 +316,10 @@ static long afu_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
> return afu_ioctl_get_info(pdata, (void __user *)arg);
> case FPGA_PORT_GET_REGION_INFO:
> return afu_ioctl_get_region_info(pdata, (void __user *)arg);
> + case FPGA_PORT_DMA_MAP:
> + return afu_ioctl_dma_map(pdata, (void __user *)arg);
> + case FPGA_PORT_DMA_UNMAP:
> + return afu_ioctl_dma_unmap(pdata, (void __user *)arg);
> default:
> /*
> * Let sub-feature's ioctl function to handle the cmd
> @@ -337,6 +394,7 @@ static int afu_dev_init(struct platform_device *pdev)
> mutex_lock(&pdata->lock);
> fpga_pdata_set_private(pdata, afu);
> afu_region_init(pdata);
> + afu_dma_region_init(pdata);
> mutex_unlock(&pdata->lock);
> return 0;
> }
> @@ -349,6 +407,7 @@ static int afu_dev_destroy(struct platform_device *pdev)
> mutex_lock(&pdata->lock);
> afu = fpga_pdata_get_private(pdata);
> afu_region_destroy(pdata);
> + afu_dma_region_destroy(pdata);
> fpga_pdata_set_private(pdata, NULL);
> mutex_unlock(&pdata->lock);
>
> diff --git a/drivers/fpga/intel-afu.h b/drivers/fpga/intel-afu.h
> index 3417780d..23f7e24 100644
> --- a/drivers/fpga/intel-afu.h
> +++ b/drivers/fpga/intel-afu.h
> @@ -30,11 +30,21 @@ struct fpga_afu_region {
> struct list_head node;
> };
>
> +struct fpga_afu_dma_region {
> + u64 user_addr;
> + u64 length;
> + u64 iova;
> + struct page **pages;
> + struct rb_node node;
> + bool in_use;
> +};
> +
> struct fpga_afu {
> u64 region_cur_offset;
> int num_regions;
> u8 num_umsgs;
> struct list_head regions;
> + struct rb_root dma_regions;
>
> struct feature_platform_data *pdata;
> };
> @@ -49,4 +59,12 @@ int afu_get_region_by_offset(struct feature_platform_data *pdata,
> u64 offset, u64 size,
> struct fpga_afu_region *pregion);
>
> +void afu_dma_region_init(struct feature_platform_data *pdata);
> +void afu_dma_region_destroy(struct feature_platform_data *pdata);
> +long afu_dma_map_region(struct feature_platform_data *pdata,
> + u64 user_addr, u64 length, u64 *iova);
> +long afu_dma_unmap_region(struct feature_platform_data *pdata, u64 iova);
> +struct fpga_afu_dma_region *afu_dma_region_find(
> + struct feature_platform_data *pdata, u64 iova, u64 size);
> +
> #endif
> diff --git a/include/uapi/linux/intel-fpga.h b/include/uapi/linux/intel-fpga.h
> index a2ad332..b97ea02 100644
> --- a/include/uapi/linux/intel-fpga.h
> +++ b/include/uapi/linux/intel-fpga.h
> @@ -111,6 +111,43 @@ struct fpga_port_region_info {
>
> #define FPGA_PORT_GET_REGION_INFO _IO(FPGA_MAGIC, PORT_BASE + 2)
>
> +/**
> + * FPGA_PORT_DMA_MAP - _IOWR(FPGA_MAGIC, PORT_BASE + 3,
> + * struct fpga_port_dma_map)
> + *
> + * Map the dma memory per user_addr and length which are provided by caller.
> + * Driver fills the iova in provided struct afu_port_dma_map.
> + * This interface only accepts page-size aligned user memory for dma mapping.
> + * Return: 0 on success, -errno on failure.
> + */
> +struct fpga_port_dma_map {
> + /* Input */
> + __u32 argsz; /* Structure length */
> + __u32 flags; /* Zero for now */
> + __u64 user_addr; /* Process virtual address */
> + __u64 length; /* Length of mapping (bytes)*/
> + /* Output */
> + __u64 iova; /* IO virtual address */
> +};
> +
> +#define FPGA_PORT_DMA_MAP _IO(FPGA_MAGIC, PORT_BASE + 3)
> +
> +/**
> + * FPGA_PORT_DMA_UNMAP - _IOW(FPGA_MAGIC, PORT_BASE + 4,
> + * struct fpga_port_dma_unmap)
> + *
> + * Unmap the dma memory per iova provided by caller.
> + * Return: 0 on success, -errno on failure.
> + */
> +struct fpga_port_dma_unmap {
> + /* Input */
> + __u32 argsz; /* Structure length */
> + __u32 flags; /* Zero for now */
> + __u64 iova; /* IO virtual address */
> +};
> +
> +#define FPGA_PORT_DMA_UNMAP _IO(FPGA_MAGIC, PORT_BASE + 4)
> +
> /* IOCTLs for FME file descriptor */
>
> /**
> --
> 1.8.3.1
>