RE: [PATCH V4 1/2] misc: vop: change the way of allocating vring and device page

From: Sherry Sun
Date: Tue Oct 27 2020 - 08:30:49 EST


Hi Christoph,

Thank you for improving the code. I think your code makes vop_mmap looks clearer.
I have tested it on imx8qm platform(arm64 architecture) and it works well.
I will send the code together in v5.

Best regards
Sherry


> Subject: Re: [PATCH V4 1/2] misc: vop: change the way of allocating vring and
> device page
>
> This looks much better, but we need to be careful to restore the original
> vm_start / vm_end. What do you think about the version below, which also
> simplifies vop_mmap a lot (completely untested):
>
> ---
> From 2de72bf7444ee187a7576962d746d482c5bdd593 Mon Sep 17 00:00:00
> 2001
> From: Sherry Sun <sherry.sun@xxxxxxx>
> Date: Mon, 26 Oct 2020 16:53:34 +0800
> Subject: misc: vop: change the way of allocating vring and device page
>
> Allocate vrings use dma_alloc_coherent is a common way in kernel. As the
> memory interacted between two systems should use consistent memory to
> avoid caching effects, same as device page memory.
>
> The orginal way use __get_free_pages and dma_map_single to allocate and
> map vring, but not use dma_sync_single_for_cpu/device api to sync the
> changes of vring between EP and RC, which will cause memory
> synchronization problem for those devices which don't support hardware
> dma coherent.
>
> Also change to use dma_mmap_coherent for mmap callback to map the
> device page and vring memory to userspace.
>
> Signed-off-by: Joakim Zhang <qiangqing.zhang@xxxxxxx>
> Signed-off-by: Sherry Sun <sherry.sun@xxxxxxx>
> ---
> drivers/misc/mic/bus/vop_bus.h | 2 +
> drivers/misc/mic/host/mic_boot.c | 9 ++
> drivers/misc/mic/host/mic_main.c | 43 +++-------
> drivers/misc/mic/vop/vop_vringh.c | 135 ++++++++++++------------------
> 4 files changed, 77 insertions(+), 112 deletions(-)
>
> diff --git a/drivers/misc/mic/bus/vop_bus.h
> b/drivers/misc/mic/bus/vop_bus.h index 4fa02808c1e27d..e21c06aeda7a31
> 100644
> --- a/drivers/misc/mic/bus/vop_bus.h
> +++ b/drivers/misc/mic/bus/vop_bus.h
> @@ -75,6 +75,7 @@ struct vop_driver {
> * node to add/remove/configure virtio devices.
> * @get_dp: Get access to the virtio device page used by the self
> * node to add/remove/configure virtio devices.
> + * @dp_mmap: Map the virtio device page to userspace.
> * @send_intr: Send an interrupt to the peer node on a specified doorbell.
> * @remap: Map a buffer with the specified DMA address and length.
> * @unmap: Unmap a buffer previously mapped.
> @@ -92,6 +93,7 @@ struct vop_hw_ops {
> void (*ack_interrupt)(struct vop_device *vpdev, int num);
> void __iomem * (*get_remote_dp)(struct vop_device *vpdev);
> void * (*get_dp)(struct vop_device *vpdev);
> + int (*dp_mmap)(struct vop_device *vpdev, struct vm_area_struct
> *vma);
> void (*send_intr)(struct vop_device *vpdev, int db);
> void __iomem * (*remap)(struct vop_device *vpdev,
> dma_addr_t pa, size_t len);
> diff --git a/drivers/misc/mic/host/mic_boot.c
> b/drivers/misc/mic/host/mic_boot.c
> index 8cb85b8b3e199b..44ed918b49b4d2 100644
> --- a/drivers/misc/mic/host/mic_boot.c
> +++ b/drivers/misc/mic/host/mic_boot.c
> @@ -89,6 +89,14 @@ static void *__mic_get_dp(struct vop_device *vpdev)
> return mdev->dp;
> }
>
> +static int __mic_dp_mmap(struct vop_device *vpdev, struct
> +vm_area_struct *vma) {
> + struct mic_device *mdev = vpdev_to_mdev(&vpdev->dev);
> +
> + return dma_mmap_coherent(&mdev->pdev->dev, vma, mdev->dp,
> + mdev->dp_dma_addr, MIC_DP_SIZE);
> +}
> +
> static void __iomem *__mic_get_remote_dp(struct vop_device *vpdev) {
> return NULL;
> @@ -120,6 +128,7 @@ static struct vop_hw_ops vop_hw_ops = {
> .ack_interrupt = __mic_ack_interrupt,
> .next_db = __mic_next_db,
> .get_dp = __mic_get_dp,
> + .dp_mmap = __mic_dp_mmap,
> .get_remote_dp = __mic_get_remote_dp,
> .send_intr = __mic_send_intr,
> .remap = __mic_ioremap,
> diff --git a/drivers/misc/mic/host/mic_main.c
> b/drivers/misc/mic/host/mic_main.c
> index ea4608527ea04d..fab87a72a9a4a5 100644
> --- a/drivers/misc/mic/host/mic_main.c
> +++ b/drivers/misc/mic/host/mic_main.c
> @@ -10,6 +10,7 @@
> #include <linux/module.h>
> #include <linux/pci.h>
> #include <linux/poll.h>
> +#include <linux/dma-mapping.h>
>
> #include <linux/mic_common.h>
> #include "../common/mic_dev.h"
> @@ -45,33 +46,6 @@ MODULE_DEVICE_TABLE(pci, mic_pci_tbl);
> /* ID allocator for MIC devices */
> static struct ida g_mic_ida;
>
> -/* Initialize the device page */
> -static int mic_dp_init(struct mic_device *mdev) -{
> - mdev->dp = kzalloc(MIC_DP_SIZE, GFP_KERNEL);
> - if (!mdev->dp)
> - return -ENOMEM;
> -
> - mdev->dp_dma_addr = mic_map_single(mdev,
> - mdev->dp, MIC_DP_SIZE);
> - if (mic_map_error(mdev->dp_dma_addr)) {
> - kfree(mdev->dp);
> - dev_err(&mdev->pdev->dev, "%s %d err %d\n",
> - __func__, __LINE__, -ENOMEM);
> - return -ENOMEM;
> - }
> - mdev->ops->write_spad(mdev, MIC_DPLO_SPAD, mdev-
> >dp_dma_addr);
> - mdev->ops->write_spad(mdev, MIC_DPHI_SPAD, mdev-
> >dp_dma_addr >> 32);
> - return 0;
> -}
> -
> -/* Uninitialize the device page */
> -static void mic_dp_uninit(struct mic_device *mdev) -{
> - mic_unmap_single(mdev, mdev->dp_dma_addr, MIC_DP_SIZE);
> - kfree(mdev->dp);
> -}
> -
> /**
> * mic_ops_init: Initialize HW specific operation tables.
> *
> @@ -227,11 +201,16 @@ static int mic_probe(struct pci_dev *pdev,
>
> pci_set_drvdata(pdev, mdev);
>
> - rc = mic_dp_init(mdev);
> - if (rc) {
> - dev_err(&pdev->dev, "mic_dp_init failed rc %d\n", rc);
> + mdev->dp = dma_alloc_coherent(&pdev->dev, MIC_DP_SIZE,
> + &mdev->dp_dma_addr, GFP_KERNEL);
> + if (!mdev->dp) {
> + dev_err(&pdev->dev, "failed to allocate device page\n");
> goto smpt_uninit;
> }
> +
> + mdev->ops->write_spad(mdev, MIC_DPLO_SPAD, mdev-
> >dp_dma_addr);
> + mdev->ops->write_spad(mdev, MIC_DPHI_SPAD, mdev-
> >dp_dma_addr >> 32);
> +
> mic_bootparam_init(mdev);
> mic_create_debug_dir(mdev);
>
> @@ -244,7 +223,7 @@ static int mic_probe(struct pci_dev *pdev,
> return 0;
> cleanup_debug_dir:
> mic_delete_debug_dir(mdev);
> - mic_dp_uninit(mdev);
> + dma_free_coherent(&pdev->dev, MIC_DP_SIZE, mdev->dp,
> +mdev->dp_dma_addr);
> smpt_uninit:
> mic_smpt_uninit(mdev);
> free_interrupts:
> @@ -283,7 +262,7 @@ static void mic_remove(struct pci_dev *pdev)
>
> cosm_unregister_device(mdev->cosm_dev);
> mic_delete_debug_dir(mdev);
> - mic_dp_uninit(mdev);
> + dma_free_coherent(&pdev->dev, MIC_DP_SIZE, mdev->dp,
> +mdev->dp_dma_addr);
> mic_smpt_uninit(mdev);
> mic_free_interrupts(mdev, pdev);
> iounmap(mdev->aper.va);
> diff --git a/drivers/misc/mic/vop/vop_vringh.c
> b/drivers/misc/mic/vop/vop_vringh.c
> index 7014ffe88632e5..6835648d577d57 100644
> --- a/drivers/misc/mic/vop/vop_vringh.c
> +++ b/drivers/misc/mic/vop/vop_vringh.c
> @@ -298,9 +298,8 @@ static int vop_virtio_add_device(struct vop_vdev
> *vdev,
> mutex_init(&vvr->vr_mutex);
> vr_size = PAGE_ALIGN(round_up(vring_size(num,
> MIC_VIRTIO_RING_ALIGN), 4) +
> sizeof(struct _mic_vring_info));
> - vr->va = (void *)
> - __get_free_pages(GFP_KERNEL | __GFP_ZERO,
> - get_order(vr_size));
> + vr->va = dma_alloc_coherent(vop_dev(vdev), vr_size,
> &vr_addr,
> + GFP_KERNEL);
> if (!vr->va) {
> ret = -ENOMEM;
> dev_err(vop_dev(vdev), "%s %d err %d\n", @@ -
> 310,15 +309,6 @@ static int vop_virtio_add_device(struct vop_vdev *vdev,
> vr->len = vr_size;
> vr->info = vr->va + round_up(vring_size(num,
> MIC_VIRTIO_RING_ALIGN), 4);
> vr->info->magic = cpu_to_le32(MIC_MAGIC + vdev->virtio_id
> + i);
> - vr_addr = dma_map_single(&vpdev->dev, vr->va, vr_size,
> - DMA_BIDIRECTIONAL);
> - if (dma_mapping_error(&vpdev->dev, vr_addr)) {
> - free_pages((unsigned long)vr->va,
> get_order(vr_size));
> - ret = -ENOMEM;
> - dev_err(vop_dev(vdev), "%s %d err %d\n",
> - __func__, __LINE__, ret);
> - goto err;
> - }
> vqconfig[i].address = cpu_to_le64(vr_addr);
>
> vring_init(&vr->vr, num, vr->va, MIC_VIRTIO_RING_ALIGN);
> @@ -339,11 +329,9 @@ static int vop_virtio_add_device(struct vop_vdev
> *vdev,
> dev_dbg(&vpdev->dev,
> "%s %d index %d va %p info %p vr_size 0x%x\n",
> __func__, __LINE__, i, vr->va, vr->info, vr_size);
> - vvr->buf = (void *)__get_free_pages(GFP_KERNEL,
> - get_order(VOP_INT_DMA_BUF_SIZE));
> - vvr->buf_da = dma_map_single(&vpdev->dev,
> - vvr->buf, VOP_INT_DMA_BUF_SIZE,
> - DMA_BIDIRECTIONAL);
> + vvr->buf = dma_alloc_coherent(vop_dev(vdev),
> + VOP_INT_DMA_BUF_SIZE,
> + &vvr->buf_da, GFP_KERNEL);
> }
>
> snprintf(irqname, sizeof(irqname), "vop%dvirtio%d", vpdev->index,
> @@ -382,10 +370,8 @@ static int vop_virtio_add_device(struct vop_vdev
> *vdev,
> for (j = 0; j < i; j++) {
> struct vop_vringh *vvr = &vdev->vvr[j];
>
> - dma_unmap_single(&vpdev->dev,
> le64_to_cpu(vqconfig[j].address),
> - vvr->vring.len, DMA_BIDIRECTIONAL);
> - free_pages((unsigned long)vvr->vring.va,
> - get_order(vvr->vring.len));
> + dma_free_coherent(vop_dev(vdev), vvr->vring.len, vvr-
> >vring.va,
> + le64_to_cpu(vqconfig[j].address));
> }
> return ret;
> }
> @@ -433,17 +419,12 @@ static void vop_virtio_del_device(struct vop_vdev
> *vdev)
> for (i = 0; i < vdev->dd->num_vq; i++) {
> struct vop_vringh *vvr = &vdev->vvr[i];
>
> - dma_unmap_single(&vpdev->dev,
> - vvr->buf_da, VOP_INT_DMA_BUF_SIZE,
> - DMA_BIDIRECTIONAL);
> - free_pages((unsigned long)vvr->buf,
> - get_order(VOP_INT_DMA_BUF_SIZE));
> + dma_free_coherent(vop_dev(vdev),
> VOP_INT_DMA_BUF_SIZE,
> + vvr->buf, vvr->buf_da);
> vringh_kiov_cleanup(&vvr->riov);
> vringh_kiov_cleanup(&vvr->wiov);
> - dma_unmap_single(&vpdev->dev,
> le64_to_cpu(vqconfig[i].address),
> - vvr->vring.len, DMA_BIDIRECTIONAL);
> - free_pages((unsigned long)vvr->vring.va,
> - get_order(vvr->vring.len));
> + dma_free_coherent(vop_dev(vdev), vvr->vring.len, vvr-
> >vring.va,
> + le64_to_cpu(vqconfig[i].address));
> }
> /*
> * Order the type update with previous stores. This write barrier @@
> -1042,13 +1023,27 @@ static __poll_t vop_poll(struct file *f, poll_table *wait)
> return mask;
> }
>
> -static inline int
> -vop_query_offset(struct vop_vdev *vdev, unsigned long offset,
> - unsigned long *size, unsigned long *pa)
> +/*
> + * Maps the device page and virtio rings to user space for readonly access.
> + */
> +static int vop_mmap(struct file *f, struct vm_area_struct *vma)
> {
> - struct vop_device *vpdev = vdev->vpdev;
> - unsigned long start = MIC_DP_SIZE;
> - int i;
> + struct vop_vdev *vdev = f->private_data;
> + struct mic_vqconfig *vqconfig = mic_vq_config(vdev->dd);
> + unsigned long orig_start = vma->vm_start;
> + unsigned long orig_end = vma->vm_end;
> + int err, i;
> +
> + if (!vdev->vpdev->hw_ops->dp_mmap)
> + return -EINVAL;
> + if (vma->vm_pgoff)
> + return -EINVAL;
> + if (vma->vm_flags & VM_WRITE)
> + return -EACCES;
> +
> + err = vop_vdev_inited(vdev);
> + if (err)
> + return err;
>
> /*
> * MMAP interface is as follows:
> @@ -1057,58 +1052,38 @@ vop_query_offset(struct vop_vdev *vdev,
> unsigned long offset,
> * 0x1000 first vring
> * 0x1000 + size of 1st vring second vring
> * ....
> + *
> + * We manipulate vm_start/vm_end to trick dma_mmap_coherent
> into
> + * performing partial mappings, which is a bit of a hack, but safe
> + * while we are under mmap_lock(). Eventually this needs to be
> + * replaced by a proper DMA layer API.
> */
> - if (!offset) {
> - *pa = virt_to_phys(vpdev->hw_ops->get_dp(vpdev));
> - *size = MIC_DP_SIZE;
> - return 0;
> - }
> + vma->vm_end = vma->vm_start + MIC_DP_SIZE;
> + err = vdev->vpdev->hw_ops->dp_mmap(vdev->vpdev, vma);
> + if (err)
> + goto out;
>
> for (i = 0; i < vdev->dd->num_vq; i++) {
> struct vop_vringh *vvr = &vdev->vvr[i];
>
> - if (offset == start) {
> - *pa = virt_to_phys(vvr->vring.va);
> - *size = vvr->vring.len;
> - return 0;
> - }
> - start += vvr->vring.len;
> - }
> - return -1;
> -}
> -
> -/*
> - * Maps the device page and virtio rings to user space for readonly access.
> - */
> -static int vop_mmap(struct file *f, struct vm_area_struct *vma) -{
> - struct vop_vdev *vdev = f->private_data;
> - unsigned long offset = vma->vm_pgoff << PAGE_SHIFT;
> - unsigned long pa, size = vma->vm_end - vma->vm_start, size_rem =
> size;
> - int i, err;
> + vma->vm_start = vma->vm_end;
> + vma->vm_end += vvr->vring.len;
>
> - err = vop_vdev_inited(vdev);
> - if (err)
> - goto ret;
> - if (vma->vm_flags & VM_WRITE) {
> - err = -EACCES;
> - goto ret;
> - }
> - while (size_rem) {
> - i = vop_query_offset(vdev, offset, &size, &pa);
> - if (i < 0) {
> - err = -EINVAL;
> - goto ret;
> - }
> - err = remap_pfn_range(vma, vma->vm_start + offset,
> - pa >> PAGE_SHIFT, size,
> - vma->vm_page_prot);
> + err = -EINVAL;
> + if (vma->vm_end > orig_end)
> + goto out;
> + err = dma_mmap_coherent(vop_dev(vdev), vma, vvr-
> >vring.va,
> + le64_to_cpu(vqconfig[i].address),
> + vvr->vring.len);
> if (err)
> - goto ret;
> - size_rem -= size;
> - offset += size;
> + goto out;
> }
> -ret:
> +out:
> + /*
> + * Restore the original vma parameters.
> + */
> + vma->vm_start = orig_start;
> + vma->vm_end = orig_end;
> return err;
> }
>
> --
> 2.28.0