Re: [PATCH] virtio_ring: skip cpu sync when mapping fails

From: Michael S. Tsirkin
Date: Wed Jan 08 2025 - 06:36:15 EST


On Mon, Nov 11, 2024 at 10:55:38AM +0800, Jason Wang wrote:
> There's no need to sync DMA for CPU on mapping errors. So this patch
> skips the CPU sync in the error handling path of DMA mapping.
>
> Signed-off-by: Jason Wang <jasowang@xxxxxxxxxx>


So as I said, I do not get why we are optimizing error paths.
The commit log at least needs to be improved to document
the motivation.




> ---
> drivers/virtio/virtio_ring.c | 98 +++++++++++++++++++++---------------
> 1 file changed, 57 insertions(+), 41 deletions(-)
>
> diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
> index be7309b1e860..b422b5fb22db 100644
> --- a/drivers/virtio/virtio_ring.c
> +++ b/drivers/virtio/virtio_ring.c
> @@ -441,8 +441,10 @@ static void virtqueue_init(struct vring_virtqueue *vq, u32 num)
> */
>
> static void vring_unmap_one_split_indirect(const struct vring_virtqueue *vq,
> - const struct vring_desc *desc)
> + const struct vring_desc *desc,
> + bool skip_sync)
> {
> + unsigned long attrs = skip_sync ? DMA_ATTR_SKIP_CPU_SYNC : 0;
> u16 flags;


If you really feel we must do it, just pass attrs directly so
we do not get an extra branch. It also makes for more readable code.

> if (!vq->do_unmap)
> @@ -450,16 +452,18 @@ static void vring_unmap_one_split_indirect(const struct vring_virtqueue *vq,
>
> flags = virtio16_to_cpu(vq->vq.vdev, desc->flags);
>
> - dma_unmap_page(vring_dma_dev(vq),
> - virtio64_to_cpu(vq->vq.vdev, desc->addr),
> - virtio32_to_cpu(vq->vq.vdev, desc->len),
> - (flags & VRING_DESC_F_WRITE) ?
> - DMA_FROM_DEVICE : DMA_TO_DEVICE);
> + dma_unmap_page_attrs(vring_dma_dev(vq),
> + virtio64_to_cpu(vq->vq.vdev, desc->addr),
> + virtio32_to_cpu(vq->vq.vdev, desc->len),
> + (flags & VRING_DESC_F_WRITE) ?
> + DMA_FROM_DEVICE : DMA_TO_DEVICE,
> + attrs);
> }
>
> static unsigned int vring_unmap_one_split(const struct vring_virtqueue *vq,
> - unsigned int i)
> + unsigned int i, bool skip_sync)
> {
> + unsigned long attrs = skip_sync ? DMA_ATTR_SKIP_CPU_SYNC : 0;
> struct vring_desc_extra *extra = vq->split.desc_extra;
> u16 flags;
>
> @@ -469,20 +473,22 @@ static unsigned int vring_unmap_one_split(const struct vring_virtqueue *vq,
> if (!vq->use_dma_api)
> goto out;
>
> - dma_unmap_single(vring_dma_dev(vq),
> - extra[i].addr,
> - extra[i].len,
> - (flags & VRING_DESC_F_WRITE) ?
> - DMA_FROM_DEVICE : DMA_TO_DEVICE);
> + dma_unmap_single_attrs(vring_dma_dev(vq),
> + extra[i].addr,
> + extra[i].len,
> + (flags & VRING_DESC_F_WRITE) ?
> + DMA_FROM_DEVICE : DMA_TO_DEVICE,
> + attrs);
> } else {
> if (!vq->do_unmap)
> goto out;
>
> - dma_unmap_page(vring_dma_dev(vq),
> - extra[i].addr,
> - extra[i].len,
> - (flags & VRING_DESC_F_WRITE) ?
> - DMA_FROM_DEVICE : DMA_TO_DEVICE);
> + dma_unmap_page_attrs(vring_dma_dev(vq),
> + extra[i].addr,
> + extra[i].len,
> + (flags & VRING_DESC_F_WRITE) ?
> + DMA_FROM_DEVICE : DMA_TO_DEVICE,
> + attrs);
> }
>
> out:
> @@ -717,10 +723,10 @@ static inline int virtqueue_add_split(struct virtqueue *_vq,
> if (i == err_idx)
> break;
> if (indirect) {
> - vring_unmap_one_split_indirect(vq, &desc[i]);
> + vring_unmap_one_split_indirect(vq, &desc[i], true);
> i = virtio16_to_cpu(_vq->vdev, desc[i].next);
> } else
> - i = vring_unmap_one_split(vq, i);
> + i = vring_unmap_one_split(vq, i, true);
> }
>
> free_indirect:
> @@ -775,12 +781,12 @@ static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head,
> i = head;
>
> while (vq->split.vring.desc[i].flags & nextflag) {
> - vring_unmap_one_split(vq, i);
> + vring_unmap_one_split(vq, i, false);
> i = vq->split.desc_extra[i].next;
> vq->vq.num_free++;
> }
>
> - vring_unmap_one_split(vq, i);
> + vring_unmap_one_split(vq, i, false);
> vq->split.desc_extra[i].next = vq->free_head;
> vq->free_head = head;
>
> @@ -804,7 +810,8 @@ static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head,
>
> if (vq->do_unmap) {
> for (j = 0; j < len / sizeof(struct vring_desc); j++)
> - vring_unmap_one_split_indirect(vq, &indir_desc[j]);
> + vring_unmap_one_split_indirect(vq,
> + &indir_desc[j], false);
> }
>
> kfree(indir_desc);
> @@ -1221,8 +1228,10 @@ static u16 packed_last_used(u16 last_used_idx)
> }
>
> static void vring_unmap_extra_packed(const struct vring_virtqueue *vq,
> - const struct vring_desc_extra *extra)
> + const struct vring_desc_extra *extra,
> + bool skip_sync)
> {
> + unsigned long attrs = skip_sync ? DMA_ATTR_SKIP_CPU_SYNC : 0;
> u16 flags;
>
> flags = extra->flags;
> @@ -1231,24 +1240,28 @@ static void vring_unmap_extra_packed(const struct vring_virtqueue *vq,
> if (!vq->use_dma_api)
> return;
>
> - dma_unmap_single(vring_dma_dev(vq),
> - extra->addr, extra->len,
> - (flags & VRING_DESC_F_WRITE) ?
> - DMA_FROM_DEVICE : DMA_TO_DEVICE);
> + dma_unmap_single_attrs(vring_dma_dev(vq),
> + extra->addr, extra->len,
> + (flags & VRING_DESC_F_WRITE) ?
> + DMA_FROM_DEVICE : DMA_TO_DEVICE,
> + attrs);
> } else {
> if (!vq->do_unmap)
> return;
>
> - dma_unmap_page(vring_dma_dev(vq),
> - extra->addr, extra->len,
> - (flags & VRING_DESC_F_WRITE) ?
> - DMA_FROM_DEVICE : DMA_TO_DEVICE);
> + dma_unmap_page_attrs(vring_dma_dev(vq),
> + extra->addr, extra->len,
> + (flags & VRING_DESC_F_WRITE) ?
> + DMA_FROM_DEVICE : DMA_TO_DEVICE,
> + attrs);
> }
> }
>
> static void vring_unmap_desc_packed(const struct vring_virtqueue *vq,
> - const struct vring_packed_desc *desc)
> + const struct vring_packed_desc *desc,
> + bool skip_sync)
> {
> + unsigned long attrs = skip_sync ? DMA_ATTR_SKIP_CPU_SYNC : 0;
> u16 flags;
>
> if (!vq->do_unmap)
> @@ -1256,11 +1269,12 @@ static void vring_unmap_desc_packed(const struct vring_virtqueue *vq,
>
> flags = le16_to_cpu(desc->flags);
>
> - dma_unmap_page(vring_dma_dev(vq),
> - le64_to_cpu(desc->addr),
> - le32_to_cpu(desc->len),
> - (flags & VRING_DESC_F_WRITE) ?
> - DMA_FROM_DEVICE : DMA_TO_DEVICE);
> + dma_unmap_page_attrs(vring_dma_dev(vq),
> + le64_to_cpu(desc->addr),
> + le32_to_cpu(desc->len),
> + (flags & VRING_DESC_F_WRITE) ?
> + DMA_FROM_DEVICE : DMA_TO_DEVICE,
> + attrs);
> }
>
> static struct vring_packed_desc *alloc_indirect_packed(unsigned int total_sg,
> @@ -1389,7 +1403,7 @@ static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq,
> err_idx = i;
>
> for (i = 0; i < err_idx; i++)
> - vring_unmap_desc_packed(vq, &desc[i]);
> + vring_unmap_desc_packed(vq, &desc[i], true);
>
> free_desc:
> kfree(desc);
> @@ -1539,7 +1553,8 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq,
> for (n = 0; n < total_sg; n++) {
> if (i == err_idx)
> break;
> - vring_unmap_extra_packed(vq, &vq->packed.desc_extra[curr]);
> + vring_unmap_extra_packed(vq,
> + &vq->packed.desc_extra[curr], true);
> curr = vq->packed.desc_extra[curr].next;
> i++;
> if (i >= vq->packed.vring.num)
> @@ -1619,7 +1634,8 @@ static void detach_buf_packed(struct vring_virtqueue *vq,
> curr = id;
> for (i = 0; i < state->num; i++) {
> vring_unmap_extra_packed(vq,
> - &vq->packed.desc_extra[curr]);
> + &vq->packed.desc_extra[curr],
> + false);
> curr = vq->packed.desc_extra[curr].next;
> }
> }
> @@ -1636,7 +1652,7 @@ static void detach_buf_packed(struct vring_virtqueue *vq,
> len = vq->packed.desc_extra[id].len;
> for (i = 0; i < len / sizeof(struct vring_packed_desc);
> i++)
> - vring_unmap_desc_packed(vq, &desc[i]);
> + vring_unmap_desc_packed(vq, &desc[i], false);
> }
> kfree(desc);
> state->indir_desc = NULL;
> --
> 2.31.1