Re: [Qemu-devel] [PATCH v7 2/6] virtio-pmem: Add virtio pmem driver

From: Pankaj Gupta
Date: Wed May 08 2019 - 07:13:50 EST



>
> On 4/25/19 10:00 PM, Pankaj Gupta wrote:
>
> > +void host_ack(struct virtqueue *vq)
> > +{
> > + unsigned int len;
> > + unsigned long flags;
> > + struct virtio_pmem_request *req, *req_buf;
> > + struct virtio_pmem *vpmem = vq->vdev->priv;
> > +
> > + spin_lock_irqsave(&vpmem->pmem_lock, flags);
> > + while ((req = virtqueue_get_buf(vq, &len)) != NULL) {
> > + req->done = true;
> > + wake_up(&req->host_acked);
> > +
> > + if (!list_empty(&vpmem->req_list)) {
> > + req_buf = list_first_entry(&vpmem->req_list,
> > + struct virtio_pmem_request, list);
> > + list_del(&vpmem->req_list);
>
> Shouldn't it be rather `list_del(vpmem->req_list.next)`? We are trying to
> unlink
> first element of the list and `vpmem->req_list` is just the list head.

This looks correct. We are not deleting the head but the first entry in 'req_list',
which is the device's list of pending requests.

Please see below:

/**
* Retrieve the first list entry for the given list pointer.
*
* Example:
* struct foo *first;
* first = list_first_entry(&bar->list_of_foos, struct foo, list_of_foos);
*
* @param ptr The list head
* @param type Data type of the list element to retrieve
* @param member Member name of the struct list_head field in the list element.
* @return A pointer to the first list element.
*/
#define list_first_entry(ptr, type, member) \
list_entry((ptr)->next, type, member)

>
> > +int virtio_pmem_flush(struct nd_region *nd_region)
> > +{
> > + int err;
> > + unsigned long flags;
> > + struct scatterlist *sgs[2], sg, ret;
> > + struct virtio_device *vdev = nd_region->provider_data;
> > + struct virtio_pmem *vpmem = vdev->priv;
> > + struct virtio_pmem_request *req;
> > +
> > + might_sleep();
> > + req = kmalloc(sizeof(*req), GFP_KERNEL);
> > + if (!req)
> > + return -ENOMEM;
> > +
> > + req->done = req->wq_buf_avail = false;
> > + strcpy(req->name, "FLUSH");
> > + init_waitqueue_head(&req->host_acked);
> > + init_waitqueue_head(&req->wq_buf);
> > + sg_init_one(&sg, req->name, strlen(req->name));
> > + sgs[0] = &sg;
> > + sg_init_one(&ret, &req->ret, sizeof(req->ret));
> > + sgs[1] = &ret;
> > +
> > + spin_lock_irqsave(&vpmem->pmem_lock, flags);
> > + err = virtqueue_add_sgs(vpmem->req_vq, sgs, 1, 1, req, GFP_ATOMIC);
> > + if (err) {
> > + dev_err(&vdev->dev, "failed to send command to virtio pmem device\n");
> > +
> > + list_add_tail(&vpmem->req_list, &req->list);
> > + spin_unlock_irqrestore(&vpmem->pmem_lock, flags);
> > +
> > + /* When host has read buffer, this completes via host_ack */
> > + wait_event(req->wq_buf, req->wq_buf_avail);
> > + spin_lock_irqsave(&vpmem->pmem_lock, flags);
> > + }
>
> Aren't the arguments in `list_add_tail` swapped? The element we are adding

No, this is intentional. 'vpmem->req_list' maintains a list of pending requests
for the entire pmem device. 'req->list' is a per-request list and holds the pending
request when adding to the virtio queue fails. I think we don't need this list.

> should
> be first, the list should be second. Also, shouldn't we resubmit the request
> after
> waking up from `wait_event(req->wq_buf, req->wq_buf_avail)`?

Yes, we should. Good point.

>
> I propose rewriting it like that:
>
> diff --git a/drivers/nvdimm/virtio_pmem.c b/drivers/nvdimm/virtio_pmem.c
> index 66b582f751a3..ff0556b04e86 100644
> --- a/drivers/nvdimm/virtio_pmem.c
> +++ b/drivers/nvdimm/virtio_pmem.c
> @@ -25,7 +25,7 @@ void host_ack(struct virtqueue *vq)
> if (!list_empty(&vpmem->req_list)) {
> req_buf = list_first_entry(&vpmem->req_list,
> struct virtio_pmem_request, list);
> - list_del(&vpmem->req_list);
> + list_del(vpmem->req_list.next);

I don't think it's correct.

> req_buf->wq_buf_avail = true;
> wake_up(&req_buf->wq_buf);
> }
> @@ -59,17 +59,33 @@ int virtio_pmem_flush(struct nd_region *nd_region)
> sgs[1] = &ret;
>
> spin_lock_irqsave(&vpmem->pmem_lock, flags);
> - err = virtqueue_add_sgs(vpmem->req_vq, sgs, 1, 1, req, GFP_ATOMIC);
> - if (err) {
> - dev_err(&vdev->dev, "failed to send command to virtio pmem device\n");
> + /*
> + * If virtqueue_add_sgs returns -ENOSPC then req_vq virtual queue does not
> + * have free descriptor slots. We add the request to req_list and wait
> + * for host_ack to wake us up when free slots are available.
> + */
> + while ((err = virtqueue_add_sgs(vpmem->req_vq, sgs, 1, 1, req, GFP_ATOMIC))
> == -ENOSPC) {
> + dev_err(&vdev->dev, "failed to send command to virtio pmem device, no free
> slots in the virtqueue, postponing request\n");
> + req->wq_buf_avail = false;
>
> - list_add_tail(&vpmem->req_list, &req->list);
> + list_add_tail(&req->list, &vpmem->req_list);
> spin_unlock_irqrestore(&vpmem->pmem_lock, flags);
>
> /* When host has read buffer, this completes via host_ack */
> wait_event(req->wq_buf, req->wq_buf_avail);
> spin_lock_irqsave(&vpmem->pmem_lock, flags);
> }
> +
> + /*
> + * virtqueue_add_sgs failed with error different than -ENOSPC, we can't
> + * do anything about that.
> + */
> + if (err) {
> + dev_info(&vdev->dev, "failed to send command to virtio pmem device, error
> code %d\n", err);
> + spin_unlock_irqrestore(&vpmem->pmem_lock, flags);
> + err = -EIO;
> + goto ret;
> + }
> err = virtqueue_kick(vpmem->req_vq);
> spin_unlock_irqrestore(&vpmem->pmem_lock, flags);
>
>
> Let me know if it looks reasonable to you.

I don't think this fulfills the entire logic correctly. But thanks, I spotted a bug in my code :)
I will fix it.

>
> Thank you,
> Jakub Staron
>
>