Re: vhost changes (batched) in linux-next after 12/13 trigger random crashes in KVM guests after reboot

From: Christian Borntraeger
Date: Fri Mar 27 2020 - 11:46:51 EST




On 27.03.20 12:08, Eugenio Pérez wrote:
> Hi Christian.
>
> Sorry for the late response. Could you try this one on top of eccb852f1fe6bede630e2e4f1a121a81e34354ab and see if you can
> still reproduce the bug?

Too much time has passed and too many things have changed on that system.
I have trouble reproducing this with either
eccb852f1fe6bede630e2e4f1a121a81e34354ab or 52c36ce7f334.
I will try to reproduce this again :-/

>
> Apart from that, could you print the backtrace when qemu calls the vhost_kernel_set_vring_base and
> vhost_kernel_get_vring_base functions?
>
> Thank you very much!
>
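For the qemu side: rather than patching the kernel further, I would either attach gdb and break on those two functions, or temporarily add a userspace backtrace dump to them. A rough, untested sketch of the latter, using glibc's backtrace() (the call sites, vhost_kernel_set_vring_base()/vhost_kernel_get_vring_base() in hw/virtio/vhost-backend.c, are from memory, so take the exact placement as an assumption):

/* Untested sketch: dump a userspace backtrace from the qemu vhost backend
 * helpers.  Uses only glibc's backtrace()/backtrace_symbols_fd(); symbols
 * resolve best when qemu is built with debug info.
 */
#include <execinfo.h>
#include <stdio.h>
#include <unistd.h>

static void dump_user_stack(const char *tag)
{
    void *frames[64];
    int n = backtrace(frames, 64);          /* capture up to 64 frames */

    fprintf(stderr, "backtrace (%s):\n", tag);
    backtrace_symbols_fd(frames, n, STDERR_FILENO);  /* write them to stderr */
}

/* For a quick standalone test this can be driven from a main(); in qemu the
 * call would instead go at the top of vhost_kernel_set_vring_base() and
 * vhost_kernel_get_vring_base().
 */
int main(void)
{
    dump_user_stack("vhost_kernel_set_vring_base");
    return 0;
}
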
> diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
> index e158159671fa..a1a4239512bb 100644
> --- a/drivers/vhost/net.c
> +++ b/drivers/vhost/net.c
> @@ -1505,10 +1505,13 @@ static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd)
>
> mutex_lock(&n->dev.mutex);
> r = vhost_dev_check_owner(&n->dev);
> - if (r)
> + if (r) {
> + pr_debug("vhost_dev_check_owner index=%u fd=%d rc r=%d", index, fd, r);
> goto err;
> + }
>
> if (index >= VHOST_NET_VQ_MAX) {
> + pr_debug("vhost_net_set_backend index=%u fd=%d MAX=%d", index, fd, VHOST_NET_VQ_MAX);
> r = -ENOBUFS;
> goto err;
> }
> @@ -1518,22 +1521,26 @@ static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd)
>
> /* Verify that ring has been setup correctly. */
> if (!vhost_vq_access_ok(vq)) {
> + pr_debug("vhost_net_set_backend index=%u fd=%d !vhost_vq_access_ok", index, fd);
> r = -EFAULT;
> goto err_vq;
> }
> sock = get_socket(fd);
> if (IS_ERR(sock)) {
> r = PTR_ERR(sock);
> + pr_debug("vhost_net_set_backend index=%u fd=%d get_socket err r=%d", index, fd, r);
> goto err_vq;
> }
>
> /* start polling new socket */
> oldsock = vq->private_data;
> if (sock != oldsock) {
> + pr_debug("sock=%p != oldsock=%p index=%u fd=%d vq=%p", sock, oldsock, index, fd, vq);
> ubufs = vhost_net_ubuf_alloc(vq,
> sock && vhost_sock_zcopy(sock));
> if (IS_ERR(ubufs)) {
> r = PTR_ERR(ubufs);
> + pr_debug("ubufs index=%u fd=%d err r=%d vq=%p", index, fd, r, vq);
> goto err_ubufs;
> }
>
> @@ -1541,11 +1548,15 @@ static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd)
> vq->private_data = sock;
> vhost_net_buf_unproduce(nvq);
> r = vhost_vq_init_access(vq);
> - if (r)
> + if (r) {
> + pr_debug("init_access index=%u fd=%d r=%d vq=%p", index, fd, r, vq);
> goto err_used;
> + }
> r = vhost_net_enable_vq(n, vq);
> - if (r)
> + if (r) {
> + pr_debug("enable_vq index=%u fd=%d r=%d vq=%p", index, fd, r, vq);
> goto err_used;
> + }
> if (index == VHOST_NET_VQ_RX)
> nvq->rx_ring = get_tap_ptr_ring(fd);
>
> @@ -1559,6 +1570,8 @@ static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd)
>
> mutex_unlock(&vq->mutex);
>
> + pr_debug("sock=%p", sock);
> +
> if (oldubufs) {
> vhost_net_ubuf_put_wait_and_free(oldubufs);
> mutex_lock(&vq->mutex);
> @@ -1712,6 +1725,9 @@ static long vhost_net_ioctl(struct file *f, unsigned int ioctl,
> case VHOST_NET_SET_BACKEND:
> if (copy_from_user(&backend, argp, sizeof backend))
> return -EFAULT;
> + pr_debug("VHOST_NET_SET_BACKEND [b.index=%u][b.fd=%d]",
> + backend.index, backend.fd);
> + dump_stack();
> return vhost_net_set_backend(n, backend.index, backend.fd);
> case VHOST_GET_FEATURES:
> features = VHOST_NET_FEATURES;
> diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
> index b5a51b1f2e79..9dd0bcae0b22 100644
> --- a/drivers/vhost/vhost.c
> +++ b/drivers/vhost/vhost.c
> @@ -372,6 +372,11 @@ static int vhost_worker(void *data)
> return 0;
> }
>
> +static int vhost_vq_num_batch_descs(struct vhost_virtqueue *vq)
> +{
> + return vq->max_descs - UIO_MAXIOV;
> +}
> +
> static void vhost_vq_free_iovecs(struct vhost_virtqueue *vq)
> {
> kfree(vq->descs);
> @@ -394,7 +399,9 @@ static long vhost_dev_alloc_iovecs(struct vhost_dev *dev)
> for (i = 0; i < dev->nvqs; ++i) {
> vq = dev->vqs[i];
> vq->max_descs = dev->iov_limit;
> - vq->batch_descs = dev->iov_limit - UIO_MAXIOV;
> + if (vhost_vq_num_batch_descs(vq) < 0) {
> + return -EINVAL;
> + }
> vq->descs = kmalloc_array(vq->max_descs,
> sizeof(*vq->descs),
> GFP_KERNEL);
> @@ -1642,15 +1649,27 @@ long vhost_vring_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *arg
> r = -EINVAL;
> break;
> }
> +
> + pr_debug(
> + "VHOST_SET_VRING_BASE [vq=%p][s.index=%u][s.num=%u][vq->avail_idx=%d][vq->last_avail_idx=%d][vq->ndescs=%d][vq->first_desc=%d]",
> + vq, s.index, s.num, vq->avail_idx, vq->last_avail_idx,
> + vq->ndescs, vq->first_desc);
> + dump_stack();
> vq->last_avail_idx = s.num;
> /* Forget the cached index value. */
> vq->avail_idx = vq->last_avail_idx;
> + vq->ndescs = vq->first_desc = 0;
> break;
> case VHOST_GET_VRING_BASE:
> s.index = idx;
> s.num = vq->last_avail_idx;
> if (copy_to_user(argp, &s, sizeof s))
> r = -EFAULT;
> + pr_debug(
> + "VHOST_GET_VRING_BASE [vq=%p][s.index=%u][s.num=%u][vq->avail_idx=%d][vq->last_avail_idx=%d][vq->ndescs=%d][vq->first_desc=%d]",
> + vq, s.index, s.num, vq->avail_idx, vq->last_avail_idx,
> + vq->ndescs, vq->first_desc);
> + dump_stack();
> break;
> case VHOST_SET_VRING_KICK:
> if (copy_from_user(&f, argp, sizeof f)) {
> @@ -2239,8 +2258,8 @@ static int fetch_buf(struct vhost_virtqueue *vq)
> vq->avail_idx = vhost16_to_cpu(vq, avail_idx);
>
> if (unlikely((u16)(vq->avail_idx - last_avail_idx) > vq->num)) {
> - vq_err(vq, "Guest moved used index from %u to %u",
> - last_avail_idx, vq->avail_idx);
> + vq_err(vq, "Guest moved vq %p used index from %u to %u",
> + vq, last_avail_idx, vq->avail_idx);
> return -EFAULT;
> }
>
> @@ -2316,6 +2335,9 @@ static int fetch_buf(struct vhost_virtqueue *vq)
> BUG_ON(!(vq->used_flags & VRING_USED_F_NO_NOTIFY));
>
> /* On success, increment avail index. */
> + pr_debug(
> + "[vq=%p][vq->last_avail_idx=%u][vq->avail_idx=%u][vq->ndescs=%d][vq->first_desc=%d]",
> + vq, vq->last_avail_idx, vq->avail_idx, vq->ndescs, vq->first_desc);
> vq->last_avail_idx++;
>
> return 0;
> @@ -2333,7 +2355,7 @@ static int fetch_descs(struct vhost_virtqueue *vq)
> if (vq->ndescs)
> return 0;
>
> - while (!ret && vq->ndescs <= vq->batch_descs)
> + while (!ret && vq->ndescs <= vhost_vq_num_batch_descs(vq))
> ret = fetch_buf(vq);
>
> return vq->ndescs ? 0 : ret;
> @@ -2432,6 +2454,9 @@ EXPORT_SYMBOL_GPL(vhost_get_vq_desc);
> /* Reverse the effect of vhost_get_vq_desc. Useful for error handling. */
> void vhost_discard_vq_desc(struct vhost_virtqueue *vq, int n)
> {
> + pr_debug(
> + "DISCARD [vq=%p][vq->last_avail_idx=%u][vq->avail_idx=%u][n=%d]",
> + vq, vq->last_avail_idx, vq->avail_idx, n);
> vq->last_avail_idx -= n;
> }
> EXPORT_SYMBOL_GPL(vhost_discard_vq_desc);
> diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
> index 661088ae6dc7..e648b9b997d4 100644
> --- a/drivers/vhost/vhost.h
> +++ b/drivers/vhost/vhost.h
> @@ -102,7 +102,6 @@ struct vhost_virtqueue {
> int ndescs;
> int first_desc;
> int max_descs;
> - int batch_descs;
>
> const struct vhost_umem_node *meta_iotlb[VHOST_NUM_ADDRS];
> struct file *kick;
>
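One note from my side (an assumption about your setup): pr_debug() is compiled out by default, so the new messages will only show up in dmesg if the vhost files are built with DEBUG defined, or if they are switched on at runtime through dynamic debug (/sys/kernel/debug/dynamic_debug/control, with CONFIG_DYNAMIC_DEBUG). The build-time variant would look like this at the very top of drivers/vhost/net.c and drivers/vhost/vhost.c:

/* Must come before the first #include so <linux/printk.h> (and, with
 * CONFIG_DYNAMIC_DEBUG, <linux/dynamic_debug.h>) sees it and turns the
 * pr_debug() calls in this file into real KERN_DEBUG output by default.
 */
#define DEBUG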