Re: vhost changes (batched) in linux-next after 12/13 trigger random crashes in KVM guests after reboot

From: Eugenio Pérez
Date: Fri Mar 27 2020 - 07:08:38 EST


Hi Christian.

Sorry for the late response. Could we try this one over eccb852f1fe6bede630e2e4f1a121a81e34354ab, and see if you still
can reproduce the bug?

Apart from that, could you print me the backtrace when qemu calls vhost_kernel_set_vring_base and
vhost_kernel_get_vring_base functions?

Thank you very much!

diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index e158159671fa..a1a4239512bb 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -1505,10 +1505,13 @@ static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd)

mutex_lock(&n->dev.mutex);
r = vhost_dev_check_owner(&n->dev);
- if (r)
+ if (r) {
+ pr_debug("vhost_dev_check_owner index=%u fd=%d rc r=%d", index, fd, r);
goto err;
+ }

if (index >= VHOST_NET_VQ_MAX) {
+ pr_debug("vhost_dev_check_owner index=%u fd=%d MAX=%d", index, fd, VHOST_NET_VQ_MAX);
r = -ENOBUFS;
goto err;
}
@@ -1518,22 +1521,26 @@ static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd)

/* Verify that ring has been setup correctly. */
if (!vhost_vq_access_ok(vq)) {
+ pr_debug("vhost_net_set_backend index=%u fd=%d !vhost_vq_access_ok", index, fd);
r = -EFAULT;
goto err_vq;
}
sock = get_socket(fd);
if (IS_ERR(sock)) {
r = PTR_ERR(sock);
+ pr_debug("vhost_net_set_backend index=%u fd=%d get_socket err r=%d", index, fd, r);
goto err_vq;
}

/* start polling new socket */
oldsock = vq->private_data;
if (sock != oldsock) {
+ pr_debug("sock=%p != oldsock=%p index=%u fd=%d vq=%p", sock, oldsock, index, fd, vq);
ubufs = vhost_net_ubuf_alloc(vq,
sock && vhost_sock_zcopy(sock));
if (IS_ERR(ubufs)) {
r = PTR_ERR(ubufs);
+ pr_debug("ubufs index=%u fd=%d err r=%d vq=%p", index, fd, r, vq);
goto err_ubufs;
}

@@ -1541,11 +1548,15 @@ static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd)
vq->private_data = sock;
vhost_net_buf_unproduce(nvq);
r = vhost_vq_init_access(vq);
- if (r)
+ if (r) {
+ pr_debug("init_access index=%u fd=%d r=%d vq=%p", index, fd, r, vq);
goto err_used;
+ }
r = vhost_net_enable_vq(n, vq);
- if (r)
+ if (r) {
+ pr_debug("enable_vq index=%u fd=%d r=%d vq=%p", index, fd, r, vq);
goto err_used;
+ }
if (index == VHOST_NET_VQ_RX)
nvq->rx_ring = get_tap_ptr_ring(fd);

@@ -1559,6 +1570,8 @@ static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd)

mutex_unlock(&vq->mutex);

+ pr_debug("sock=%p", sock);
+
if (oldubufs) {
vhost_net_ubuf_put_wait_and_free(oldubufs);
mutex_lock(&vq->mutex);
@@ -1712,6 +1725,9 @@ static long vhost_net_ioctl(struct file *f, unsigned int ioctl,
case VHOST_NET_SET_BACKEND:
if (copy_from_user(&backend, argp, sizeof backend))
return -EFAULT;
+ pr_debug("VHOST_NET_SET_BACKEND [b.index=%u][b.fd=%d]",
+ backend.index, backend.fd);
+ dump_stack();
return vhost_net_set_backend(n, backend.index, backend.fd);
case VHOST_GET_FEATURES:
features = VHOST_NET_FEATURES;
diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index b5a51b1f2e79..9dd0bcae0b22 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -372,6 +372,11 @@ static int vhost_worker(void *data)
return 0;
}

+static int vhost_vq_num_batch_descs(struct vhost_virtqueue *vq)
+{
+ return vq->max_descs - UIO_MAXIOV;
+}
+
static void vhost_vq_free_iovecs(struct vhost_virtqueue *vq)
{
kfree(vq->descs);
@@ -394,7 +399,9 @@ static long vhost_dev_alloc_iovecs(struct vhost_dev *dev)
for (i = 0; i < dev->nvqs; ++i) {
vq = dev->vqs[i];
vq->max_descs = dev->iov_limit;
- vq->batch_descs = dev->iov_limit - UIO_MAXIOV;
+ if (vhost_vq_num_batch_descs(vq) < 0) {
+ return -EINVAL;
+ }
vq->descs = kmalloc_array(vq->max_descs,
sizeof(*vq->descs),
GFP_KERNEL);
@@ -1642,15 +1649,27 @@ long vhost_vring_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *arg
r = -EINVAL;
break;
}
+
+ pr_debug(
+ "VHOST_SET_VRING_BASE [vq=%p][s.index=%u][s.num=%u][vq->avail_idx=%d][vq->last_avail_idx=%d][vq-
>ndescs=%d][vq->first_desc=%d]",
+ vq, s.index, s.num, vq->avail_idx, vq->last_avail_idx,
+ vq->ndescs, vq->first_desc);
+ dump_stack();
vq->last_avail_idx = s.num;
/* Forget the cached index value. */
vq->avail_idx = vq->last_avail_idx;
+ vq->ndescs = vq->first_desc = 0;
break;
case VHOST_GET_VRING_BASE:
s.index = idx;
s.num = vq->last_avail_idx;
if (copy_to_user(argp, &s, sizeof s))
r = -EFAULT;
+ pr_debug(
+ "VHOST_GET_VRING_BASE [vq=%p][s.index=%u][s.num=%u][vq->avail_idx=%d][vq->last_avail_idx=%d][vq-
>ndescs=%d][vq->first_desc=%d]",
+ vq, s.index, s.num, vq->avail_idx, vq->last_avail_idx,
+ vq->ndescs, vq->first_desc);
+ dump_stack();
break;
case VHOST_SET_VRING_KICK:
if (copy_from_user(&f, argp, sizeof f)) {
@@ -2239,8 +2258,8 @@ static int fetch_buf(struct vhost_virtqueue *vq)
vq->avail_idx = vhost16_to_cpu(vq, avail_idx);

if (unlikely((u16)(vq->avail_idx - last_avail_idx) > vq->num)) {
- vq_err(vq, "Guest moved used index from %u to %u",
- last_avail_idx, vq->avail_idx);
+ vq_err(vq, "Guest moved vq %p used index from %u to %u",
+ vq, last_avail_idx, vq->avail_idx);
return -EFAULT;
}

@@ -2316,6 +2335,9 @@ static int fetch_buf(struct vhost_virtqueue *vq)
BUG_ON(!(vq->used_flags & VRING_USED_F_NO_NOTIFY));

/* On success, increment avail index. */
+ pr_debug(
+ "[vq=%p][vq->last_avail_idx=%u][vq->avail_idx=%u][vq->ndescs=%d][vq->first_desc=%d]",
+ vq, vq->last_avail_idx, vq->avail_idx, vq->ndescs, vq->first_desc);
vq->last_avail_idx++;

return 0;
@@ -2333,7 +2355,7 @@ static int fetch_descs(struct vhost_virtqueue *vq)
if (vq->ndescs)
return 0;

- while (!ret && vq->ndescs <= vq->batch_descs)
+ while (!ret && vq->ndescs <= vhost_vq_num_batch_descs(vq))
ret = fetch_buf(vq);

return vq->ndescs ? 0 : ret;
@@ -2432,6 +2454,9 @@ EXPORT_SYMBOL_GPL(vhost_get_vq_desc);
/* Reverse the effect of vhost_get_vq_desc. Useful for error handling. */
void vhost_discard_vq_desc(struct vhost_virtqueue *vq, int n)
{
+ pr_debug(
+ "DISCARD [vq=%p][vq->last_avail_idx=%u][vq->avail_idx=%u][n=%d]",
+ vq, vq->last_avail_idx, vq->avail_idx, n);
vq->last_avail_idx -= n;
}
EXPORT_SYMBOL_GPL(vhost_discard_vq_desc);
diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
index 661088ae6dc7..e648b9b997d4 100644
--- a/drivers/vhost/vhost.h
+++ b/drivers/vhost/vhost.h
@@ -102,7 +102,6 @@ struct vhost_virtqueue {
int ndescs;
int first_desc;
int max_descs;
- int batch_descs;

const struct vhost_umem_node *meta_iotlb[VHOST_NUM_ADDRS];
struct file *kick;