Re: [Xen-devel] [PATCH] xen-netfront: set real_num_tx_queues to zreo avoid to trigger BUG_ON

From: David Vrabel
Date: Mon Feb 22 2016 - 09:10:13 EST


On 20/02/16 01:27, Gonglei wrote:
> It's possible for a race condition to exist between xennet_open() and
> talk_to_netback(). After invoking netfront_probe() then other
> threads or processes invoke xennet_open (such as NetworkManager)
> immediately may trigger BUG_ON(). Besides, we also should reset
> real_num_tx_queues in xennet_destroy_queues().

I think you want something like the following. We serialize the backend
changing and the netdev open with the device mutex.

(I've not even tried to compile this so it might not even build.)

David

diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c
index d6abf19..7e2791a 100644
--- a/drivers/net/xen-netfront.c
+++ b/drivers/net/xen-netfront.c
@@ -340,22 +340,28 @@ static int xennet_open(struct net_device *dev)
unsigned int i = 0;
struct netfront_queue *queue = NULL;

+ device_lock(&np->xbdev->dev);
+
+ if (!netif_carrier_ok(dev))
+ goto unlock;
+
for (i = 0; i < num_queues; ++i) {
queue = &np->queues[i];
napi_enable(&queue->napi);

spin_lock_bh(&queue->rx_lock);
- if (netif_carrier_ok(dev)) {
- xennet_alloc_rx_buffers(queue);
- queue->rx.sring->rsp_event = queue->rx.rsp_cons + 1;
- if (RING_HAS_UNCONSUMED_RESPONSES(&queue->rx))
- napi_schedule(&queue->napi);
- }
+ xennet_alloc_rx_buffers(queue);
+ queue->rx.sring->rsp_event = queue->rx.rsp_cons + 1;
+ if (RING_HAS_UNCONSUMED_RESPONSES(&queue->rx))
+ napi_schedule(&queue->napi);
spin_unlock_bh(&queue->rx_lock);
}

netif_tx_start_all_queues(dev);

+ unlock:
+ device_unlock(&np->xbdev->dev);
+
return 0;
}

@@ -1983,8 +1989,8 @@ static int xennet_connect(struct net_device *dev)
num_queues = dev->real_num_tx_queues;

rtnl_lock();
+
netdev_update_features(dev);
- rtnl_unlock();

/*
* All public and private state should now be sane. Get
@@ -1992,7 +1998,6 @@ static int xennet_connect(struct net_device *dev)
* domain a kick because we've probably just requeued some
* packets.
*/
- netif_carrier_on(np->netdev);
for (j = 0; j < num_queues; ++j) {
queue = &np->queues[j];

@@ -2006,9 +2011,17 @@ static int xennet_connect(struct net_device *dev)

spin_lock_bh(&queue->rx_lock);
xennet_alloc_rx_buffers(queue);
+ if (netif_running(dev)) {
+ if (RING_HAS_UNCONSUMED_RESPONSES(&queue->rx))
+ napi_schedule(&queue->napi);
+ }
spin_unlock_bh(&queue->rx_lock);
}

+ netif_carrier_on(np->netdev);
+
+ rtnl_unlock();
+
return 0;
}

diff --git a/drivers/xen/xenbus/xenbus_probe.c
b/drivers/xen/xenbus/xenbus_probe.c
index 33a31cf..57cd20d 100644
--- a/drivers/xen/xenbus/xenbus_probe.c
+++ b/drivers/xen/xenbus/xenbus_probe.c
@@ -204,8 +204,11 @@ void xenbus_otherend_changed(struct xenbus_watch
*watch,
return;
}

- if (drv->otherend_changed)
+ if (drv->otherend_changed) {
+ device_lock(&dev->dev);
drv->otherend_changed(dev, state);
+ device_unlock(&dev->dev);
+ }
}
EXPORT_SYMBOL_GPL(xenbus_otherend_changed);