Re: [PATCH -next v3 2/2] nbd: Fix hungtask when nbd_config_put

From: Josef Bacik
Date: Fri Oct 29 2021 - 10:38:28 EST


On Fri, Oct 29, 2021 at 05:42:28PM +0800, Ye Bin wrote:
> I got follow issue:
> [ 247.381177] INFO: task kworker/u10:0:47 blocked for more than 120 seconds.
> [ 247.382644] Not tainted 4.19.90-dirty #140
> [ 247.383502] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
> [ 247.385027] Call Trace:
> [ 247.388384] schedule+0xb8/0x3c0
> [ 247.388966] schedule_timeout+0x2b4/0x380
> [ 247.392815] wait_for_completion+0x367/0x510
> [ 247.397713] flush_workqueue+0x32b/0x1340
> [ 247.402700] drain_workqueue+0xda/0x3c0
> [ 247.403442] destroy_workqueue+0x7b/0x690
> [ 247.405014] nbd_config_put.cold+0x2f9/0x5b6
> [ 247.405823] recv_work+0x1fd/0x2b0
> [ 247.406485] process_one_work+0x70b/0x1610
> [ 247.407262] worker_thread+0x5a9/0x1060
> [ 247.408699] kthread+0x35e/0x430
> [ 247.410918] ret_from_fork+0x1f/0x30
>
> We can reprodeuce issue as follows:

"reproduce"

> 1. Inject memory fault in nbd_start_device
> -1244,10 +1248,18 @@ static int nbd_start_device(struct nbd_device *nbd)
> nbd_dev_dbg_init(nbd);
> for (i = 0; i < num_connections; i++) {
> struct recv_thread_args *args;
> -
> - args = kzalloc(sizeof(*args), GFP_KERNEL);
> +
> + if (i == 1) {
> + args = NULL;
> + printk("%s: inject malloc error\n", __func__);
> + }
> + else
> + args = kzalloc(sizeof(*args), GFP_KERNEL);
> 2. Inject delay in recv_work
> -757,6 +760,8 @@ static void recv_work(struct work_struct *work)
>
> blk_mq_complete_request(blk_mq_rq_from_pdu(cmd));
> }
> + printk("%s: comm=%s pid=%d\n", __func__, current->comm, current->pid);
> + mdelay(5 * 1000);
> nbd_config_put(nbd);
> atomic_dec(&config->recv_threads);
> wake_up(&config->recv_wq);
> 3. Create nbd server
> nbd-server 8000 /tmp/disk
> 4. Create nbd client
> nbd-client localhost 8000 /dev/nbd1
> Then will trigger above issue.
>
> Reason is when add delay in recv_work, lead to relase the last reference

"release"

> of 'nbd->config_refs'. nbd_config_put will call flush_workqueue to make
> all work finish. Obviously, it will lead to deadloop.
> To solve this issue, according to Josef's suggestion move 'recv_work'
> init from start device to nbd_dev_add, then destory 'recv_work'when

"destroy"

> nbd device teardown.
>
> Signed-off-by: Ye Bin <yebin10@xxxxxxxxxx>
> ---
> drivers/block/nbd.c | 30 ++++++++++++++----------------
> 1 file changed, 14 insertions(+), 16 deletions(-)
>
> diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
> index 096883ab9b76..c9a65a260668 100644
> --- a/drivers/block/nbd.c
> +++ b/drivers/block/nbd.c
> @@ -1314,10 +1314,6 @@ static void nbd_config_put(struct nbd_device *nbd)
> kfree(nbd->config);
> nbd->config = NULL;
>
> - if (nbd->recv_workq)
> - destroy_workqueue(nbd->recv_workq);
> - nbd->recv_workq = NULL;
> -
> nbd->tag_set.timeout = 0;
> nbd->disk->queue->limits.discard_granularity = 0;
> nbd->disk->queue->limits.discard_alignment = 0;
> @@ -1346,14 +1342,6 @@ static int nbd_start_device(struct nbd_device *nbd)
> return -EINVAL;
> }
>
> - nbd->recv_workq = alloc_workqueue("knbd%d-recv",
> - WQ_MEM_RECLAIM | WQ_HIGHPRI |
> - WQ_UNBOUND, 0, nbd->index);
> - if (!nbd->recv_workq) {
> - dev_err(disk_to_dev(nbd->disk), "Could not allocate knbd recv work queue.\n");
> - return -ENOMEM;
> - }
> -
> blk_mq_update_nr_hw_queues(&nbd->tag_set, config->num_connections);
> nbd->pid = task_pid_nr(current);
>
> @@ -1779,6 +1767,15 @@ static struct nbd_device *nbd_dev_add(int index, unsigned int refs)
> }
> nbd->disk = disk;
>
> + nbd->recv_workq = alloc_workqueue("nbd%d-recv",
> + WQ_MEM_RECLAIM | WQ_HIGHPRI |
> + WQ_UNBOUND, 0, nbd->index);
> + if (!nbd->recv_workq) {
> + dev_err(disk_to_dev(nbd->disk), "Could not allocate knbd recv work queue.\n");
> + err = -ENOMEM;
> + goto out_err_disk;
> + }
> +

You never free this up; you need to add a destroy_workqueue(nbd->recv_workq)
call to nbd_dev_remove().

> /*
> * Tell the block layer that we are not a rotational device
> */
> @@ -1809,7 +1806,7 @@ static struct nbd_device *nbd_dev_add(int index, unsigned int refs)
> disk->first_minor = index << part_shift;
> if (disk->first_minor > 0xff) {
> err = -EINVAL;
> - goto out_err_disk;
> + goto out_free_work;
> }
>
> disk->minors = 1 << part_shift;
> @@ -1818,7 +1815,7 @@ static struct nbd_device *nbd_dev_add(int index, unsigned int refs)
> sprintf(disk->disk_name, "nbd%d", index);
> err = add_disk(disk);
> if (err)
> - goto out_err_disk;
> + goto out_free_work;
>
> /*
> * Now publish the device.
> @@ -1827,6 +1824,8 @@ static struct nbd_device *nbd_dev_add(int index, unsigned int refs)
> nbd_total_devices++;
> return nbd;
>
> +out_free_work:
> + destroy_workqueue(nbd->recv_workq);
> out_err_disk:
> blk_cleanup_disk(disk);
> out_free_idr:
> @@ -2087,8 +2086,7 @@ static void nbd_disconnect_and_put(struct nbd_device *nbd)
> * queue. And this also ensure that we can safely call nbd_clear_que()
> * to cancel the inflight I/Os.
> */
> - if (nbd->recv_workq)
> - flush_workqueue(nbd->recv_workq);
> + flush_workqueue(nbd->recv_workq);

The comment above this part needs to be updated, as we no longer have this
problem. Thanks,

Josef