Re: net/bluetooth: workqueue destruction WARNING in hci_unregister_dev

From: Jiri Slaby
Date: Thu Mar 17 2016 - 08:00:42 EST


Hello,

On 03/11/2016, 06:12 PM, Tejun Heo wrote:
> On Thu, Mar 03, 2016 at 10:12:01AM +0100, Jiri Slaby wrote:
>> On 03/02/2016, 04:45 PM, Tejun Heo wrote:
>>> On Fri, Feb 19, 2016 at 01:10:00PM +0100, Jiri Slaby wrote:
>>>>> 1. didn't help, the problem persists. So I haven't applied the patch from 2.
>>>>
>>>> FWIW I dumped more info about the wq:
>>>> wq->name='hci0' pwq=ffff8800390d7600 wq->dfl_pwq=ffff8800390d5200
>>>> pwq->refcnt=2 pwq->nr_active=0 delayed_works: <nothing>
>>>
>>> Can you please print out the same info for all pwq's during shutdown?
>>> It looks like we're leaking pwq refcnt but I can't spot a place where
>>> that could happen on an empty pwq.
>>
>> I have not done that yet, but today, I see:
>> destroy_workqueue: name='req_hci0' pwq=ffff88002f590300
>> wq->dfl_pwq=ffff88002f591e00 pwq->refcnt=2 pwq->nr_active=0 delayed_works:
>> pwq 12: cpus=0-1 node=0 flags=0x4 nice=-20 active=0/1
>> in-flight: 18568:wq_barrier_func
>
> So, this means that there's flush_work() racing against workqueue
> destruction, which can't be safe. :(

But I cannot trigger the WARN_ONs in the attached patch, so I am
confused how this can happen :(. (While I am still seeing the destroy
WARNINGs.)

BTW. what did you mean by dumping the states at shutdown? Is it still
relevant?

thanks,
--
js
suse labs
---
include/linux/workqueue.h | 1 +
include/net/bluetooth/hci_core.h | 5 +++++
kernel/reboot.c | 1 +
kernel/workqueue.c | 34 +++++++++++++++++++++++++++++++---
net/bluetooth/hci_core.c | 39 +++++++++++++++++++++++++++++++++++----
5 files changed, 73 insertions(+), 7 deletions(-)

--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -312,6 +312,7 @@ enum {
__WQ_DRAINING = 1 << 16, /* internal: workqueue is draining */
__WQ_ORDERED = 1 << 17, /* internal: workqueue is ordered */
__WQ_LEGACY = 1 << 18, /* internal: create*_workqueue() */
+ __WQ_DESTROYING = 1 << 19,

WQ_MAX_ACTIVE = 512, /* I like 512, better ideas? */
WQ_MAX_UNBOUND_PER_CPU = 4, /* 4 * #cpus for unbound wq */
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -312,6 +312,11 @@ struct hci_dev {

struct workqueue_struct *workqueue;
struct workqueue_struct *req_workqueue;
+#define HCI_WQ_A 0
+#define HCI_WQ_D 1
+#define HCI_WQR_A 8
+#define HCI_WQR_D 9
+ unsigned long wq_status;

struct work_struct power_on;
struct delayed_work power_off;
--- a/kernel/reboot.c
+++ b/kernel/reboot.c
@@ -231,6 +231,7 @@ static void kernel_shutdown_prepare(enum
(state == SYSTEM_HALT) ? SYS_HALT : SYS_POWER_OFF, NULL);
system_state = state;
usermodehelper_disable();
+ show_workqueue_state();
device_shutdown();
}
/**
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -1366,6 +1366,9 @@ static void __queue_work(int cpu, struct
unsigned int work_flags;
unsigned int req_cpu = cpu;

+ if (WARN_ON(wq->flags & __WQ_DESTROYING))
+ return;
+
/*
* While a work item is PENDING && off queue, a task trying to
* steal the PENDING will busy-loop waiting for it to either get
@@ -2804,6 +2807,9 @@ static bool start_flush_work(struct work
pwq = worker->current_pwq;
}

+ if (WARN_ON(pwq->wq->flags & __WQ_DESTROYING))
+ return false;
+
check_flush_dependency(pwq->wq, work);

insert_wq_barrier(pwq, barr, work, worker);
@@ -2821,6 +2827,8 @@ static bool start_flush_work(struct work
lock_map_acquire_read(&pwq->wq->lockdep_map);
lock_map_release(&pwq->wq->lockdep_map);

+ WARN_ON(pwq->wq->flags & __WQ_DESTROYING);
+
return true;
already_gone:
spin_unlock_irq(&pool->lock);
@@ -3998,6 +4006,8 @@ err_destroy:
}
EXPORT_SYMBOL_GPL(__alloc_workqueue_key);

+static void show_pwq(struct pool_workqueue *pwq);
+
/**
* destroy_workqueue - safely terminate a workqueue
* @wq: target workqueue
@@ -4010,6 +4020,7 @@ void destroy_workqueue(struct workqueue_
int node;

/* drain it before proceeding with destruction */
+ wq->flags |= __WQ_DESTROYING;
drain_workqueue(wq);

/* sanity checks */
@@ -4024,9 +4035,26 @@ void destroy_workqueue(struct workqueue_
}
}

- if (WARN_ON((pwq != wq->dfl_pwq) && (pwq->refcnt > 1)) ||
- WARN_ON(pwq->nr_active) ||
- WARN_ON(!list_empty(&pwq->delayed_works))) {
+ if ((pwq != wq->dfl_pwq) && (pwq->refcnt > 1)) {
+ pr_info("%s: name='%s' pwq=%p wq->dfl_pwq=%p pwq->refcnt=%d pwq->nr_active=%d delayed_works:",
+ __func__, wq->name, pwq, wq->dfl_pwq,
+ pwq->refcnt, pwq->nr_active);
+
+ show_pwq(pwq);
+
+ mutex_unlock(&wq->mutex);
+ WARN_ON(1);
+ return;
+ }
+
+ if (WARN_ON(pwq->nr_active)) {
+ pr_info("%s: %ps\n", __func__, wq);
+ mutex_unlock(&wq->mutex);
+ return;
+ }
+
+ if (WARN_ON(!list_empty(&pwq->delayed_works))) {
+ pr_info("%s: %ps\n", __func__, wq);
mutex_unlock(&wq->mutex);
return;
}
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -1471,7 +1471,11 @@ int hci_dev_open(__u16 dev)
* has finished. This means that error conditions like RFKILL
* or no valid public or static random address apply.
*/
+ WARN_ON(test_bit(HCI_WQR_D, &hdev->wq_status));
+ WARN_ON(!test_bit(HCI_WQR_A, &hdev->wq_status));
flush_workqueue(hdev->req_workqueue);
+ WARN_ON(test_bit(HCI_WQR_D, &hdev->wq_status));
+ WARN_ON(!test_bit(HCI_WQR_A, &hdev->wq_status));

/* For controllers not using the management interface and that
* are brought up using legacy ioctl, set the HCI_BONDABLE bit
@@ -2467,6 +2471,8 @@ static void hci_cmd_timeout(struct work_
struct hci_dev *hdev = container_of(work, struct hci_dev,
cmd_timer.work);

+ if (WARN_ON(hci_dev_test_flag(hdev, HCI_UNREGISTER)))
+ return;
if (hdev->sent_cmd) {
struct hci_command_hdr *sent = (void *) hdev->sent_cmd->data;
u16 opcode = __le16_to_cpu(sent->opcode);
@@ -3043,15 +3049,17 @@ int hci_register_dev(struct hci_dev *hde

BT_DBG("%p name %s bus %d", hdev, hdev->name, hdev->bus);

- hdev->workqueue = alloc_workqueue("%s", WQ_HIGHPRI | WQ_UNBOUND |
- WQ_MEM_RECLAIM, 1, hdev->name);
+ hdev->workqueue = alloc_workqueue("%s", WQ_HIGHPRI | WQ_UNBOUND
+ , 1, hdev->name);
+ set_bit(HCI_WQ_A, &hdev->wq_status);
if (!hdev->workqueue) {
error = -ENOMEM;
goto err;
}

- hdev->req_workqueue = alloc_workqueue("%s", WQ_HIGHPRI | WQ_UNBOUND |
- WQ_MEM_RECLAIM, 1, hdev->name);
+ hdev->req_workqueue = alloc_workqueue("req_%s", WQ_HIGHPRI | WQ_UNBOUND
+ , 1, hdev->name);
+ set_bit(HCI_WQR_A, &hdev->wq_status);
if (!hdev->req_workqueue) {
destroy_workqueue(hdev->workqueue);
error = -ENOMEM;
@@ -3108,8 +3116,12 @@ int hci_register_dev(struct hci_dev *hde
return id;

err_wqueue:
+ set_bit(HCI_WQ_D, &hdev->wq_status);
destroy_workqueue(hdev->workqueue);
+ clear_bit(HCI_WQ_A, &hdev->wq_status);
+ set_bit(HCI_WQR_D, &hdev->wq_status);
destroy_workqueue(hdev->req_workqueue);
+ clear_bit(HCI_WQR_A, &hdev->wq_status);
err:
ida_simple_remove(&hci_index_ida, hdev->id);

@@ -3160,7 +3172,9 @@ void hci_unregister_dev(struct hci_dev *
debugfs_remove_recursive(hdev->debugfs);

destroy_workqueue(hdev->workqueue);
+ set_bit(HCI_WQR_D, &hdev->wq_status);
destroy_workqueue(hdev->req_workqueue);
+ clear_bit(HCI_WQR_A, &hdev->wq_status);

hci_dev_lock(hdev);
hci_bdaddr_list_clear(&hdev->blacklist);
@@ -3225,6 +3239,11 @@ int hci_recv_frame(struct hci_dev *hdev,
return -ENXIO;
}

+ if (WARN_ON(hci_dev_test_flag(hdev, HCI_UNREGISTER))) {
+ kfree_skb(skb);
+ return -ENXIO;
+ }
+
if (hci_skb_pkt_type(skb) != HCI_EVENT_PKT &&
hci_skb_pkt_type(skb) != HCI_ACLDATA_PKT &&
hci_skb_pkt_type(skb) != HCI_SCODATA_PKT) {
@@ -3248,6 +3267,9 @@ EXPORT_SYMBOL(hci_recv_frame);
/* Receive diagnostic message from HCI drivers */
int hci_recv_diag(struct hci_dev *hdev, struct sk_buff *skb)
{
+ if (WARN_ON(hci_dev_test_flag(hdev, HCI_UNREGISTER)))
+ return -ENXIO;
+
/* Mark as diagnostic packet */
hci_skb_pkt_type(skb) = HCI_DIAG_PKT;

@@ -3326,6 +3348,9 @@ int hci_send_cmd(struct hci_dev *hdev, _
{
struct sk_buff *skb;

+ if (WARN_ON(hci_dev_test_flag(hdev, HCI_UNREGISTER)))
+ return -ENXIO;
+
BT_DBG("%s opcode 0x%4.4x plen %d", hdev->name, opcode, plen);

skb = hci_prepare_cmd(hdev, opcode, plen, param);
@@ -3461,6 +3486,9 @@ void hci_send_acl(struct hci_chan *chan,
{
struct hci_dev *hdev = chan->conn->hdev;

+ if (WARN_ON(hci_dev_test_flag(hdev, HCI_UNREGISTER)))
+ return;
+
BT_DBG("%s chan %p flags 0x%4.4x", hdev->name, chan, flags);

hci_queue_acl(chan, &chan->data_q, skb, flags);
@@ -3474,6 +3502,9 @@ void hci_send_sco(struct hci_conn *conn,
struct hci_dev *hdev = conn->hdev;
struct hci_sco_hdr hdr;

+ if (WARN_ON(hci_dev_test_flag(hdev, HCI_UNREGISTER)))
+ return;
+
BT_DBG("%s len %d", hdev->name, skb->len);

hdr.handle = cpu_to_le16(conn->handle);