[PATCH v3] Bluetooth: hci_qca: Bug fixes while collecting controller memory dump

From: Venkata Lakshmi Narayana Gubba
Date: Thu Feb 13 2020 - 10:56:45 EST


This patch will fix the below issues
1.Fixed race conditions while accessing memory dump state flags.
2.Updated with actual context of timer in hci_memdump_timeout()
3.Updated injecting hardware error event if the dumps failed to receive.
4.Once timeout is triggered, stopping the memory dump collections.

Possible scenarios while collecting memory dump:

Scenario 1:

Memdump event from firmware
Some number of memdump events with seq #
Hw error event
Reset

Scenario 2:

Memdump event from firmware
Some number of memdump events with seq #
Timeout schedules hw_error_event if hw error event is not received already
hw_error_event clears the memdump activity
reset

Scenario 3:

hw_error_event sends memdump command to firmware and waits for completion
Some number of memdump events with seq #
hw error event
reset

Fixes: d841502c79e3 ("Bluetooth: hci_qca: Collect controller memory dump during SSR")
Reported-by: Abhishek Pandit-Subedi <abhishekpandit@xxxxxxxxxxxx>
Signed-off-by: Venkata Lakshmi Narayana Gubba <gubbaven@xxxxxxxxxxxxxx>
---
v3:
* Removed memdump_timer completely.
* Used delayed work queue.
---
drivers/bluetooth/hci_qca.c | 101 +++++++++++++++++++++++++++++---------------
1 file changed, 67 insertions(+), 34 deletions(-)

diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c
index eacc65b..9cae5fe 100644
--- a/drivers/bluetooth/hci_qca.c
+++ b/drivers/bluetooth/hci_qca.c
@@ -29,6 +29,7 @@
#include <linux/platform_device.h>
#include <linux/regulator/consumer.h>
#include <linux/serdev.h>
+#include <linux/mutex.h>
#include <asm/unaligned.h>

#include <net/bluetooth/bluetooth.h>
@@ -69,7 +70,8 @@ enum qca_flags {
QCA_IBS_ENABLED,
QCA_DROP_VENDOR_EVENT,
QCA_SUSPENDING,
- QCA_MEMDUMP_COLLECTION
+ QCA_MEMDUMP_COLLECTION,
+ QCA_HW_ERROR_EVENT
};


@@ -138,18 +140,19 @@ struct qca_data {
u32 tx_idle_delay;
struct timer_list wake_retrans_timer;
u32 wake_retrans;
- struct timer_list memdump_timer;
struct workqueue_struct *workqueue;
struct work_struct ws_awake_rx;
struct work_struct ws_awake_device;
struct work_struct ws_rx_vote_off;
struct work_struct ws_tx_vote_off;
struct work_struct ctrl_memdump_evt;
+ struct delayed_work ctrl_memdump_timeout;
struct qca_memdump_data *qca_memdump;
unsigned long flags;
struct completion drop_ev_comp;
wait_queue_head_t suspend_wait_q;
enum qca_memdump_states memdump_state;
+ struct mutex hci_memdump_lock;

/* For debugging purpose */
u64 ibs_sent_wacks;
@@ -522,23 +525,28 @@ static void hci_ibs_wake_retrans_timeout(struct timer_list *t)
hci_uart_tx_wakeup(hu);
}

-static void hci_memdump_timeout(struct timer_list *t)
+
+static void qca_controller_memdump_timeout(struct work_struct *work)
{
- struct qca_data *qca = from_timer(qca, t, tx_idle_timer);
+ struct qca_data *qca = container_of(work, struct qca_data,
+ ctrl_memdump_timeout.work);
struct hci_uart *hu = qca->hu;
- struct qca_memdump_data *qca_memdump = qca->qca_memdump;
- char *memdump_buf = qca_memdump->memdump_buf_tail;
-
- bt_dev_err(hu->hdev, "clearing allocated memory due to memdump timeout");
- /* Inject hw error event to reset the device and driver. */
- hci_reset_dev(hu->hdev);
- vfree(memdump_buf);
- kfree(qca_memdump);
- qca->memdump_state = QCA_MEMDUMP_TIMEOUT;
- del_timer(&qca->memdump_timer);
- cancel_work_sync(&qca->ctrl_memdump_evt);
+
+ mutex_lock(&qca->hci_memdump_lock);
+ if (test_bit(QCA_MEMDUMP_COLLECTION, &qca->flags)) {
+ qca->memdump_state = QCA_MEMDUMP_TIMEOUT;
+ if (!test_bit(QCA_HW_ERROR_EVENT, &qca->flags)) {
+ /* Inject hw error event to reset the device
+ * and driver.
+ */
+ hci_reset_dev(hu->hdev);
+ }
+ }
+
+ mutex_unlock(&qca->hci_memdump_lock);
}

+
/* Initialize protocol */
static int qca_open(struct hci_uart *hu)
{
@@ -558,6 +566,7 @@ static int qca_open(struct hci_uart *hu)
skb_queue_head_init(&qca->tx_wait_q);
skb_queue_head_init(&qca->rx_memdump_q);
spin_lock_init(&qca->hci_ibs_lock);
+ mutex_init(&qca->hci_memdump_lock);
qca->workqueue = alloc_ordered_workqueue("qca_wq", 0);
if (!qca->workqueue) {
BT_ERR("QCA Workqueue not initialized properly");
@@ -570,6 +579,8 @@ static int qca_open(struct hci_uart *hu)
INIT_WORK(&qca->ws_rx_vote_off, qca_wq_serial_rx_clock_vote_off);
INIT_WORK(&qca->ws_tx_vote_off, qca_wq_serial_tx_clock_vote_off);
INIT_WORK(&qca->ctrl_memdump_evt, qca_controller_memdump);
+ INIT_DELAYED_WORK(&qca->ctrl_memdump_timeout,
+ qca_controller_memdump_timeout);
init_waitqueue_head(&qca->suspend_wait_q);

qca->hu = hu;
@@ -596,7 +607,6 @@ static int qca_open(struct hci_uart *hu)

timer_setup(&qca->tx_idle_timer, hci_ibs_tx_idle_timeout, 0);
qca->tx_idle_delay = IBS_HOST_TX_IDLE_TIMEOUT_MS;
- timer_setup(&qca->memdump_timer, hci_memdump_timeout, 0);

BT_DBG("HCI_UART_QCA open, tx_idle_delay=%u, wake_retrans=%u",
qca->tx_idle_delay, qca->wake_retrans);
@@ -677,7 +687,6 @@ static int qca_close(struct hci_uart *hu)
skb_queue_purge(&qca->rx_memdump_q);
del_timer(&qca->tx_idle_timer);
del_timer(&qca->wake_retrans_timer);
- del_timer(&qca->memdump_timer);
destroy_workqueue(qca->workqueue);
qca->hu = NULL;

@@ -963,11 +972,20 @@ static void qca_controller_memdump(struct work_struct *work)

while ((skb = skb_dequeue(&qca->rx_memdump_q))) {

+ mutex_lock(&qca->hci_memdump_lock);
+ /* Skip processing the received packets if timeout detected. */
+ if (qca->memdump_state == QCA_MEMDUMP_TIMEOUT) {
+ mutex_unlock(&qca->hci_memdump_lock);
+ return;
+ }
+
if (!qca_memdump) {
qca_memdump = kzalloc(sizeof(struct qca_memdump_data),
GFP_ATOMIC);
- if (!qca_memdump)
+ if (!qca_memdump) {
+ mutex_unlock(&qca->hci_memdump_lock);
return;
+ }

qca->qca_memdump = qca_memdump;
}
@@ -992,13 +1010,15 @@ static void qca_controller_memdump(struct work_struct *work)
if (!(dump_size)) {
bt_dev_err(hu->hdev, "Rx invalid memdump size");
kfree_skb(skb);
+ mutex_unlock(&qca->hci_memdump_lock);
return;
}

bt_dev_info(hu->hdev, "QCA collecting dump of size:%u",
dump_size);
- mod_timer(&qca->memdump_timer, (jiffies +
- msecs_to_jiffies(MEMDUMP_TIMEOUT_MS)));
+ queue_delayed_work(qca->workqueue,
+ &qca->ctrl_memdump_timeout,
+ msecs_to_jiffies(MEMDUMP_TIMEOUT_MS));

skb_pull(skb, sizeof(dump_size));
memdump_buf = vmalloc(dump_size);
@@ -1016,6 +1036,7 @@ static void qca_controller_memdump(struct work_struct *work)
kfree(qca_memdump);
kfree_skb(skb);
qca->qca_memdump = NULL;
+ mutex_unlock(&qca->hci_memdump_lock);
return;
}

@@ -1046,16 +1067,20 @@ static void qca_controller_memdump(struct work_struct *work)
memdump_buf = qca_memdump->memdump_buf_head;
dev_coredumpv(&hu->serdev->dev, memdump_buf,
qca_memdump->received_dump, GFP_KERNEL);
- del_timer(&qca->memdump_timer);
+ cancel_delayed_work(&qca->ctrl_memdump_timeout);
kfree(qca->qca_memdump);
qca->qca_memdump = NULL;
qca->memdump_state = QCA_MEMDUMP_COLLECTED;
+ clear_bit(QCA_MEMDUMP_COLLECTION, &qca->flags);
}
+
+ mutex_unlock(&qca->hci_memdump_lock);
}

}

-int qca_controller_memdump_event(struct hci_dev *hdev, struct sk_buff *skb)
+static int qca_controller_memdump_event(struct hci_dev *hdev,
+ struct sk_buff *skb)
{
struct hci_uart *hu = hci_get_drvdata(hdev);
struct qca_data *qca = hu->priv;
@@ -1406,30 +1431,21 @@ static void qca_wait_for_dump_collection(struct hci_dev *hdev)
{
struct hci_uart *hu = hci_get_drvdata(hdev);
struct qca_data *qca = hu->priv;
- struct qca_memdump_data *qca_memdump = qca->qca_memdump;
- char *memdump_buf = NULL;

wait_on_bit_timeout(&qca->flags, QCA_MEMDUMP_COLLECTION,
TASK_UNINTERRUPTIBLE, MEMDUMP_TIMEOUT_MS);

clear_bit(QCA_MEMDUMP_COLLECTION, &qca->flags);
- if (qca->memdump_state == QCA_MEMDUMP_IDLE) {
- bt_dev_err(hu->hdev, "Clearing the buffers due to timeout");
- if (qca_memdump)
- memdump_buf = qca_memdump->memdump_buf_tail;
- vfree(memdump_buf);
- kfree(qca_memdump);
- qca->memdump_state = QCA_MEMDUMP_TIMEOUT;
- del_timer(&qca->memdump_timer);
- cancel_work_sync(&qca->ctrl_memdump_evt);
- }
}

static void qca_hw_error(struct hci_dev *hdev, u8 code)
{
struct hci_uart *hu = hci_get_drvdata(hdev);
struct qca_data *qca = hu->priv;
+ struct qca_memdump_data *qca_memdump = qca->qca_memdump;
+ char *memdump_buf = NULL;

+ set_bit(QCA_HW_ERROR_EVENT, &qca->flags);
bt_dev_info(hdev, "mem_dump_status: %d", qca->memdump_state);

if (qca->memdump_state == QCA_MEMDUMP_IDLE) {
@@ -1449,6 +1465,23 @@ static void qca_hw_error(struct hci_dev *hdev, u8 code)
bt_dev_info(hdev, "waiting for dump to complete");
qca_wait_for_dump_collection(hdev);
}
+
+ if (qca->memdump_state != QCA_MEMDUMP_COLLECTED) {
+ bt_dev_err(hu->hdev, "clearing allocated memory due to memdump timeout");
+ mutex_lock(&qca->hci_memdump_lock);
+ if (qca_memdump)
+ memdump_buf = qca_memdump->memdump_buf_head;
+ vfree(memdump_buf);
+ kfree(qca_memdump);
+ qca->qca_memdump = NULL;
+ qca->memdump_state = QCA_MEMDUMP_TIMEOUT;
+ cancel_delayed_work(&qca->ctrl_memdump_timeout);
+ skb_queue_purge(&qca->rx_memdump_q);
+ mutex_unlock(&qca->hci_memdump_lock);
+ cancel_work_sync(&qca->ctrl_memdump_evt);
+ }
+
+ clear_bit(QCA_HW_ERROR_EVENT, &qca->flags);
}

static void qca_cmd_timeout(struct hci_dev *hdev)
--
QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member
of Code Aurora Forum, hosted by The Linux Foundation