[PATCH RFC 2/3] net: ath11k: add firmware lockup detection and recovery
From: Matthew Leach
Date: Wed Mar 04 2026 - 07:49:43 EST
Treat the firmware as locked up when a WMI command times out within
ATH11K_LOCKUP_DESC_ERR_RANGE_HZ (1 minute) after a frame transmit
failure, such as TX descriptor exhaustion. In this case, consider the
firmware dead.
When a lockup is detected, queue reset work to restart the chip.
After the reset completes, clear the recorded TX error timestamp
(last_frame_tx_error_jiffies) that drives lockup detection.
Signed-off-by: Matthew Leach <matthew.leach@xxxxxxxxxxxxx>
---
drivers/net/wireless/ath/ath11k/core.h | 2 ++
drivers/net/wireless/ath/ath11k/mac.c | 6 ++++++
drivers/net/wireless/ath/ath11k/wmi.c | 24 +++++++++++++++++++++++-
3 files changed, 31 insertions(+), 1 deletion(-)
diff --git a/drivers/net/wireless/ath/ath11k/core.h b/drivers/net/wireless/ath/ath11k/core.h
index a0d725923ef2..221dcd23b3dd 100644
--- a/drivers/net/wireless/ath/ath11k/core.h
+++ b/drivers/net/wireless/ath/ath11k/core.h
@@ -70,6 +70,7 @@ extern bool ath11k_ftm_mode;
#define ATH11K_RESET_FAIL_TIMEOUT_HZ (20 * HZ)
#define ATH11K_RECONFIGURE_TIMEOUT_HZ (10 * HZ)
#define ATH11K_RECOVER_START_TIMEOUT_HZ (20 * HZ)
+#define ATH11K_LOCKUP_DESC_ERR_RANGE_HZ (60 * HZ)
enum ath11k_supported_bw {
ATH11K_BW_20 = 0,
@@ -1039,6 +1040,7 @@ struct ath11k_base {
struct ath11k_dbring_cap *db_caps;
u32 num_db_cap;
+ u64 last_frame_tx_error_jiffies;
/* To synchronize 11d scan vdev id */
struct mutex vdev_id_11d_lock;
diff --git a/drivers/net/wireless/ath/ath11k/mac.c b/drivers/net/wireless/ath/ath11k/mac.c
index 748f779b3d1b..a0b4d60da330 100644
--- a/drivers/net/wireless/ath/ath11k/mac.c
+++ b/drivers/net/wireless/ath/ath11k/mac.c
@@ -9,6 +9,7 @@
#include <linux/etherdevice.h>
#include <linux/bitfield.h>
#include <linux/inetdevice.h>
+#include <linux/jiffies.h>
#include <net/if_inet6.h>
#include <net/ipv6.h>
@@ -6546,6 +6547,10 @@ static void ath11k_mac_op_tx(struct ieee80211_hw *hw,
ret = ath11k_dp_tx(ar, arvif, arsta, skb);
if (unlikely(ret)) {
+ scoped_guard(spinlock_bh, &ar->ab->base_lock) {
+ ar->ab->last_frame_tx_error_jiffies = jiffies_64;
+ }
+
ath11k_warn(ar->ab, "failed to transmit frame %d\n", ret);
ieee80211_free_txskb(ar->hw, skb);
}
@@ -9281,6 +9286,7 @@ ath11k_mac_op_reconfig_complete(struct ieee80211_hw *hw,
atomic_dec(&ab->reset_count);
complete(&ab->reset_complete);
ab->is_reset = false;
+ ab->last_frame_tx_error_jiffies = 0;
atomic_set(&ab->fail_cont_count, 0);
ath11k_dbg(ab, ATH11K_DBG_BOOT, "reset success\n");
}
diff --git a/drivers/net/wireless/ath/ath11k/wmi.c b/drivers/net/wireless/ath/ath11k/wmi.c
index 40747fba3b0c..7d9f0bcbb3b0 100644
--- a/drivers/net/wireless/ath/ath11k/wmi.c
+++ b/drivers/net/wireless/ath/ath11k/wmi.c
@@ -7,8 +7,11 @@
#include <linux/ctype.h>
#include <net/mac80211.h>
#include <net/cfg80211.h>
+#include <linux/cleanup.h>
#include <linux/completion.h>
#include <linux/if_ether.h>
+#include <linux/jiffies.h>
+#include <linux/spinlock.h>
#include <linux/types.h>
#include <linux/pci.h>
#include <linux/uuid.h>
@@ -325,9 +328,28 @@ int ath11k_wmi_cmd_send(struct ath11k_pdev_wmi *wmi, struct sk_buff *skb,
}), WMI_SEND_TIMEOUT_HZ);
}
- if (ret == -EAGAIN)
+ if (ret == -EAGAIN) {
+ u64 range_start;
+
ath11k_warn(wmi_ab->ab, "wmi command %d timeout\n", cmd_id);
+ guard(spinlock_bh)(&ab->base_lock);
+
+ if (ab->last_frame_tx_error_jiffies == 0)
+ return ret;
+
+ range_start =
+ (jiffies_64 > ATH11K_LOCKUP_DESC_ERR_RANGE_HZ) ?
+ jiffies_64 - ATH11K_LOCKUP_DESC_ERR_RANGE_HZ :
+ 0;
+
+ if (time_in_range64(ab->last_frame_tx_error_jiffies,
+ range_start, jiffies_64) &&
+ queue_work(ab->workqueue_aux, &ab->reset_work))
+ ath11k_err(wmi_ab->ab,
+ "Firmware lockup detected. Resetting.");
+ }
+
if (ret == -ENOBUFS)
ath11k_warn(wmi_ab->ab, "ce desc not available for wmi command %d\n",
cmd_id);
--
2.53.0