[PATCH 3/7] habanalabs: report EQ fault during heartbeat
From: Oded Gabbay
Date: Wed Jun 09 2021 - 11:04:07 EST
From: Ohad Sharabi <osharabi@xxxxxxxxx>
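In case of an EQ fault, we would like the driver to know about it.
For this, a status bitmask was added to struct cpucp_packet, in which
the EQ_FAULT bit is set by the FW when an EQ fault occurs. The heartbeat
now checks this bit and, if it is set, prints a warning and fails
with -EIO.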
Signed-off-by: Ohad Sharabi <osharabi@xxxxxxxxx>
Reviewed-by: Oded Gabbay <ogabbay@xxxxxxxxxx>
Signed-off-by: Oded Gabbay <ogabbay@xxxxxxxxxx>
---
drivers/misc/habanalabs/common/firmware_if.c | 8 +++++++-
.../misc/habanalabs/include/common/cpucp_if.h | 20 +++++++++++++++++++
2 files changed, 27 insertions(+), 1 deletion(-)
diff --git a/drivers/misc/habanalabs/common/firmware_if.c b/drivers/misc/habanalabs/common/firmware_if.c
index 9412e6707906..d5a3c786d4c9 100644
--- a/drivers/misc/habanalabs/common/firmware_if.c
+++ b/drivers/misc/habanalabs/common/firmware_if.c
@@ -362,7 +362,7 @@ void hl_fw_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size,
int hl_fw_send_heartbeat(struct hl_device *hdev)
{
- struct cpucp_packet hb_pkt = {};
+ struct cpucp_packet hb_pkt = {0};
u64 result;
int rc;
@@ -374,7 +374,13 @@ int hl_fw_send_heartbeat(struct hl_device *hdev)
sizeof(hb_pkt), 0, &result);
if ((rc) || (result != CPUCP_PACKET_FENCE_VAL))
+ return -EIO;
+
+ if (le32_to_cpu(hb_pkt.status_mask) &
+ CPUCP_PKT_HB_STATUS_EQ_FAULT_MASK) {
+ dev_warn(hdev->dev, "FW reported EQ fault during heartbeat\n");
rc = -EIO;
+ }
return rc;
}
diff --git a/drivers/misc/habanalabs/include/common/cpucp_if.h b/drivers/misc/habanalabs/include/common/cpucp_if.h
index d4dc189a6c92..80b1d5a9d9f1 100644
--- a/drivers/misc/habanalabs/include/common/cpucp_if.h
+++ b/drivers/misc/habanalabs/include/common/cpucp_if.h
@@ -404,6 +404,20 @@ enum cpucp_packet_id {
#define CPUCP_PKT_RES_PLL_OUT3_SHIFT 48
#define CPUCP_PKT_RES_PLL_OUT3_MASK 0xFFFF000000000000ull
+#define CPUCP_PKT_VAL_PFC_IN1_SHIFT 0
+#define CPUCP_PKT_VAL_PFC_IN1_MASK 0x0000000000000001ull
+#define CPUCP_PKT_VAL_PFC_IN2_SHIFT 1
+#define CPUCP_PKT_VAL_PFC_IN2_MASK 0x000000000000001Eull
+
+#define CPUCP_PKT_VAL_LPBK_IN1_SHIFT 0
+#define CPUCP_PKT_VAL_LPBK_IN1_MASK 0x0000000000000001ull
+#define CPUCP_PKT_VAL_LPBK_IN2_SHIFT 1
+#define CPUCP_PKT_VAL_LPBK_IN2_MASK 0x000000000000001Eull
+
+/* heartbeat status bits */
+#define CPUCP_PKT_HB_STATUS_EQ_FAULT_SHIFT 0
+#define CPUCP_PKT_HB_STATUS_EQ_FAULT_MASK 0x00000001
+
struct cpucp_packet {
union {
__le64 value; /* For SET packets */
@@ -445,6 +459,12 @@ struct cpucp_packet {
/* For get CpuCP info/EEPROM data/NIC info */
__le32 data_max_size;
+
+ /*
+ * For any general status bitmask. Shall be used whenever the
+ * result cannot be used to hold general purpose data.
+ */
+ __le32 status_mask;
};
__le32 reserved;
--
2.25.1