[PATCH 09/12] habanalabs: handle events during soft-reset

From: Oded Gabbay
Date: Sun Nov 28 2021 - 14:37:07 EST


From: Ofir Bitton <obitton@xxxxxxxxx>

The driver should handle events during soft-reset, because the F/W does
not go through reset and keeps sending events to the host.

Signed-off-by: Ofir Bitton <obitton@xxxxxxxxx>
Reviewed-by: Oded Gabbay <ogabbay@xxxxxxxxxx>
Signed-off-by: Oded Gabbay <ogabbay@xxxxxxxxxx>
---
drivers/misc/habanalabs/common/device.c | 4 ++++
drivers/misc/habanalabs/common/habanalabs.h | 2 ++
drivers/misc/habanalabs/common/irq.c | 2 +-
3 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c
index 822d9cec5aaf..720eea0b7e9c 100644
--- a/drivers/misc/habanalabs/common/device.c
+++ b/drivers/misc/habanalabs/common/device.c
@@ -1019,6 +1019,8 @@ int hl_device_reset(struct hl_device *hdev, u32 flags)

handle_reset_trigger(hdev, flags);

+ hdev->is_in_soft_reset = !hard_reset;
+
/* This also blocks future CS/VM/JOB completion operations */
hdev->disabled = true;

@@ -1171,6 +1173,7 @@ int hl_device_reset(struct hl_device *hdev, u32 flags)
* is required for the initialization itself
*/
hdev->disabled = false;
+ hdev->is_in_soft_reset = false;

rc = hdev->asic_funcs->hw_init(hdev);
if (rc) {
@@ -1242,6 +1245,7 @@ int hl_device_reset(struct hl_device *hdev, u32 flags)

out_err:
hdev->disabled = true;
+ hdev->is_in_soft_reset = false;

if (hard_reset) {
dev_err(hdev->dev, "Failed to reset! Device is NOT usable\n");
diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h
index a465b4a5f31d..c2129c9fe9e4 100644
--- a/drivers/misc/habanalabs/common/habanalabs.h
+++ b/drivers/misc/habanalabs/common/habanalabs.h
@@ -2591,6 +2591,7 @@ struct last_error_session_info {
* protocol will throw an error. Relevant only for
* cases where Linux was not loaded to device CPU
* @supports_wait_for_multi_cs: true if wait for multi CS is supported
+ * @is_in_soft_reset: Device is currently in soft reset process.
*/
struct hl_device {
struct pci_dev *pdev;
@@ -2719,6 +2720,7 @@ struct hl_device {
u8 device_cpu_is_halted;
u8 supports_wait_for_multi_cs;
u8 stream_master_qid_arr_size;
+ u8 is_in_soft_reset;

/* Parameters for bring-up */
u64 nic_ports_mask;
diff --git a/drivers/misc/habanalabs/common/irq.c b/drivers/misc/habanalabs/common/irq.c
index 9fd4c18e274e..64e0d9de21bd 100644
--- a/drivers/misc/habanalabs/common/irq.c
+++ b/drivers/misc/habanalabs/common/irq.c
@@ -245,7 +245,7 @@ irqreturn_t hl_irq_handler_eq(int irq, void *arg)
*/
dma_rmb();

- if (hdev->disabled) {
+ if (hdev->disabled && !hdev->is_in_soft_reset) {
dev_warn(hdev->dev, "Device disabled but received an EQ event\n");
goto skip_irq;
}
--
2.25.1