[PATCH 09/15] habanalabs/gaudi2: use graceful hard reset for F/W events

From: Oded Gabbay
Date: Thu Oct 27 2022 - 05:11:39 EST


From: Tomer Tayar <ttayar@xxxxxxxxx>

Use graceful hard reset for F/W events on Gaudi2 device that require a
device reset.

While at it, do a small refactor of the checks and function calls,
to simplify it and to avoid code duplication.

Signed-off-by: Tomer Tayar <ttayar@xxxxxxxxx>
Reviewed-by: Oded Gabbay <ogabbay@xxxxxxxxxx>
Signed-off-by: Oded Gabbay <ogabbay@xxxxxxxxxx>
---
drivers/misc/habanalabs/gaudi2/gaudi2.c | 27 +++++++++----------------
1 file changed, 10 insertions(+), 17 deletions(-)

diff --git a/drivers/misc/habanalabs/gaudi2/gaudi2.c b/drivers/misc/habanalabs/gaudi2/gaudi2.c
index 9208f69dd7f8..22f5445fe71c 100644
--- a/drivers/misc/habanalabs/gaudi2/gaudi2.c
+++ b/drivers/misc/habanalabs/gaudi2/gaudi2.c
@@ -8768,9 +8768,9 @@ static void hl_arc_event_handle(struct hl_device *hdev,

static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry)
{
- u32 ctl, reset_flags = HL_DRV_RESET_HARD | HL_DRV_RESET_DELAY;
- struct gaudi2_device *gaudi2 = hdev->asic_specific;
bool reset_required = false, skip_reset = false, is_critical = false;
+ struct gaudi2_device *gaudi2 = hdev->asic_specific;
+ u32 ctl, reset_flags = HL_DRV_RESET_HARD;
int index, sbte_index;
u64 event_mask = 0;
u16 event_type;
@@ -9158,7 +9158,9 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent
event_type);
}

- if ((gaudi2_irq_map_table[event_type].reset || reset_required) && !skip_reset)
+ if ((gaudi2_irq_map_table[event_type].reset || reset_required) && !skip_reset &&
+ (hdev->hard_reset_on_fw_events ||
+ (hdev->asic_prop.fw_security_enabled && is_critical)))
goto reset_device;

/* Send unmask irq only for interrupts not classified as MSG */
@@ -9172,22 +9174,13 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent

reset_device:
if (hdev->asic_prop.fw_security_enabled && is_critical) {
- reset_flags = HL_DRV_RESET_HARD | HL_DRV_RESET_BYPASS_REQ_TO_FW;
-
- /* notify on device unavailable while the reset triggered by fw */
- event_mask |= (HL_NOTIFIER_EVENT_DEVICE_RESET |
- HL_NOTIFIER_EVENT_DEVICE_UNAVAILABLE);
- hl_device_reset(hdev, reset_flags);
- } else if (hdev->hard_reset_on_fw_events) {
- event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET;
- hl_device_reset(hdev, reset_flags);
+ reset_flags |= HL_DRV_RESET_BYPASS_REQ_TO_FW;
+ event_mask |= HL_NOTIFIER_EVENT_DEVICE_UNAVAILABLE;
} else {
- if (!gaudi2_irq_map_table[event_type].msg)
- hl_fw_unmask_irq(hdev, event_type);
+ reset_flags |= HL_DRV_RESET_DELAY;
}
-
- if (event_mask)
- hl_notifier_event_send_all(hdev, event_mask);
+ event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET;
+ hl_device_cond_reset(hdev, reset_flags, event_mask);
}

static int gaudi2_memset_device_memory(struct hl_device *hdev, u64 addr, u64 size, u64 val)
--
2.25.1