[PATCH 01/12] habanalabs: fixes to the poll-timeout macros

From: Oded Gabbay
Date: Mon Jul 11 2022 - 02:30:18 EST


From: Ohad Sharabi <osharabi@xxxxxxxxx>

- use conventional internal macro variables (double underscore prefix)
- adjust address casting
- on register poll using ELBI use ELBI read rather than BAR read on
error condition
- remove unused macro

Signed-off-by: Ohad Sharabi <osharabi@xxxxxxxxx>
Reviewed-by: Oded Gabbay <ogabbay@xxxxxxxxxx>
Signed-off-by: Oded Gabbay <ogabbay@xxxxxxxxxx>
---
drivers/misc/habanalabs/common/habanalabs.h | 119 +++++++++++++++-----
1 file changed, 90 insertions(+), 29 deletions(-)

diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h
index 72cb12f2068a..3c51eaca521c 100644
--- a/drivers/misc/habanalabs/common/habanalabs.h
+++ b/drivers/misc/habanalabs/common/habanalabs.h
@@ -2473,9 +2473,11 @@ void hl_wreg(struct hl_device *hdev, u32 reg, u32 val);
/* Timeout should be longer when working with simulator but cap the
* increased timeout to some maximum
*/
-#define hl_poll_timeout(hdev, addr, val, cond, sleep_us, timeout_us) \
+#define hl_poll_timeout_common(hdev, addr, val, cond, sleep_us, timeout_us, elbi) \
({ \
ktime_t __timeout; \
+ u32 __elbi_read; \
+ int __rc = 0; \
if (hdev->pdev) \
__timeout = ktime_add_us(ktime_get(), timeout_us); \
else \
@@ -2484,19 +2486,103 @@ void hl_wreg(struct hl_device *hdev, u32 reg, u32 val);
(u64) HL_SIM_MAX_TIMEOUT_US)); \
might_sleep_if(sleep_us); \
for (;;) { \
- (val) = RREG32(addr); \
+ if (elbi) { \
+ __rc = hl_pci_elbi_read(hdev, addr, &__elbi_read); \
+ if (__rc) \
+ break; \
+ (val) = __elbi_read; \
+ } else {\
+ (val) = RREG32((u32)addr); \
+ } \
if (cond) \
break; \
if (timeout_us && ktime_compare(ktime_get(), __timeout) > 0) { \
- (val) = RREG32(addr); \
+ if (elbi) { \
+ __rc = hl_pci_elbi_read(hdev, addr, &__elbi_read); \
+ if (__rc) \
+ break; \
+ (val) = __elbi_read; \
+ } else {\
+ (val) = RREG32((u32)addr); \
+ } \
break; \
} \
if (sleep_us) \
usleep_range((sleep_us >> 2) + 1, sleep_us); \
} \
- (cond) ? 0 : -ETIMEDOUT; \
+ __rc ? __rc : ((cond) ? 0 : -ETIMEDOUT); \
})

+#define hl_poll_timeout(hdev, addr, val, cond, sleep_us, timeout_us) \
+ hl_poll_timeout_common(hdev, addr, val, cond, sleep_us, timeout_us, false)
+
+#define hl_poll_timeout_elbi(hdev, addr, val, cond, sleep_us, timeout_us) \
+ hl_poll_timeout_common(hdev, addr, val, cond, sleep_us, timeout_us, true)
+
+/*
+ * poll array of register addresses.
+ * condition is satisfied if all registers values match the expected value.
+ * once some register in the array satisfies the condition it will not be polled again,
+ * this is done both for efficiency and due to some registers are "clear on read".
+ * TODO: use read from PCI bar in other places in the code (SW-91406)
+ */
+#define hl_poll_reg_array_timeout_common(hdev, addr_arr, arr_size, expected_val, sleep_us, \
+ timeout_us, elbi) \
+({ \
+ ktime_t __timeout; \
+ u64 __elem_bitmask; \
+ u32 __read_val; \
+ u8 __arr_idx; \
+ int __rc = 0; \
+ \
+ if (hdev->pdev) \
+ __timeout = ktime_add_us(ktime_get(), timeout_us); \
+ else \
+ __timeout = ktime_add_us(ktime_get(),\
+ min(((u64)timeout_us * 10), \
+ (u64) HL_SIM_MAX_TIMEOUT_US)); \
+ \
+ might_sleep_if(sleep_us); \
+ if (arr_size >= 64) \
+ __rc = -EINVAL; \
+ else \
+ __elem_bitmask = BIT_ULL(arr_size) - 1; \
+ for (;;) { \
+ if (__rc) \
+ break; \
+ for (__arr_idx = 0; __arr_idx < (arr_size); __arr_idx++) { \
+ if (!(__elem_bitmask & BIT_ULL(__arr_idx))) \
+ continue; \
+ if (elbi) { \
+ __rc = hl_pci_elbi_read(hdev, (addr_arr)[__arr_idx], &__read_val); \
+ if (__rc) \
+ break; \
+ } else { \
+ __read_val = RREG32((u32)(addr_arr)[__arr_idx]); \
+ } \
+ if (__read_val == (expected_val)) \
+ __elem_bitmask &= ~BIT_ULL(__arr_idx); \
+ } \
+ if (__rc || (__elem_bitmask == 0)) \
+ break; \
+ if (timeout_us && ktime_compare(ktime_get(), __timeout) > 0) \
+ break; \
+ if (sleep_us) \
+ usleep_range((sleep_us >> 2) + 1, sleep_us); \
+ } \
+ __rc ? __rc : ((__elem_bitmask == 0) ? 0 : -ETIMEDOUT); \
+})
+
+#define hl_poll_reg_array_timeout(hdev, addr_arr, arr_size, expected_val, sleep_us, \
+ timeout_us) \
+ hl_poll_reg_array_timeout_common(hdev, addr_arr, arr_size, expected_val, sleep_us, \
+ timeout_us, false)
+
+#define hl_poll_reg_array_timeout_elbi(hdev, addr_arr, arr_size, expected_val, sleep_us, \
+ timeout_us) \
+ hl_poll_reg_array_timeout_common(hdev, addr_arr, arr_size, expected_val, sleep_us, \
+ timeout_us, true)
+
/*
* address in this macro points always to a memory location in the
* host's (server's) memory. That location is updated asynchronously
@@ -2540,31 +2626,6 @@ void hl_wreg(struct hl_device *hdev, u32 reg, u32 val);
(cond) ? 0 : -ETIMEDOUT; \
})

-#define hl_poll_timeout_device_memory(hdev, addr, val, cond, sleep_us, \
- timeout_us) \
-({ \
- ktime_t __timeout; \
- if (hdev->pdev) \
- __timeout = ktime_add_us(ktime_get(), timeout_us); \
- else \
- __timeout = ktime_add_us(ktime_get(),\
- min((u64)(timeout_us * 10), \
- (u64) HL_SIM_MAX_TIMEOUT_US)); \
- might_sleep_if(sleep_us); \
- for (;;) { \
- (val) = readl(addr); \
- if (cond) \
- break; \
- if (timeout_us && ktime_compare(ktime_get(), __timeout) > 0) { \
- (val) = readl(addr); \
- break; \
- } \
- if (sleep_us) \
- usleep_range((sleep_us >> 2) + 1, sleep_us); \
- } \
- (cond) ? 0 : -ETIMEDOUT; \
-})
-
#define HL_USR_MAPPED_BLK_INIT(blk, base, sz) \
({ \
struct user_mapped_block *p = blk; \
--
2.25.1