[PATCH 1/5] habanalabs: add reset support when user closes FD

From: Oded Gabbay
Date: Mon Feb 22 2021 - 17:08:56 EST


From: Ofir Bitton <obitton@xxxxxxxxx>

In order to support command submissions that are done directly from
user space, the driver must perform a soft reset once the user closes
its FD. If the soft reset fails or the device is not idle afterwards,
a hard reset should be performed.

Signed-off-by: Ofir Bitton <obitton@xxxxxxxxx>
Reviewed-by: Oded Gabbay <ogabbay@xxxxxxxxxx>
Signed-off-by: Oded Gabbay <ogabbay@xxxxxxxxxx>
---
drivers/misc/habanalabs/common/device.c | 21 +++++++++++++++++++--
drivers/misc/habanalabs/common/habanalabs.h | 2 ++
2 files changed, 21 insertions(+), 2 deletions(-)

diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c
index 15fcb5c31c4b..ed1838c15c78 100644
--- a/drivers/misc/habanalabs/common/device.c
+++ b/drivers/misc/habanalabs/common/device.c
@@ -93,9 +93,26 @@ void hl_hpriv_put(struct hl_fpriv *hpriv)
static int hl_device_release(struct inode *inode, struct file *filp)
{
struct hl_fpriv *hpriv = filp->private_data;
+ struct hl_device *hdev = hpriv->hdev;

- hl_cb_mgr_fini(hpriv->hdev, &hpriv->cb_mgr);
- hl_ctx_mgr_fini(hpriv->hdev, &hpriv->ctx_mgr);
+ hl_cb_mgr_fini(hdev, &hpriv->cb_mgr);
+ hl_ctx_mgr_fini(hdev, &hpriv->ctx_mgr);
+
+ if (hdev->reset_upon_device_release) {
+ u64 idle_mask[HL_BUSY_ENGINES_MASK_EXT_SIZE] = {0};
+
+ /* We try soft reset first */
+ hl_device_reset(hdev, false, false);
+
+ /* If device is not idle perform hard reset */
+ if (!hdev->asic_funcs->is_device_idle(hdev, idle_mask,
+ HL_BUSY_ENGINES_MASK_EXT_SIZE, NULL)) {
+ dev_info(hdev->dev,
+ "device is not idle (mask %#llx %#llx) after soft reset, performing hard reset",
+ idle_mask[0], idle_mask[1]);
+ hl_device_reset(hdev, true, false);
+ }
+ }

filp->private_data = NULL;

diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h
index dc9f5a83dfc9..706361a81410 100644
--- a/drivers/misc/habanalabs/common/habanalabs.h
+++ b/drivers/misc/habanalabs/common/habanalabs.h
@@ -1920,6 +1920,7 @@ struct hl_mmu_funcs {
* @device_fini_pending: true if device_fini was called and might be
* waiting for the reset thread to finish
* @supports_staged_submission: true if staged submissions are supported
+ * @reset_upon_device_release: true if reset is required upon device release
*/
struct hl_device {
struct pci_dev *pdev;
@@ -2026,6 +2027,7 @@ struct hl_device {
u8 process_kill_trial_cnt;
u8 device_fini_pending;
u8 supports_staged_submission;
+ u8 reset_upon_device_release;

/* Parameters for bring-up */
u64 nic_ports_mask;
--
2.25.1