[PATCH 06/11] habanalabs: add new return code to device fd open

From: Oded Gabbay
Date: Wed Mar 16 2022 - 07:42:05 EST


From: Ofir Bitton <obitton@xxxxxxxxx>

In order to be more informative during device open, we are adding a
new return code -EAGAIN that indicates device is still going through
resource reclaiming and hence it cannot be used yet.

Signed-off-by: Ofir Bitton <obitton@xxxxxxxxx>
Reviewed-by: Oded Gabbay <ogabbay@xxxxxxxxxx>
Signed-off-by: Oded Gabbay <ogabbay@xxxxxxxxxx>
---
drivers/misc/habanalabs/common/device.c | 4 ++++
drivers/misc/habanalabs/common/habanalabs.h | 2 ++
drivers/misc/habanalabs/common/habanalabs_drv.c | 15 ++++++++++++++-
3 files changed, 20 insertions(+), 1 deletion(-)

diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c
index dc9341a64541..3eb392b4308a 100644
--- a/drivers/misc/habanalabs/common/device.c
+++ b/drivers/misc/habanalabs/common/device.c
@@ -107,6 +107,8 @@ static void hpriv_release(struct kref *ref)
hdev->is_compute_ctx_active = false;
mutex_unlock(&hdev->fpriv_list_lock);

+ hdev->compute_ctx_in_release = 0;
+
kfree(hpriv);
}

@@ -150,6 +152,8 @@ static int hl_device_release(struct inode *inode, struct file *filp)
hl_ts_mgr_fini(hpriv->hdev, &hpriv->ts_mem_mgr);
hl_ctx_mgr_fini(hdev, &hpriv->ctx_mgr);

+ hdev->compute_ctx_in_release = 1;
+
if (!hl_hpriv_put(hpriv))
dev_notice(hdev->dev,
"User process closed FD but device still in use\n");
diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h
index 564797766f42..0079f43bc596 100644
--- a/drivers/misc/habanalabs/common/habanalabs.h
+++ b/drivers/misc/habanalabs/common/habanalabs.h
@@ -2710,6 +2710,7 @@ struct hl_reset_info {
* cases where Linux was not loaded to device CPU
* @supports_wait_for_multi_cs: true if wait for multi CS is supported
* @is_compute_ctx_active: Whether there is an active compute context executing.
+ * @compute_ctx_in_release: true if the current compute context is being released.
*/
struct hl_device {
struct pci_dev *pdev;
@@ -2828,6 +2829,7 @@ struct hl_device {
u8 supports_wait_for_multi_cs;
u8 stream_master_qid_arr_size;
u8 is_compute_ctx_active;
+ u8 compute_ctx_in_release;

/* Parameters for bring-up */
u64 nic_ports_mask;
diff --git a/drivers/misc/habanalabs/common/habanalabs_drv.c b/drivers/misc/habanalabs/common/habanalabs_drv.c
index ca404ed9d9a7..e870c32a0733 100644
--- a/drivers/misc/habanalabs/common/habanalabs_drv.c
+++ b/drivers/misc/habanalabs/common/habanalabs_drv.c
@@ -150,7 +150,20 @@ int hl_device_open(struct inode *inode, struct file *filp)
dev_err_ratelimited(hdev->dev,
"Can't open %s because it is %s\n",
dev_name(hdev->dev), hdev->status[status]);
- rc = -EPERM;
+
+ if (status == HL_DEVICE_STATUS_IN_RESET)
+ rc = -EAGAIN;
+ else
+ rc = -EPERM;
+
+ goto out_err;
+ }
+
+ if (hdev->compute_ctx_in_release) {
+ dev_dbg_ratelimited(hdev->dev,
+ "Can't open %s because another user is still releasing it\n",
+ dev_name(hdev->dev));
+ rc = -EAGAIN;
goto out_err;
}

--
2.25.1