[PATCH 4/4] habanalabs: Add IRQ handler for CS completions

From: Tomer Tayar
Date: Thu Oct 03 2019 - 04:42:33 EST


This patch adds an IRQ handler for completions of command submissions
(CSs) whose jobs are sent on H/W queues.
It also adds a CS shadow queue, from which the handler retrieves the
completed CS, and a dedicated workqueue, on which the handler queues a
work item that frees the jobs of that CS.

Signed-off-by: Tomer Tayar <ttayar@xxxxxxxxx>
---
drivers/misc/habanalabs/command_submission.c | 16 +++++++
drivers/misc/habanalabs/device.c | 27 +++++++++++-
drivers/misc/habanalabs/habanalabs.h | 18 ++++++++
drivers/misc/habanalabs/hw_queue.c | 2 +
drivers/misc/habanalabs/irq.c | 46 ++++++++++++++++++++
5 files changed, 107 insertions(+), 2 deletions(-)

diff --git a/drivers/misc/habanalabs/command_submission.c b/drivers/misc/habanalabs/command_submission.c
index 25dc7308da19..b995a02a31dd 100644
--- a/drivers/misc/habanalabs/command_submission.c
+++ b/drivers/misc/habanalabs/command_submission.c
@@ -267,6 +267,8 @@ static void cs_do_release(struct kref *ref)

hl_ctx_put(cs->ctx);

+ hdev->shadow_cs_queue[cs->sequence & (HL_MAX_PENDING_CS - 1)] = NULL;
+
if (cs->timedout)
dma_fence_set_error(cs->fence, -ETIMEDOUT);
else if (cs->aborted)
@@ -391,6 +393,7 @@ void hl_cs_rollback_all(struct hl_device *hdev)

/* flush all completions */
flush_workqueue(hdev->cq_wq);
+ flush_workqueue(hdev->cs_cmplt_wq);

/* Make sure we don't have leftovers in the H/W queues mirror list */
list_for_each_entry_safe(cs, tmp, &hdev->hw_queues_mirror_list,
@@ -415,6 +418,16 @@ static void job_wq_completion(struct work_struct *work)
free_job(hdev, job);
}

+static void cs_completion(struct work_struct *work)
+{
+ struct hl_cs *cs = container_of(work, struct hl_cs, finish_work);
+ struct hl_device *hdev = cs->ctx->hdev;
+ struct hl_cs_job *job, *tmp;
+
+ list_for_each_entry_safe(job, tmp, &cs->job_list, cs_node)
+ free_job(hdev, job);
+}
+
static int validate_queue_index(struct hl_device *hdev,
struct hl_cs_chunk *chunk,
enum hl_queue_type *queue_type,
@@ -625,6 +638,9 @@ static int _hl_cs_ioctl(struct hl_fpriv *hpriv, void __user *chunks,
goto free_cs_object;
}

+ if (job->queue_type == QUEUE_TYPE_HW)
+ INIT_WORK(&cs->finish_work, cs_completion);
+
rc = hl_hw_queue_schedule_cs(cs);
if (rc) {
dev_err(hdev->dev,
diff --git a/drivers/misc/habanalabs/device.c b/drivers/misc/habanalabs/device.c
index 2f5a4da707e7..6c13f05c3120 100644
--- a/drivers/misc/habanalabs/device.c
+++ b/drivers/misc/habanalabs/device.c
@@ -284,11 +284,19 @@ static int device_early_init(struct hl_device *hdev)
goto free_cq_wq;
}

+ hdev->cs_cmplt_wq = alloc_workqueue("hl-cs-completions", WQ_UNBOUND, 0);
+ if (!hdev->cs_cmplt_wq) {
+ dev_err(hdev->dev,
+ "Failed to allocate CS completions workqueue\n");
+ rc = -ENOMEM;
+ goto free_eq_wq;
+ }
+
hdev->hl_chip_info = kzalloc(sizeof(struct hwmon_chip_info),
GFP_KERNEL);
if (!hdev->hl_chip_info) {
rc = -ENOMEM;
- goto free_eq_wq;
+ goto free_cs_cmplt_wq;
}

hdev->idle_busy_ts_arr = kmalloc_array(HL_IDLE_BUSY_TS_ARR_SIZE,
@@ -314,6 +322,8 @@ static int device_early_init(struct hl_device *hdev)

free_chip_info:
kfree(hdev->hl_chip_info);
+free_cs_cmplt_wq:
+ destroy_workqueue(hdev->cs_cmplt_wq);
free_eq_wq:
destroy_workqueue(hdev->eq_wq);
free_cq_wq:
@@ -346,6 +356,7 @@ static void device_early_fini(struct hl_device *hdev)
kfree(hdev->idle_busy_ts_arr);
kfree(hdev->hl_chip_info);

+ destroy_workqueue(hdev->cs_cmplt_wq);
destroy_workqueue(hdev->eq_wq);
destroy_workqueue(hdev->cq_wq);

@@ -1138,6 +1149,14 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass)
}
}

+ hdev->shadow_cs_queue = kmalloc_array(HL_MAX_PENDING_CS,
+ sizeof(*hdev->shadow_cs_queue),
+ GFP_KERNEL | __GFP_ZERO);
+ if (!hdev->shadow_cs_queue) {
+ rc = -ENOMEM;
+ goto cq_fini;
+ }
+
/*
* Initialize the event queue. Must be done before hw_init,
* because there the address of the event queue is being
@@ -1146,7 +1165,7 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass)
rc = hl_eq_init(hdev, &hdev->event_queue);
if (rc) {
dev_err(hdev->dev, "failed to initialize event queue\n");
- goto cq_fini;
+ goto free_shadow_cs_queue;
}

/* MMU S/W must be initialized before kernel context is created */
@@ -1269,6 +1288,8 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass)
hl_mmu_fini(hdev);
eq_fini:
hl_eq_fini(hdev, &hdev->event_queue);
+free_shadow_cs_queue:
+ kfree(hdev->shadow_cs_queue);
cq_fini:
for (i = 0 ; i < cq_ready_cnt ; i++)
hl_cq_fini(hdev, &hdev->completion_queue[i]);
@@ -1383,6 +1404,8 @@ void hl_device_fini(struct hl_device *hdev)

hl_eq_fini(hdev, &hdev->event_queue);

+ kfree(hdev->shadow_cs_queue);
+
for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
hl_cq_fini(hdev, &hdev->completion_queue[i]);
kfree(hdev->completion_queue);
diff --git a/drivers/misc/habanalabs/habanalabs.h b/drivers/misc/habanalabs/habanalabs.h
index c1af83f96415..2efb5e1e62cb 100644
--- a/drivers/misc/habanalabs/habanalabs.h
+++ b/drivers/misc/habanalabs/habanalabs.h
@@ -722,6 +722,7 @@ struct hl_userptr {
* @job_lock: spinlock for the CS's jobs list. Needed for free_job.
* @refcount: reference counter for usage of the CS.
* @fence: pointer to the fence object of this CS.
+ * @finish_work: workqueue object to run when CS is completed by H/W.
* @work_tdr: delayed work node for TDR.
* @mirror_node : node in device mirror list of command submissions.
* @debugfs_list: node in debugfs list of command submissions.
@@ -741,6 +742,7 @@ struct hl_cs {
spinlock_t job_lock;
struct kref refcount;
struct dma_fence *fence;
+ struct work_struct finish_work;
struct delayed_work work_tdr;
struct list_head mirror_node;
struct list_head debugfs_list;
@@ -1203,8 +1205,12 @@ struct hl_device_idle_busy_ts {
* @asic_name: ASIC specific nmae.
* @asic_type: ASIC specific type.
* @completion_queue: array of hl_cq.
+ * @shadow_cs_queue: pointer to a shadow queue that holds pointers to
+ * outstanding command submissions.
* @cq_wq: work queue of completion queues for executing work in process context
* @eq_wq: work queue of event queue for executing work in process context.
+ * @cs_cmplt_wq: work queue of CS completions for executing work in process
+ * context.
* @kernel_ctx: Kernel driver context structure.
* @kernel_queues: array of hl_hw_queue.
* @hw_queues_mirror_list: CS mirror list for TDR.
@@ -1284,8 +1290,10 @@ struct hl_device {
char asic_name[16];
enum hl_asic_type asic_type;
struct hl_cq *completion_queue;
+ struct hl_cs **shadow_cs_queue;
struct workqueue_struct *cq_wq;
struct workqueue_struct *eq_wq;
+ struct workqueue_struct *cs_cmplt_wq;
struct hl_ctx *kernel_ctx;
struct hl_hw_queue *kernel_queues;
struct list_head hw_queues_mirror_list;
@@ -1359,6 +1367,15 @@ struct hl_device {
u8 pldm;
};

+/**
+ * struct hl_cs_irq_info - IRQ info structure for CS completion interrupt.
+ * @hdev: pointer to habanalabs device structure.
+ * @relative_idx: CS completion relative interrupt index (0-based).
+ */
+struct hl_cs_irq_info {
+ struct hl_device *hdev;
+ int relative_idx;
+};

/*
* IOCTLs
@@ -1470,6 +1487,7 @@ void hl_cq_reset(struct hl_device *hdev, struct hl_cq *q);
void hl_eq_reset(struct hl_device *hdev, struct hl_eq *q);
irqreturn_t hl_irq_handler_cq(int irq, void *arg);
irqreturn_t hl_irq_handler_eq(int irq, void *arg);
+irqreturn_t hl_irq_handler_cs_cmplt(int irq, void *arg);
u32 hl_cq_inc_ptr(u32 ptr);

int hl_asid_init(struct hl_device *hdev);
diff --git a/drivers/misc/habanalabs/hw_queue.c b/drivers/misc/habanalabs/hw_queue.c
index a1205ae47250..7b80e571a27c 100644
--- a/drivers/misc/habanalabs/hw_queue.c
+++ b/drivers/misc/habanalabs/hw_queue.c
@@ -469,6 +469,8 @@ int hl_hw_queue_schedule_cs(struct hl_cs *cs)
goto unroll_cq_resv;
}

+ hdev->shadow_cs_queue[cs->sequence & (HL_MAX_PENDING_CS - 1)] = cs;
+
spin_lock(&hdev->hw_queues_mirror_lock);
list_add_tail(&cs->mirror_node, &hdev->hw_queues_mirror_list);

diff --git a/drivers/misc/habanalabs/irq.c b/drivers/misc/habanalabs/irq.c
index fac65fbd70e8..93fa13218dd4 100644
--- a/drivers/misc/habanalabs/irq.c
+++ b/drivers/misc/habanalabs/irq.c
@@ -205,6 +205,52 @@ irqreturn_t hl_irq_handler_eq(int irq, void *arg)
return IRQ_HANDLED;
}

+/*
+ * hl_irq_handler_cs_cmplt() - irq handler for CS completions.
+ * @irq: IRQ number
+ * @arg: pointer to hl_cs_irq_info structure.
+ */
+irqreturn_t hl_irq_handler_cs_cmplt(int irq, void *arg)
+{
+ struct hl_cs_irq_info *cs_irq_info = arg;
+ struct hl_device *hdev = cs_irq_info->hdev;
+ struct hl_cs *cs;
+ struct hl_cs_job *job;
+ struct hl_cq *cq;
+ int relative_idx = cs_irq_info->relative_idx;
+
+ if (hdev->disabled) {
+ dev_dbg(hdev->dev,
+ "Device disabled but received IRQ %d for CS completion\n",
+ irq);
+ goto out;
+ }
+
+ cs = hdev->shadow_cs_queue[relative_idx & (HL_MAX_PENDING_CS - 1)];
+ if (!cs) {
+ dev_warn(hdev->dev,
+ "No pointer to CS in shadow array at index %d\n",
+ relative_idx);
+ goto out;
+ }
+
+ /*
+ * Parallel IRQ handlers of different CSs may access the same CQs, but
+ * no locking is needed because "free_slots_cnt" is atomic. The CI
+ * counters of the queues/CQs are not used for the H/W queue type, so
+ * they are not updated.
+ * Must run before queue_work(): cs_completion() frees these jobs.
+ */
+ list_for_each_entry(job, &cs->job_list, cs_node) {
+ cq = &hdev->completion_queue[job->hw_queue_id];
+ atomic_inc(&cq->free_slots_cnt);
+ }
+
+ queue_work(hdev->cs_cmplt_wq, &cs->finish_work);
+out:
+ return IRQ_HANDLED;
+}
+
/*
* hl_cq_init - main initialization function for an cq object
*
--
2.17.1