[PATCH 3/3] habanalabs: add support for notification via eventfd

From: Oded Gabbay
Date: Mon May 09 2022 - 04:57:41 EST


From: Tal Cohen <talcohen@xxxxxxxxx>

The driver will be able to send notification events towards
a user process, using user's registered event file descriptor.
The driver uses the notification mechanism to inform the
user about an occurred event.
A user thread can wait until a notification is received from
the driver.

The driver stores the occurred event until the user reads it,
using HL_INFO_GET_EVENTS - new ioctl opcode in the INFO ioctl.

Gaudi specific implementation includes sending a notification
on a TPC assertion event that is received from f/w.

Signed-off-by: Tal Cohen <talcohen@xxxxxxxxx>
Reviewed-by: Oded Gabbay <ogabbay@xxxxxxxxxx>
Signed-off-by: Oded Gabbay <ogabbay@xxxxxxxxxx>
---
drivers/misc/habanalabs/common/device.c | 52 +++++++++++++++
drivers/misc/habanalabs/common/habanalabs.h | 40 ++++++++----
.../misc/habanalabs/common/habanalabs_drv.c | 9 +++
.../misc/habanalabs/common/habanalabs_ioctl.c | 65 +++++++++++++++++++
drivers/misc/habanalabs/gaudi/gaudi.c | 14 +++-
include/uapi/misc/habanalabs.h | 15 +++++
6 files changed, 182 insertions(+), 13 deletions(-)

diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c
index 15df89b31e1b..315510aaca35 100644
--- a/drivers/misc/habanalabs/common/device.c
+++ b/drivers/misc/habanalabs/common/device.c
@@ -285,6 +285,14 @@ static void hpriv_release(struct kref *ref)

hdev->compute_ctx_in_release = 0;

+ /* release the eventfd */
+ if (hpriv->notifier_event.eventfd) {
+ eventfd_ctx_put(hpriv->notifier_event.eventfd);
+ hpriv->notifier_event.eventfd = 0;
+ }
+
+ mutex_destroy(&hpriv->notifier_event.lock);
+
kfree(hpriv);
}

@@ -355,6 +363,13 @@ static int hl_device_release_ctrl(struct inode *inode, struct file *filp)
list_del(&hpriv->dev_node);
mutex_unlock(&hdev->fpriv_ctrl_list_lock);
out:
+ /* release the eventfd */
+ if (hpriv->notifier_event.eventfd) {
+ eventfd_ctx_put(hpriv->notifier_event.eventfd);
+ hpriv->notifier_event.eventfd = 0;
+ }
+
+ mutex_destroy(&hpriv->notifier_event.lock);
put_pid(hpriv->taskpid);

kfree(hpriv);
@@ -1506,6 +1521,43 @@ int hl_device_reset(struct hl_device *hdev, u32 flags)
return rc;
}

+static void hl_notifier_event_send(struct hl_notifier_event *notifier_event, u64 event)
+{
+ mutex_lock(&notifier_event->lock);
+ notifier_event->events_mask |= event;
+ if (notifier_event->eventfd)
+ eventfd_signal(notifier_event->eventfd, 1);
+
+ mutex_unlock(&notifier_event->lock);
+}
+
+/*
+ * hl_notifier_event_send_all - notify all user processes via eventfd
+ *
+ * @hdev: pointer to habanalabs device structure
+ * @event: the occurred event
+ * Returns 0 for success or an error on failure.
+ */
+void hl_notifier_event_send_all(struct hl_device *hdev, u64 event)
+{
+ struct hl_fpriv *hpriv;
+
+ mutex_lock(&hdev->fpriv_list_lock);
+
+ list_for_each_entry(hpriv, &hdev->fpriv_list, dev_node)
+ hl_notifier_event_send(&hpriv->notifier_event, event);
+
+ mutex_unlock(&hdev->fpriv_list_lock);
+
+ /* control device */
+ mutex_lock(&hdev->fpriv_ctrl_list_lock);
+
+ list_for_each_entry(hpriv, &hdev->fpriv_ctrl_list, dev_node)
+ hl_notifier_event_send(&hpriv->notifier_event, event);
+
+ mutex_unlock(&hdev->fpriv_ctrl_list_lock);
+}
+
/*
* hl_device_init - main initialization function for habanalabs device
*
diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h
index 918e8a04acab..8977ec67dba7 100644
--- a/drivers/misc/habanalabs/common/habanalabs.h
+++ b/drivers/misc/habanalabs/common/habanalabs.h
@@ -21,6 +21,7 @@
#include <linux/hashtable.h>
#include <linux/debugfs.h>
#include <linux/rwsem.h>
+#include <linux/eventfd.h>
#include <linux/bitfield.h>
#include <linux/genalloc.h>
#include <linux/sched/signal.h>
@@ -1932,6 +1933,18 @@ struct hl_debug_params {
bool enable;
};

+/**
+ * struct hl_notifier_event - holds the notifier data structure
+ * @eventfd: the event file descriptor to raise the notifications
+ * @lock: mutex lock to protect the notifier data flows
+ * @events_mask: indicates the bitmap events
+ */
+struct hl_notifier_event {
+ struct eventfd_ctx *eventfd;
+ struct mutex lock;
+ u64 events_mask;
+};
+
/*
* FILE PRIVATE STRUCTURE
*/
@@ -1943,24 +1956,25 @@ struct hl_debug_params {
* @taskpid: current process ID.
* @ctx: current executing context. TODO: remove for multiple ctx per process
* @ctx_mgr: context manager to handle multiple context for this FD.
- * @cb_mgr: command buffer manager to handle multiple buffers for this FD.
* @mem_mgr: manager descriptor for memory exportable via mmap
+ * @notifier_event: notifier eventfd towards user process
* @debugfs_list: list of relevant ASIC debugfs.
* @dev_node: node in the device list of file private data
* @refcount: number of related contexts.
* @restore_phase_mutex: lock for context switch and restore phase.
*/
struct hl_fpriv {
- struct hl_device *hdev;
- struct file *filp;
- struct pid *taskpid;
- struct hl_ctx *ctx;
- struct hl_ctx_mgr ctx_mgr;
- struct hl_mem_mgr mem_mgr;
- struct list_head debugfs_list;
- struct list_head dev_node;
- struct kref refcount;
- struct mutex restore_phase_mutex;
+ struct hl_device *hdev;
+ struct file *filp;
+ struct pid *taskpid;
+ struct hl_ctx *ctx;
+ struct hl_ctx_mgr ctx_mgr;
+ struct hl_mem_mgr mem_mgr;
+ struct hl_notifier_event notifier_event;
+ struct list_head debugfs_list;
+ struct list_head dev_node;
+ struct kref refcount;
+ struct mutex restore_phase_mutex;
};


@@ -2676,8 +2690,8 @@ struct hl_reset_info {
* @state_dump_specs: constants and dictionaries needed to dump system state.
* @multi_cs_completion: array of multi-CS completion.
* @clk_throttling: holds information about current/previous clock throttling events
- * @reset_info: holds current device reset information.
* @last_error: holds information about last session in which CS timeout or razwi error occurred.
+ * @reset_info: holds current device reset information.
* @stream_master_qid_arr: pointer to array with QIDs of master streams.
* @fw_major_version: major version of current loaded preboot
* @dram_used_mem: current DRAM memory consumption.
@@ -3071,6 +3085,8 @@ int hl_device_utilization(struct hl_device *hdev, u32 *utilization);
int hl_build_hwmon_channel_info(struct hl_device *hdev,
struct cpucp_sensor *sensors_arr);

+void hl_notifier_event_send_all(struct hl_device *hdev, u64 event);
+
int hl_sysfs_init(struct hl_device *hdev);
void hl_sysfs_fini(struct hl_device *hdev);

diff --git a/drivers/misc/habanalabs/common/habanalabs_drv.c b/drivers/misc/habanalabs/common/habanalabs_drv.c
index 1210de39d661..c97173e9507d 100644
--- a/drivers/misc/habanalabs/common/habanalabs_drv.c
+++ b/drivers/misc/habanalabs/common/habanalabs_drv.c
@@ -134,6 +134,10 @@ int hl_device_open(struct inode *inode, struct file *filp)
hpriv->hdev = hdev;
filp->private_data = hpriv;
hpriv->filp = filp;
+ hpriv->notifier_event.events_mask = 0;
+ hpriv->notifier_event.eventfd = 0;
+
+ mutex_init(&hpriv->notifier_event.lock);
mutex_init(&hpriv->restore_phase_mutex);
kref_init(&hpriv->refcount);
nonseekable_open(inode, filp);
@@ -208,6 +212,7 @@ int hl_device_open(struct inode *inode, struct file *filp)
hl_ctx_mgr_fini(hpriv->hdev, &hpriv->ctx_mgr);
filp->private_data = NULL;
mutex_destroy(&hpriv->restore_phase_mutex);
+ mutex_destroy(&hpriv->notifier_event.lock);
put_pid(hpriv->taskpid);

kfree(hpriv);
@@ -241,6 +246,10 @@ int hl_device_open_ctrl(struct inode *inode, struct file *filp)
hpriv->hdev = hdev;
filp->private_data = hpriv;
hpriv->filp = filp;
+ hpriv->notifier_event.events_mask = 0;
+ hpriv->notifier_event.eventfd = 0;
+
+ mutex_init(&hpriv->notifier_event.lock);
nonseekable_open(inode, filp);

hpriv->taskpid = get_task_pid(current, PIDTYPE_PID);
diff --git a/drivers/misc/habanalabs/common/habanalabs_ioctl.c b/drivers/misc/habanalabs/common/habanalabs_ioctl.c
index bfb5cfe68110..d1ef56a8d3ac 100644
--- a/drivers/misc/habanalabs/common/habanalabs_ioctl.c
+++ b/drivers/misc/habanalabs/common/habanalabs_ioctl.c
@@ -116,6 +116,25 @@ static int hw_events_info(struct hl_device *hdev, bool aggregate,
return copy_to_user(out, arr, min(max_size, size)) ? -EFAULT : 0;
}

+static int events_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
+{
+ int rc;
+ u32 max_size = args->return_size;
+ u64 events_mask;
+ void __user *out = (void __user *) (uintptr_t) args->return_pointer;
+
+ if ((max_size < sizeof(u64)) || (!out))
+ return -EINVAL;
+
+ mutex_lock(&hpriv->notifier_event.lock);
+ events_mask = hpriv->notifier_event.events_mask;
+ hpriv->notifier_event.events_mask = 0;
+ mutex_unlock(&hpriv->notifier_event.lock);
+
+ rc = copy_to_user(out, &events_mask, sizeof(u64));
+ return rc;
+}
+
static int dram_usage_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
{
struct hl_device *hdev = hpriv->hdev;
@@ -614,6 +633,43 @@ static int dev_mem_alloc_page_sizes_info(struct hl_fpriv *hpriv, struct hl_info_
return copy_to_user(out, &info, min_t(size_t, max_size, sizeof(info))) ? -EFAULT : 0;
}

+static int eventfd_register(struct hl_fpriv *hpriv, struct hl_info_args *args)
+{
+ int rc;
+
+ /* check if there is already a registered on that process */
+ mutex_lock(&hpriv->notifier_event.lock);
+ if (hpriv->notifier_event.eventfd) {
+ mutex_unlock(&hpriv->notifier_event.lock);
+ return -EINVAL;
+ }
+
+ hpriv->notifier_event.eventfd = eventfd_ctx_fdget(args->eventfd);
+ if (IS_ERR(hpriv->notifier_event.eventfd)) {
+ rc = PTR_ERR(hpriv->notifier_event.eventfd);
+ hpriv->notifier_event.eventfd = 0;
+ mutex_unlock(&hpriv->notifier_event.lock);
+ return rc;
+ }
+
+ mutex_unlock(&hpriv->notifier_event.lock);
+ return 0;
+}
+
+static int eventfd_unregister(struct hl_fpriv *hpriv, struct hl_info_args *args)
+{
+ mutex_lock(&hpriv->notifier_event.lock);
+ if (!hpriv->notifier_event.eventfd) {
+ mutex_unlock(&hpriv->notifier_event.lock);
+ return -EINVAL;
+ }
+
+ eventfd_ctx_put(hpriv->notifier_event.eventfd);
+ hpriv->notifier_event.eventfd = 0;
+ mutex_unlock(&hpriv->notifier_event.lock);
+ return 0;
+}
+
static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data,
struct device *dev)
{
@@ -667,6 +723,9 @@ static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data,
case HL_INFO_DEV_MEM_ALLOC_PAGE_SIZES:
return dev_mem_alloc_page_sizes_info(hpriv, args);

+ case HL_INFO_GET_EVENTS:
+ return events_info(hpriv, args);
+
default:
break;
}
@@ -717,6 +776,12 @@ static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data,
case HL_INFO_DRAM_PENDING_ROWS:
return dram_pending_rows_info(hpriv, args);

+ case HL_INFO_REGISTER_EVENTFD:
+ return eventfd_register(hpriv, args);
+
+ case HL_INFO_UNREGISTER_EVENTFD:
+ return eventfd_unregister(hpriv, args);
+
default:
dev_err(dev, "Invalid request %d\n", args->op);
rc = -EINVAL;
diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c
index 08cd60300b4f..1c388537de33 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi.c
@@ -7879,7 +7879,6 @@ static void gaudi_handle_eqe(struct hl_device *hdev,
case GAUDI_EVENT_MMU_PAGE_FAULT:
case GAUDI_EVENT_MMU_WR_PERM:
case GAUDI_EVENT_RAZWI_OR_ADC:
- case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
fallthrough;
@@ -7899,6 +7898,19 @@ static void gaudi_handle_eqe(struct hl_device *hdev,
hl_fw_unmask_irq(hdev, event_type);
break;

+ case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
+ gaudi_print_irq_info(hdev, event_type, true);
+ gaudi_handle_qman_err(hdev, event_type);
+ hl_fw_unmask_irq(hdev, event_type);
+
+ /* In TPC QM event, notify on TPC assertion. While there isn't
+ * a specific event for assertion yet, the FW generates QM event.
+ * The SW upper layer will inspect an internal mapped area to indicate
+ * if the event is a tpc assertion or tpc QM.
+ */
+ hl_notifier_event_send_all(hdev, HL_NOTIFIER_EVENT_TPC_ASSERT);
+ break;
+
case GAUDI_EVENT_RAZWI_OR_ADC_SW:
gaudi_print_irq_info(hdev, event_type, true);
goto reset_device;
diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h
index 3576bf2b4841..52540d5b4fc9 100644
--- a/include/uapi/misc/habanalabs.h
+++ b/include/uapi/misc/habanalabs.h
@@ -349,6 +349,9 @@ enum hl_server_type {
* Razwi initiator.
* Razwi cause, was it a page fault or MMU access error.
* HL_INFO_DEV_MEM_ALLOC_PAGE_SIZES - Retrieve valid page sizes for device memory allocation
+ * HL_INFO_REGISTER_EVENTFD - Register eventfd for event notifications.
+ * HL_INFO_UNREGISTER_EVENTFD - Unregister eventfd
+ * HL_INFO_GET_EVENTS - Retrieve the last occurred events
*/
#define HL_INFO_HW_IP_INFO 0
#define HL_INFO_HW_EVENTS 1
@@ -374,6 +377,9 @@ enum hl_server_type {
#define HL_INFO_CS_TIMEOUT_EVENT 24
#define HL_INFO_RAZWI_EVENT 25
#define HL_INFO_DEV_MEM_ALLOC_PAGE_SIZES 26
+#define HL_INFO_REGISTER_EVENTFD 28
+#define HL_INFO_UNREGISTER_EVENTFD 29
+#define HL_INFO_GET_EVENTS 30

#define HL_INFO_VERSION_MAX_LEN 128
#define HL_INFO_CARD_NAME_MAX_LEN 16
@@ -679,6 +685,7 @@ enum gaudi_dcores {
* @period_ms: Period value, in milliseconds, for utilization rate in range 100ms - 1000ms in 100 ms
* resolution. Currently not in use.
* @pll_index: Index as defined in hl_<asic type>_pll_index enumeration.
+ * @eventfd: event file descriptor for event notifications.
* @pad: Padding to 64 bit.
*/
struct hl_info_args {
@@ -691,6 +698,7 @@ struct hl_info_args {
__u32 ctx_id;
__u32 period_ms;
__u32 pll_index;
+ __u32 eventfd;
};

__u32 pad;
@@ -1390,6 +1398,13 @@ struct hl_debug_args {
__u32 ctx_id;
};

+/*
+ * Notifier event values - for the notification mechanism and the HL_INFO_GET_EVENTS command
+ *
+ * HL_NOTIFIER_EVENT_TPC_ASSERT - Indicates TPC assert event
+ */
+#define HL_NOTIFIER_EVENT_TPC_ASSERT (1 << 0)
+
/*
* Various information operations such as:
* - H/W IP information
--
2.25.1