[PATCH 2/2] habanalabs: add uapi to retrieve aggregate H/W events

From: Oded Gabbay
Date: Fri Aug 30 2019 - 06:57:09 EST


Add a new opcode to INFO IOCTL to retrieve aggregate H/W events. i.e. the
events counters are NOT cleared upon device reset, but count from the
loading of the driver.

Add the code to support it in the device event handling function.

Signed-off-by: Oded Gabbay <oded.gabbay@xxxxxxxxx>
---
drivers/misc/habanalabs/goya/goya.c | 9 +++++++--
drivers/misc/habanalabs/goya/goyaP.h | 3 ++-
drivers/misc/habanalabs/habanalabs.h | 3 ++-
drivers/misc/habanalabs/habanalabs_ioctl.c | 11 ++++++++---
include/uapi/misc/habanalabs.h | 3 +++
5 files changed, 22 insertions(+), 7 deletions(-)

diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c
index 0dd0b4429fee..1267ec75b19f 100644
--- a/drivers/misc/habanalabs/goya/goya.c
+++ b/drivers/misc/habanalabs/goya/goya.c
@@ -4469,6 +4469,7 @@ void goya_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry)
struct goya_device *goya = hdev->asic_specific;

goya->events_stat[event_type]++;
+ goya->events_stat_aggregate[event_type]++;

switch (event_type) {
case GOYA_ASYNC_EVENT_ID_PCIE_IF:
@@ -4550,12 +4551,16 @@ void goya_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry)
}
}

-void *goya_get_events_stat(struct hl_device *hdev, u32 *size)
+void *goya_get_events_stat(struct hl_device *hdev, bool aggregate, u32 *size)
{
struct goya_device *goya = hdev->asic_specific;

- *size = (u32) sizeof(goya->events_stat);
+ if (aggregate) {
+ *size = (u32) sizeof(goya->events_stat_aggregate);
+ return goya->events_stat_aggregate;
+ }

+ *size = (u32) sizeof(goya->events_stat);
return goya->events_stat;
}

diff --git a/drivers/misc/habanalabs/goya/goyaP.h b/drivers/misc/habanalabs/goya/goyaP.h
index d7f48c9c41cd..f830cfd5c04d 100644
--- a/drivers/misc/habanalabs/goya/goyaP.h
+++ b/drivers/misc/habanalabs/goya/goyaP.h
@@ -162,6 +162,7 @@ struct goya_device {

u64 ddr_bar_cur_addr;
u32 events_stat[GOYA_ASYNC_EVENT_ID_SIZE];
+ u32 events_stat_aggregate[GOYA_ASYNC_EVENT_ID_SIZE];
u32 hw_cap_initialized;
u8 device_cpu_mmu_mappings_done;
};
@@ -215,7 +216,7 @@ int goya_suspend(struct hl_device *hdev);
int goya_resume(struct hl_device *hdev);

void goya_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry);
-void *goya_get_events_stat(struct hl_device *hdev, u32 *size);
+void *goya_get_events_stat(struct hl_device *hdev, bool aggregate, u32 *size);

void goya_add_end_of_cb_packets(struct hl_device *hdev, u64 kernel_address,
u32 len, u64 cq_addr, u32 cq_val, u32 msix_vec);
diff --git a/drivers/misc/habanalabs/habanalabs.h b/drivers/misc/habanalabs/habanalabs.h
index 23b86b7f9732..aa7aaa710f12 100644
--- a/drivers/misc/habanalabs/habanalabs.h
+++ b/drivers/misc/habanalabs/habanalabs.h
@@ -558,7 +558,8 @@ struct hl_asic_funcs {
struct hl_eq_entry *eq_entry);
void (*set_pll_profile)(struct hl_device *hdev,
enum hl_pll_frequency freq);
- void* (*get_events_stat)(struct hl_device *hdev, u32 *size);
+ void* (*get_events_stat)(struct hl_device *hdev, bool aggregate,
+ u32 *size);
u64 (*read_pte)(struct hl_device *hdev, u64 addr);
void (*write_pte)(struct hl_device *hdev, u64 addr, u64 val);
void (*mmu_invalidate_cache)(struct hl_device *hdev, bool is_hard);
diff --git a/drivers/misc/habanalabs/habanalabs_ioctl.c b/drivers/misc/habanalabs/habanalabs_ioctl.c
index 6325e98a5ae9..bb84b6dc2223 100644
--- a/drivers/misc/habanalabs/habanalabs_ioctl.c
+++ b/drivers/misc/habanalabs/habanalabs_ioctl.c
@@ -75,7 +75,8 @@ static int hw_ip_info(struct hl_device *hdev, struct hl_info_args *args)
min((size_t)size, sizeof(hw_ip))) ? -EFAULT : 0;
}

-static int hw_events_info(struct hl_device *hdev, struct hl_info_args *args)
+static int hw_events_info(struct hl_device *hdev, bool aggregate,
+ struct hl_info_args *args)
{
u32 size, max_size = args->return_size;
void __user *out = (void __user *) (uintptr_t) args->return_pointer;
@@ -84,7 +85,7 @@ static int hw_events_info(struct hl_device *hdev, struct hl_info_args *args)
if ((!max_size) || (!out))
return -EINVAL;

- arr = hdev->asic_funcs->get_events_stat(hdev, &size);
+ arr = hdev->asic_funcs->get_events_stat(hdev, aggregate, &size);

return copy_to_user(out, arr, min(max_size, size)) ? -EFAULT : 0;
}
@@ -251,7 +252,7 @@ static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data,

switch (args->op) {
case HL_INFO_HW_EVENTS:
- rc = hw_events_info(hdev, args);
+ rc = hw_events_info(hdev, false, args);
break;

case HL_INFO_DRAM_USAGE:
@@ -266,6 +267,10 @@ static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data,
rc = device_utilization(hdev, args);
break;

+ case HL_INFO_HW_EVENTS_AGGREGATE:
+ rc = hw_events_info(hdev, true, args);
+ break;
+
default:
dev_err(dev, "Invalid request %d\n", args->op);
rc = -ENOTTY;
diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h
index 73ee212d7fa6..19f8039db2ea 100644
--- a/include/uapi/misc/habanalabs.h
+++ b/include/uapi/misc/habanalabs.h
@@ -93,6 +93,8 @@ enum hl_device_status {
* The period can be between 100ms to 1s, in
* resolution of 100ms. The return value is a
* percentage of the utilization rate.
+ * HL_INFO_HW_EVENTS_AGGREGATE - Receive an array describing how many times each
+ * event occurred since the driver was loaded.
*/
#define HL_INFO_HW_IP_INFO 0
#define HL_INFO_HW_EVENTS 1
@@ -100,6 +102,7 @@ enum hl_device_status {
#define HL_INFO_HW_IDLE 3
#define HL_INFO_DEVICE_STATUS 4
#define HL_INFO_DEVICE_UTILIZATION 6
+#define HL_INFO_HW_EVENTS_AGGREGATE 7

#define HL_INFO_VERSION_MAX_LEN 128

--
2.17.1