[PATCH 5/9] habanalabs: add support for new cpucp return codes

From: Oded Gabbay
Date: Tue Sep 06 2022 - 11:33:20 EST


From: Ofir Bitton <obitton@xxxxxxxxx>

Firmware now responds with a more detailed cpucp return codes.
Driver can now distinguish between error and debug return codes.

Signed-off-by: Ofir Bitton <obitton@xxxxxxxxx>
Reviewed-by: Oded Gabbay <ogabbay@xxxxxxxxxx>
Signed-off-by: Oded Gabbay <ogabbay@xxxxxxxxxx>
---
drivers/misc/habanalabs/common/firmware_if.c | 34 +++++++++++++++++--
drivers/misc/habanalabs/common/habanalabs.h | 2 ++
drivers/misc/habanalabs/gaudi2/gaudi2.c | 2 ++
.../misc/habanalabs/include/common/cpucp_if.h | 17 +++++++++-
4 files changed, 51 insertions(+), 4 deletions(-)

diff --git a/drivers/misc/habanalabs/common/firmware_if.c b/drivers/misc/habanalabs/common/firmware_if.c
index 8bfb459a8282..c2375917fc02 100644
--- a/drivers/misc/habanalabs/common/firmware_if.c
+++ b/drivers/misc/habanalabs/common/firmware_if.c
@@ -252,7 +252,7 @@ int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg,
struct cpucp_packet *pkt;
dma_addr_t pkt_dma_addr;
struct hl_bd *sent_bd;
- u32 tmp, expected_ack_val, pi;
+ u32 tmp, expected_ack_val, pi, opcode;
int rc;

pkt = hl_cpu_accessible_dma_pool_alloc(hdev, len, &pkt_dma_addr);
@@ -319,8 +319,35 @@ int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg,

rc = (tmp & CPUCP_PKT_CTL_RC_MASK) >> CPUCP_PKT_CTL_RC_SHIFT;
if (rc) {
- dev_dbg(hdev->dev, "F/W ERROR %d for CPU packet %d\n",
- rc, (tmp & CPUCP_PKT_CTL_OPCODE_MASK) >> CPUCP_PKT_CTL_OPCODE_SHIFT);
+ opcode = (tmp & CPUCP_PKT_CTL_OPCODE_MASK) >> CPUCP_PKT_CTL_OPCODE_SHIFT;
+
+ if (!prop->supports_advanced_cpucp_rc) {
+ dev_dbg(hdev->dev, "F/W ERROR %d for CPU packet %d\n", rc, opcode);
+ goto scrub_descriptor;
+ }
+
+ switch (rc) {
+ case cpucp_packet_invalid:
+ dev_err(hdev->dev,
+ "CPU packet %d is not supported by F/W\n", opcode);
+ break;
+ case cpucp_packet_fault:
+ dev_err(hdev->dev,
+ "F/W failed processing CPU packet %d\n", opcode);
+ break;
+ case cpucp_packet_invalid_pkt:
+ dev_dbg(hdev->dev,
+ "CPU packet %d is not supported by F/W\n", opcode);
+ break;
+ case cpucp_packet_invalid_params:
+ dev_err(hdev->dev,
+ "F/W reports invalid parameters for CPU packet %d\n", opcode);
+ break;
+
+ default:
+ dev_err(hdev->dev,
+ "Unknown F/W ERROR %d for CPU packet %d\n", rc, opcode);
+ }

/* propagate the return code from the f/w to the callers who want to check it */
if (result)
@@ -332,6 +359,7 @@ int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg,
*result = le64_to_cpu(pkt->result);
}

+scrub_descriptor:
/* Scrub previous buffer descriptor 'ctl' field which contains the
* previous PI value written during packet submission.
* We must do this or else F/W can read an old value upon queue wraparound.
diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h
index 5083c5098941..4d7681a2b40a 100644
--- a/drivers/misc/habanalabs/common/habanalabs.h
+++ b/drivers/misc/habanalabs/common/habanalabs.h
@@ -678,6 +678,7 @@ struct hl_hints_range {
* @set_max_power_on_device_init: true if need to set max power in F/W on device init.
* @supports_user_set_page_size: true if user can set the allocation page size.
* @dma_mask: the dma mask to be set for this device
+ * @supports_advanced_cpucp_rc: true if new cpucp opcodes are supported.
*/
struct asic_fixed_properties {
struct hw_queue_properties *hw_queues_props;
@@ -785,6 +786,7 @@ struct asic_fixed_properties {
u8 set_max_power_on_device_init;
u8 supports_user_set_page_size;
u8 dma_mask;
+ u8 supports_advanced_cpucp_rc;
};

/**
diff --git a/drivers/misc/habanalabs/gaudi2/gaudi2.c b/drivers/misc/habanalabs/gaudi2/gaudi2.c
index a0b15b2f2ea4..db18e066509c 100644
--- a/drivers/misc/habanalabs/gaudi2/gaudi2.c
+++ b/drivers/misc/habanalabs/gaudi2/gaudi2.c
@@ -2721,6 +2721,8 @@ static int gaudi2_late_init(struct hl_device *hdev)
struct gaudi2_device *gaudi2 = hdev->asic_specific;
int rc;

+ hdev->asic_prop.supports_advanced_cpucp_rc = true;
+
rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_ENABLE_PCI_ACCESS,
gaudi2->virt_msix_db_dma_addr);
if (rc) {
diff --git a/drivers/misc/habanalabs/include/common/cpucp_if.h b/drivers/misc/habanalabs/include/common/cpucp_if.h
index b837bb1f4cd3..9593d1a26945 100644
--- a/drivers/misc/habanalabs/include/common/cpucp_if.h
+++ b/drivers/misc/habanalabs/include/common/cpucp_if.h
@@ -824,10 +824,25 @@ enum cpucp_led_index {
CPUCP_LED2_INDEX
};

+/*
+ * enum cpucp_packet_rc - Error return code
+ * @cpucp_packet_success -> in case of success.
+ * @cpucp_packet_invalid -> this is to support Goya and Gaudi platform.
+ * @cpucp_packet_fault -> in case of processing error like failing to
+ * get device binding or semaphore etc.
+ * @cpucp_packet_invalid_pkt -> when cpucp packet is un-supported. This is
+ * supported Greco onwards.
+ * @cpucp_packet_invalid_params -> when checking parameter like length of buffer
+ * or attribute value etc. Supported Greco onwards.
+ * @cpucp_packet_rc_max -> It indicates size of enum so should be at last.
+ */
enum cpucp_packet_rc {
cpucp_packet_success,
cpucp_packet_invalid,
- cpucp_packet_fault
+ cpucp_packet_fault,
+ cpucp_packet_invalid_pkt,
+ cpucp_packet_invalid_params,
+ cpucp_packet_rc_max
};

/*
--
2.25.1