[PATCH 1/8] habanalabs: initialize device CPU queues after MMU init

From: Oded Gabbay
Date: Tue Jun 11 2019 - 01:55:58 EST


This patch changes the order of H/W IP initializations. The MMU needs to
be initialized before the device CPU queues, because the CPU will go
through the ASIC MMU in order to reach the host memory (where the queues
are located).

Signed-off-by: Oded Gabbay <oded.gabbay@xxxxxxxxx>
---
drivers/misc/habanalabs/asid.c | 2 +-
drivers/misc/habanalabs/device.c | 22 +++++-----
drivers/misc/habanalabs/goya/goya.c | 64 +++++++++++++----------------
3 files changed, 40 insertions(+), 48 deletions(-)

diff --git a/drivers/misc/habanalabs/asid.c b/drivers/misc/habanalabs/asid.c
index f54e7971a762..2c01461701a3 100644
--- a/drivers/misc/habanalabs/asid.c
+++ b/drivers/misc/habanalabs/asid.c
@@ -18,7 +18,7 @@ int hl_asid_init(struct hl_device *hdev)

mutex_init(&hdev->asid_mutex);

- /* ASID 0 is reserved for KMD */
+ /* ASID 0 is reserved for KMD and device CPU */
set_bit(0, hdev->asid_bitmap);

return 0;
diff --git a/drivers/misc/habanalabs/device.c b/drivers/misc/habanalabs/device.c
index cca4af29daf7..4df8ef88ce2d 100644
--- a/drivers/misc/habanalabs/device.c
+++ b/drivers/misc/habanalabs/device.c
@@ -326,7 +326,15 @@ static int device_late_init(struct hl_device *hdev)
{
int rc;

- INIT_DELAYED_WORK(&hdev->work_freq, set_freq_to_low_job);
+ if (hdev->asic_funcs->late_init) {
+ rc = hdev->asic_funcs->late_init(hdev);
+ if (rc) {
+ dev_err(hdev->dev,
+ "failed late initialization for the H/W\n");
+ return rc;
+ }
+ }
+
hdev->high_pll = hdev->asic_prop.high_pll;

/* force setting to low frequency */
@@ -337,17 +345,9 @@ static int device_late_init(struct hl_device *hdev)
else
hdev->asic_funcs->set_pll_profile(hdev, PLL_LAST);

- if (hdev->asic_funcs->late_init) {
- rc = hdev->asic_funcs->late_init(hdev);
- if (rc) {
- dev_err(hdev->dev,
- "failed late initialization for the H/W\n");
- return rc;
- }
- }
-
+ INIT_DELAYED_WORK(&hdev->work_freq, set_freq_to_low_job);
schedule_delayed_work(&hdev->work_freq,
- usecs_to_jiffies(HL_PLL_LOW_JOB_FREQ_USEC));
+ usecs_to_jiffies(HL_PLL_LOW_JOB_FREQ_USEC));

if (hdev->heartbeat) {
INIT_DELAYED_WORK(&hdev->work_heartbeat, hl_device_heartbeat);
diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c
index 81c1d576783f..106074466dca 100644
--- a/drivers/misc/habanalabs/goya/goya.c
+++ b/drivers/misc/habanalabs/goya/goya.c
@@ -539,9 +539,32 @@ int goya_late_init(struct hl_device *hdev)
struct asic_fixed_properties *prop = &hdev->asic_prop;
int rc;

+ goya_fetch_psoc_frequency(hdev);
+
+ rc = goya_mmu_clear_pgt_range(hdev);
+ if (rc) {
+ dev_err(hdev->dev,
+ "Failed to clear MMU page tables range %d\n", rc);
+ return rc;
+ }
+
+ rc = goya_mmu_set_dram_default_page(hdev);
+ if (rc) {
+ dev_err(hdev->dev, "Failed to set DRAM default page %d\n", rc);
+ return rc;
+ }
+
+ rc = goya_init_cpu_queues(hdev);
+ if (rc)
+ return rc;
+
+ rc = goya_test_cpu_queue(hdev);
+ if (rc)
+ return rc;
+
rc = goya_armcp_info_get(hdev);
if (rc) {
- dev_err(hdev->dev, "Failed to get armcp info\n");
+ dev_err(hdev->dev, "Failed to get armcp info %d\n", rc);
return rc;
}

@@ -553,33 +576,15 @@ int goya_late_init(struct hl_device *hdev)

rc = hl_fw_send_pci_access_msg(hdev, ARMCP_PACKET_ENABLE_PCI_ACCESS);
if (rc) {
- dev_err(hdev->dev, "Failed to enable PCI access from CPU\n");
+ dev_err(hdev->dev,
+ "Failed to enable PCI access from CPU %d\n", rc);
return rc;
}

WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
GOYA_ASYNC_EVENT_ID_INTS_REGISTER);

- goya_fetch_psoc_frequency(hdev);
-
- rc = goya_mmu_clear_pgt_range(hdev);
- if (rc) {
- dev_err(hdev->dev, "Failed to clear MMU page tables range\n");
- goto disable_pci_access;
- }
-
- rc = goya_mmu_set_dram_default_page(hdev);
- if (rc) {
- dev_err(hdev->dev, "Failed to set DRAM default page\n");
- goto disable_pci_access;
- }
-
return 0;
-
-disable_pci_access:
- hl_fw_send_pci_access_msg(hdev, ARMCP_PACKET_DISABLE_PCI_ACCESS);
-
- return rc;
}

/*
@@ -1000,7 +1005,7 @@ int goya_init_cpu_queues(struct hl_device *hdev)

if (err) {
dev_err(hdev->dev,
- "Failed to communicate with ARM CPU (ArmCP timeout)\n");
+ "Failed to setup communication with device CPU\n");
return -EIO;
}

@@ -2465,13 +2470,6 @@ static int goya_hw_init(struct hl_device *hdev)
if (rc)
goto disable_queues;

- rc = goya_init_cpu_queues(hdev);
- if (rc) {
- dev_err(hdev->dev, "failed to initialize CPU H/W queues %d\n",
- rc);
- goto disable_msix;
- }
-
/*
* Check if we managed to set the DMA mask to more then 32 bits. If so,
* let's try to increase it again because in Goya we set the initial
@@ -2481,7 +2479,7 @@ static int goya_hw_init(struct hl_device *hdev)
if (hdev->dma_mask > 32) {
rc = hl_pci_set_dma_mask(hdev, 48);
if (rc)
- goto disable_pci_access;
+ goto disable_msix;
}

/* Perform read from the device to flush all MSI-X configuration */
@@ -2489,8 +2487,6 @@ static int goya_hw_init(struct hl_device *hdev)

return 0;

-disable_pci_access:
- hl_fw_send_pci_access_msg(hdev, ARMCP_PACKET_DISABLE_PCI_ACCESS);
disable_msix:
goya_disable_msix(hdev);
disable_queues:
@@ -2972,10 +2968,6 @@ int goya_test_queues(struct hl_device *hdev)
ret_val = -EINVAL;
}

- rc = goya_test_cpu_queue(hdev);
- if (rc)
- ret_val = -EINVAL;
-
return ret_val;
}

--
2.17.1