[PATCH net-next 01/12] net: hns3: log detail error info of ROCEE ECC and AXI errors

From: Huazhong Tan
Date: Thu Jun 06 2019 - 04:27:40 EST


From: Xiaofei Tan <tanxiaofei@xxxxxxxxxx>

This patch logs detail error info of ROCEE ECC and AXI errors for
debug purpose, and remove unnecessary reset for ROCEE overflow
errors.

Signed-off-by: Xiaofei Tan <tanxiaofei@xxxxxxxxxx>
Signed-off-by: Huazhong Tan <tanhuazhong@xxxxxxxxxx>
---
.../net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h | 2 +
.../net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c | 87 ++++++++++++++++++++--
.../net/ethernet/hisilicon/hns3/hns3pf/hclge_err.h | 1 +
3 files changed, 83 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
index 61cb10d..5e6c749 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
@@ -268,6 +268,8 @@ enum hclge_opcode_type {
HCLGE_CONFIG_ROCEE_RAS_INT_EN = 0x1580,
HCLGE_QUERY_CLEAR_ROCEE_RAS_INT = 0x1581,
HCLGE_ROCEE_PF_RAS_INT_CMD = 0x1584,
+ HCLGE_QUERY_ROCEE_ECC_RAS_INFO_CMD = 0x1585,
+ HCLGE_QUERY_ROCEE_AXI_RAS_INFO_CMD = 0x1586,
HCLGE_IGU_EGU_TNL_INT_EN = 0x1803,
HCLGE_IGU_COMMON_INT_EN = 0x1806,
HCLGE_TM_QCN_MEM_INT_CFG = 0x1A14,
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c
index 784512d..64defd3 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c
@@ -1388,6 +1388,66 @@ static int hclge_handle_all_ras_errors(struct hclge_dev *hdev)
return ret;
}

+static int hclge_log_rocee_axi_error(struct hclge_dev *hdev)
+{
+ struct device *dev = &hdev->pdev->dev;
+ struct hclge_desc desc[3];
+ int ret;
+
+ hclge_cmd_setup_basic_desc(&desc[0], HCLGE_QUERY_ROCEE_AXI_RAS_INFO_CMD,
+ true);
+ hclge_cmd_setup_basic_desc(&desc[1], HCLGE_QUERY_ROCEE_AXI_RAS_INFO_CMD,
+ true);
+ hclge_cmd_setup_basic_desc(&desc[2], HCLGE_QUERY_ROCEE_AXI_RAS_INFO_CMD,
+ true);
+ desc[0].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT);
+ desc[1].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT);
+
+ ret = hclge_cmd_send(&hdev->hw, &desc[0], 3);
+ if (ret) {
+ dev_err(dev, "failed(%d) to query ROCEE AXI error sts\n", ret);
+ return ret;
+ }
+
+ dev_info(dev, "AXI1: %08X %08X %08X %08X %08X %08X\n",
+ le32_to_cpu(desc[0].data[0]), le32_to_cpu(desc[0].data[1]),
+ le32_to_cpu(desc[0].data[2]), le32_to_cpu(desc[0].data[3]),
+ le32_to_cpu(desc[0].data[4]), le32_to_cpu(desc[0].data[5]));
+ dev_info(dev, "AXI2: %08X %08X %08X %08X %08X %08X\n",
+ le32_to_cpu(desc[1].data[0]), le32_to_cpu(desc[1].data[1]),
+ le32_to_cpu(desc[1].data[2]), le32_to_cpu(desc[1].data[3]),
+ le32_to_cpu(desc[1].data[4]), le32_to_cpu(desc[1].data[5]));
+ dev_info(dev, "AXI3: %08X %08X %08X %08X\n",
+ le32_to_cpu(desc[2].data[0]), le32_to_cpu(desc[2].data[1]),
+ le32_to_cpu(desc[2].data[2]), le32_to_cpu(desc[2].data[3]));
+
+ return 0;
+}
+
+static int hclge_log_rocee_ecc_error(struct hclge_dev *hdev)
+{
+ struct device *dev = &hdev->pdev->dev;
+ struct hclge_desc desc[2];
+ int ret;
+
+ ret = hclge_cmd_query_error(hdev, &desc[0],
+ HCLGE_QUERY_ROCEE_ECC_RAS_INFO_CMD,
+ HCLGE_CMD_FLAG_NEXT, 0, 0);
+ if (ret) {
+ dev_err(dev, "failed(%d) to query ROCEE ECC error sts\n", ret);
+ return ret;
+ }
+
+ dev_info(dev, "ECC1: %08X %08X %08X %08X %08X %08X\n",
+ le32_to_cpu(desc[0].data[0]), le32_to_cpu(desc[0].data[1]),
+ le32_to_cpu(desc[0].data[2]), le32_to_cpu(desc[0].data[3]),
+ le32_to_cpu(desc[0].data[4]), le32_to_cpu(desc[0].data[5]));
+ dev_info(dev, "ECC2: %08X %08X %08X\n", le32_to_cpu(desc[1].data[0]),
+ le32_to_cpu(desc[1].data[1]), le32_to_cpu(desc[1].data[2]));
+
+ return 0;
+}
+
static int hclge_log_rocee_ovf_error(struct hclge_dev *hdev)
{
struct device *dev = &hdev->pdev->dev;
@@ -1456,19 +1516,33 @@ hclge_log_and_clear_rocee_ras_error(struct hclge_dev *hdev)

status = le32_to_cpu(desc[0].data[0]);

- if (status & HCLGE_ROCEE_RERR_INT_MASK) {
- dev_warn(dev, "ROCEE RAS AXI rresp error\n");
- reset_type = HNAE3_FUNC_RESET;
- }
+ if (status & HCLGE_ROCEE_AXI_ERR_INT_MASK) {
+ if (status & HCLGE_ROCEE_RERR_INT_MASK)
+ dev_warn(dev, "ROCEE RAS AXI rresp error\n");
+
+ if (status & HCLGE_ROCEE_BERR_INT_MASK)
+ dev_warn(dev, "ROCEE RAS AXI bresp error\n");

- if (status & HCLGE_ROCEE_BERR_INT_MASK) {
- dev_warn(dev, "ROCEE RAS AXI bresp error\n");
reset_type = HNAE3_FUNC_RESET;
+
+ ret = hclge_log_rocee_axi_error(hdev);
+ if (ret) {
+ dev_err(dev, "failed(%d) to process axi error\n", ret);
+ /* reset everything for now */
+ return HNAE3_GLOBAL_RESET;
+ }
}

if (status & HCLGE_ROCEE_ECC_INT_MASK) {
dev_warn(dev, "ROCEE RAS 2bit ECC error\n");
reset_type = HNAE3_GLOBAL_RESET;
+
+ ret = hclge_log_rocee_ecc_error(hdev);
+ if (ret) {
+ dev_err(dev, "failed(%d) to process ecc error\n", ret);
+ /* reset everything for now */
+ return HNAE3_GLOBAL_RESET;
+ }
}

if (status & HCLGE_ROCEE_OVF_INT_MASK) {
@@ -1478,7 +1552,6 @@ hclge_log_and_clear_rocee_ras_error(struct hclge_dev *hdev)
/* reset everything for now */
return HNAE3_GLOBAL_RESET;
}
- reset_type = HNAE3_FUNC_RESET;
}

/* clear error status */
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.h
index 81d115a..6684733 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.h
@@ -94,6 +94,7 @@
#define HCLGE_ROCEE_RAS_CE_INT_EN_MASK 0x1
#define HCLGE_ROCEE_RERR_INT_MASK BIT(0)
#define HCLGE_ROCEE_BERR_INT_MASK BIT(1)
+#define HCLGE_ROCEE_AXI_ERR_INT_MASK GENMASK(1, 0)
#define HCLGE_ROCEE_ECC_INT_MASK BIT(2)
#define HCLGE_ROCEE_OVF_INT_MASK BIT(3)
#define HCLGE_ROCEE_OVF_ERR_INT_MASK 0x10000
--
2.7.4