net: hns3: log detail error info of ROCEE ECC and AXI errors
This patch logs detailed error info for ROCEE ECC and AXI errors for debugging purposes, and removes the unnecessary reset for ROCEE overflow errors. Signed-off-by: Xiaofei Tan <tanxiaofei@huawei.com> Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com> Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
parent
900d96e418
commit
238882c8e0
3 changed files with 77 additions and 7 deletions
|
@ -268,6 +268,8 @@ enum hclge_opcode_type {
|
|||
HCLGE_CONFIG_ROCEE_RAS_INT_EN = 0x1580,
|
||||
HCLGE_QUERY_CLEAR_ROCEE_RAS_INT = 0x1581,
|
||||
HCLGE_ROCEE_PF_RAS_INT_CMD = 0x1584,
|
||||
HCLGE_QUERY_ROCEE_ECC_RAS_INFO_CMD = 0x1585,
|
||||
HCLGE_QUERY_ROCEE_AXI_RAS_INFO_CMD = 0x1586,
|
||||
HCLGE_IGU_EGU_TNL_INT_EN = 0x1803,
|
||||
HCLGE_IGU_COMMON_INT_EN = 0x1806,
|
||||
HCLGE_TM_QCN_MEM_INT_CFG = 0x1A14,
|
||||
|
|
|
@ -1388,6 +1388,66 @@ static int hclge_handle_all_ras_errors(struct hclge_dev *hdev)
|
|||
return ret;
|
||||
}
|
||||
|
||||
static int hclge_log_rocee_axi_error(struct hclge_dev *hdev)
|
||||
{
|
||||
struct device *dev = &hdev->pdev->dev;
|
||||
struct hclge_desc desc[3];
|
||||
int ret;
|
||||
|
||||
hclge_cmd_setup_basic_desc(&desc[0], HCLGE_QUERY_ROCEE_AXI_RAS_INFO_CMD,
|
||||
true);
|
||||
hclge_cmd_setup_basic_desc(&desc[1], HCLGE_QUERY_ROCEE_AXI_RAS_INFO_CMD,
|
||||
true);
|
||||
hclge_cmd_setup_basic_desc(&desc[2], HCLGE_QUERY_ROCEE_AXI_RAS_INFO_CMD,
|
||||
true);
|
||||
desc[0].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT);
|
||||
desc[1].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT);
|
||||
|
||||
ret = hclge_cmd_send(&hdev->hw, &desc[0], 3);
|
||||
if (ret) {
|
||||
dev_err(dev, "failed(%d) to query ROCEE AXI error sts\n", ret);
|
||||
return ret;
|
||||
}
|
||||
|
||||
dev_info(dev, "AXI1: %08X %08X %08X %08X %08X %08X\n",
|
||||
le32_to_cpu(desc[0].data[0]), le32_to_cpu(desc[0].data[1]),
|
||||
le32_to_cpu(desc[0].data[2]), le32_to_cpu(desc[0].data[3]),
|
||||
le32_to_cpu(desc[0].data[4]), le32_to_cpu(desc[0].data[5]));
|
||||
dev_info(dev, "AXI2: %08X %08X %08X %08X %08X %08X\n",
|
||||
le32_to_cpu(desc[1].data[0]), le32_to_cpu(desc[1].data[1]),
|
||||
le32_to_cpu(desc[1].data[2]), le32_to_cpu(desc[1].data[3]),
|
||||
le32_to_cpu(desc[1].data[4]), le32_to_cpu(desc[1].data[5]));
|
||||
dev_info(dev, "AXI3: %08X %08X %08X %08X\n",
|
||||
le32_to_cpu(desc[2].data[0]), le32_to_cpu(desc[2].data[1]),
|
||||
le32_to_cpu(desc[2].data[2]), le32_to_cpu(desc[2].data[3]));
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int hclge_log_rocee_ecc_error(struct hclge_dev *hdev)
|
||||
{
|
||||
struct device *dev = &hdev->pdev->dev;
|
||||
struct hclge_desc desc[2];
|
||||
int ret;
|
||||
|
||||
ret = hclge_cmd_query_error(hdev, &desc[0],
|
||||
HCLGE_QUERY_ROCEE_ECC_RAS_INFO_CMD,
|
||||
HCLGE_CMD_FLAG_NEXT, 0, 0);
|
||||
if (ret) {
|
||||
dev_err(dev, "failed(%d) to query ROCEE ECC error sts\n", ret);
|
||||
return ret;
|
||||
}
|
||||
|
||||
dev_info(dev, "ECC1: %08X %08X %08X %08X %08X %08X\n",
|
||||
le32_to_cpu(desc[0].data[0]), le32_to_cpu(desc[0].data[1]),
|
||||
le32_to_cpu(desc[0].data[2]), le32_to_cpu(desc[0].data[3]),
|
||||
le32_to_cpu(desc[0].data[4]), le32_to_cpu(desc[0].data[5]));
|
||||
dev_info(dev, "ECC2: %08X %08X %08X\n", le32_to_cpu(desc[1].data[0]),
|
||||
le32_to_cpu(desc[1].data[1]), le32_to_cpu(desc[1].data[2]));
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int hclge_log_rocee_ovf_error(struct hclge_dev *hdev)
|
||||
{
|
||||
struct device *dev = &hdev->pdev->dev;
|
||||
|
@ -1456,19 +1516,27 @@ hclge_log_and_clear_rocee_ras_error(struct hclge_dev *hdev)
|
|||
|
||||
status = le32_to_cpu(desc[0].data[0]);
|
||||
|
||||
if (status & HCLGE_ROCEE_RERR_INT_MASK) {
|
||||
dev_warn(dev, "ROCEE RAS AXI rresp error\n");
|
||||
reset_type = HNAE3_FUNC_RESET;
|
||||
}
|
||||
if (status & HCLGE_ROCEE_AXI_ERR_INT_MASK) {
|
||||
if (status & HCLGE_ROCEE_RERR_INT_MASK)
|
||||
dev_warn(dev, "ROCEE RAS AXI rresp error\n");
|
||||
|
||||
if (status & HCLGE_ROCEE_BERR_INT_MASK)
|
||||
dev_warn(dev, "ROCEE RAS AXI bresp error\n");
|
||||
|
||||
if (status & HCLGE_ROCEE_BERR_INT_MASK) {
|
||||
dev_warn(dev, "ROCEE RAS AXI bresp error\n");
|
||||
reset_type = HNAE3_FUNC_RESET;
|
||||
|
||||
ret = hclge_log_rocee_axi_error(hdev);
|
||||
if (ret)
|
||||
return HNAE3_GLOBAL_RESET;
|
||||
}
|
||||
|
||||
if (status & HCLGE_ROCEE_ECC_INT_MASK) {
|
||||
dev_warn(dev, "ROCEE RAS 2bit ECC error\n");
|
||||
reset_type = HNAE3_GLOBAL_RESET;
|
||||
|
||||
ret = hclge_log_rocee_ecc_error(hdev);
|
||||
if (ret)
|
||||
return HNAE3_GLOBAL_RESET;
|
||||
}
|
||||
|
||||
if (status & HCLGE_ROCEE_OVF_INT_MASK) {
|
||||
|
@ -1478,7 +1546,6 @@ hclge_log_and_clear_rocee_ras_error(struct hclge_dev *hdev)
|
|||
/* reset everything for now */
|
||||
return HNAE3_GLOBAL_RESET;
|
||||
}
|
||||
reset_type = HNAE3_FUNC_RESET;
|
||||
}
|
||||
|
||||
/* clear error status */
|
||||
|
|
|
@ -94,6 +94,7 @@
|
|||
/*
 * ROCEE RAS interrupt bit masks.
 *
 * In the RAS handling hunk shown with this change, RERR/BERR/ECC/OVF are
 * tested against the status word read from desc[0].data[0]:
 *   bit 0 - AXI rresp error (HCLGE_ROCEE_RERR_INT_MASK)
 *   bit 1 - AXI bresp error (HCLGE_ROCEE_BERR_INT_MASK)
 *   bit 2 - 2bit ECC error  (HCLGE_ROCEE_ECC_INT_MASK)
 *   bit 3 - overflow error  (HCLGE_ROCEE_OVF_INT_MASK)
 * HCLGE_ROCEE_AXI_ERR_INT_MASK is GENMASK(1, 0), i.e. bits 0 and 1 together,
 * so it matches when either the rresp or the bresp error bit is set.
 *
 * NOTE(review): the users of HCLGE_ROCEE_RAS_CE_INT_EN_MASK and
 * HCLGE_ROCEE_OVF_ERR_INT_MASK are not visible here - confirm which
 * register/field they apply to before relying on them.
 */
#define HCLGE_ROCEE_RAS_CE_INT_EN_MASK 0x1
|
||||
#define HCLGE_ROCEE_RERR_INT_MASK BIT(0)
|
||||
#define HCLGE_ROCEE_BERR_INT_MASK BIT(1)
|
||||
#define HCLGE_ROCEE_AXI_ERR_INT_MASK GENMASK(1, 0)
|
||||
#define HCLGE_ROCEE_ECC_INT_MASK BIT(2)
|
||||
#define HCLGE_ROCEE_OVF_INT_MASK BIT(3)
|
||||
#define HCLGE_ROCEE_OVF_ERR_INT_MASK 0x10000
|
||||
|
|
Loading…
Reference in a new issue