Re: [PATCH 2/2] efi/cper, cxl: Decode CXL Error Log

From: Jonathan Cameron
Date: Mon Oct 10 2022 - 10:34:25 EST


On Fri, 7 Oct 2022 21:17:14 +0000
Smita Koralahalli <Smita.KoralahalliChannabasappa@xxxxxxx> wrote:

> Print the CXL Error Log field as found in CXL Protocol Error Section.
>
> The CXL RAS Capability structure will be reused by OS First Handling
> and the duplication/appropriate placement will be addressed eventually.
>
> Signed-off-by: Smita Koralahalli <Smita.KoralahalliChannabasappa@xxxxxxx>

Ah. This clearly answers at least a few of the comments from my review of patch 1.
I should have read on!

> ---
> drivers/firmware/efi/cper_cxl.c | 21 +++++++++++++++++++++
> include/linux/cxl_err.h | 21 +++++++++++++++++++++
> 2 files changed, 42 insertions(+)
> create mode 100644 include/linux/cxl_err.h
>
> diff --git a/drivers/firmware/efi/cper_cxl.c b/drivers/firmware/efi/cper_cxl.c
> index e5f48f0de1a4..c3d1d0770aef 100644
> --- a/drivers/firmware/efi/cper_cxl.c
> +++ b/drivers/firmware/efi/cper_cxl.c
> @@ -8,6 +8,7 @@
> */
>
> #include <linux/cper.h>
> +#include <linux/cxl_err.h>
> #include "cper_cxl.h"
>
> #define PROT_ERR_VALID_AGENT_TYPE BIT_ULL(0)
> @@ -16,6 +17,7 @@
> #define PROT_ERR_VALID_SERIAL_NUMBER BIT_ULL(3)
> #define PROT_ERR_VALID_CAPABILITY BIT_ULL(4)
> #define PROT_ERR_VALID_DVSEC BIT_ULL(5)
> +#define PROT_ERR_VALID_ERROR_LOG BIT_ULL(6)
>
> static const char * const prot_err_agent_type_strs[] = {
> "Restricted CXL Device",
> @@ -84,4 +86,23 @@ void cper_print_prot_err(const char *pfx, const struct cper_sec_prot_err *prot_e
> break;
> }
> }
> +
> + if (prot_err->valid_bits & PROT_ERR_VALID_ERROR_LOG) {
> + size_t size = sizeof(*prot_err) + prot_err->dvsec_len;
> + struct ras_capability_regs *cxl_ras;
> +
> + pr_info("%s Error log length: 0x%04x\n", pfx, prot_err->err_len);
> +
> + pr_info("%s CXL Error Log:\n", pfx);
> + cxl_ras = (struct ras_capability_regs *)((long)prot_err + size);
> + pr_info("%s cxl_ras_uncor_status: 0x%08x, cxl_ras_uncor_mask: 0x%08x\n",
> + pfx, cxl_ras->uncor_status, cxl_ras->uncor_mask);
Is it worth splitting these up, so that we get a human-readable line with the
individual fields broken out?

> + pr_info("%s cxl_ras_uncor_severity: 0x%08x\n", pfx,
> + cxl_ras->uncor_severity);
> + pr_info("%s cxl_ras_cor_status: 0x%08x, cxl_ras_cor_mask: 0x%08x\n",
> + pfx, cxl_ras->cor_status, cxl_ras->cor_mask);

Why not also output the cap_control register? Some of its fields might be useful.

> + pr_info("%s Header Log Registers:\n", pfx);
> + print_hex_dump(pfx, "", DUMP_PREFIX_OFFSET, 16, 4, cxl_ras->header_log,
> + sizeof(cxl_ras->header_log), 0);
> + }
> }
> diff --git a/include/linux/cxl_err.h b/include/linux/cxl_err.h
> new file mode 100644
> index 000000000000..c89dbb6c286f
> --- /dev/null
> +++ b/include/linux/cxl_err.h
> @@ -0,0 +1,21 @@
> +/* SPDX-License-Identifier: GPL-2.0-only */
> +/*
> + * Copyright (C) 2022 Advanced Micro Devices, Inc.
> + *
> + * Author: Smita Koralahalli <Smita.KoralahalliChannabasappa@xxxxxxx>
> + */
> +
> +#ifndef LINUX_CXL_ERR_H
> +#define LINUX_CXL_ERR_H
> +
> +struct ras_capability_regs {

Please add a CXL r3.0 spec reference here, and prefix the structure name with cxl_.

Agreed with your comment at the top. Some discussion needed on where to
put this - or whether to delay figuring that out until a later stage.

> + u32 uncor_status;
> + u32 uncor_mask;
> + u32 uncor_severity;
> + u32 cor_status;
> + u32 cor_mask;
> + u32 cap_control;
> + u32 header_log[16];
> +};
> +
> +#endif //__CXL_ERR_