Re: [PATCH v3 03/14] acpi/ghes: Use HEST table offsets when preparing GHES records
From: Igor Mammedov
Date: Mon Feb 03 2025 - 09:48:47 EST
On Fri, 31 Jan 2025 18:42:44 +0100
Mauro Carvalho Chehab <mchehab+huawei@xxxxxxxxxx> wrote:
> There are two pointers that are needed during error injection:
>
> 1. The start address of the CPER block to be stored;
> 2. The address of the ack.
>
> It is preferable to calculate them from the HEST table. This allows
> checking the source ID, the size of the table and the type of the
> HEST error block structures.
>
> Yet, keep the old code, as this is needed for migration purposes.
>
> Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@xxxxxxxxxx>
> ---
> hw/acpi/ghes.c | 132 ++++++++++++++++++++++++++++++++++++-----
> include/hw/acpi/ghes.h | 1 +
> 2 files changed, 119 insertions(+), 14 deletions(-)
>
> diff --git a/hw/acpi/ghes.c b/hw/acpi/ghes.c
> index 27478f2d5674..8f284fd191a6 100644
> --- a/hw/acpi/ghes.c
> +++ b/hw/acpi/ghes.c
> @@ -41,6 +41,12 @@
> /* Address offset in Generic Address Structure(GAS) */
> #define GAS_ADDR_OFFSET 4
>
> +/*
> + * ACPI spec 1.0b
> + * 5.2.3 System Description Table Header
> + */
> +#define ACPI_DESC_HEADER_OFFSET 36
> +
> /*
> * The total size of Generic Error Data Entry
> * ACPI 6.1/6.2: 18.3.2.7.1 Generic Error Data,
> @@ -61,6 +67,25 @@
> */
> #define ACPI_GHES_GESB_SIZE 20
>
> +/*
> + * Offsets with regards to the start of the HEST table stored at
> + * ags->hest_addr_le,
If I read this literally, then the offsets above are not what is
declared later in this patch.
I'd really drop this comment altogether, as it's confusing,
and rather get the variable/macro naming right.
> according with the memory layout map at
> + * docs/specs/acpi_hest_ghes.rst.
> + */
What we need is an update to the above doc, describing the new and old ways,
as a separate patch.
> +
> +/*
> + * ACPI 6.2: 18.3.2.8 Generic Hardware Error Source version 2
^^^^^^^^ - wrt version, I see it in 6.1.
Our requirement is to point to the earliest spec version where it appeared.
If it must point to a later version for some justified reason,
the explanation of 'why' should be mentioned in the comment message.
Please check all versions/chapters you are touching/adding in this series.
> + * Table 18-382 Generic Hardware Error Source version 2 (GHESv2) Structure
> + */
> +#define HEST_GHES_V2_TABLE_SIZE 92
It's not a table but rather a GHES_V2 entry in HEST,
and should be named as such (emphasis on _entry_).
> +#define GHES_READ_ACK_ADDR_OFF 64
please, add a comment like below but for 'Read Ack Register'
> +/*
> + * ACPI 6.2: 18.3.2.7: Generic Hardware Error Source
> + * Table 18-380: 'Error Status Address' field
> + */
> +#define GHES_ERR_STATUS_ADDR_OFF 20
> +
> /*
> * Values for error_severity field
> */
> @@ -212,14 +237,6 @@ static void build_ghes_error_table(GArray *hardware_errors, BIOSLinker *linker,
> {
> int i, error_status_block_offset;
>
> - /*
> - * TODO: Current version supports only one source.
> - * A further patch will drop this check, after adding a proper migration
> - * code, as, for the code to work, we need to store a bios pointer to the
> - * HEST table.
> - */
> - assert(num_sources == 1);
> -
> /* Build error_block_address */
> for (i = 0; i < num_sources; i++) {
> build_append_int_noprefix(hardware_errors, 0, sizeof(uint64_t));
> @@ -352,6 +369,14 @@ void acpi_build_hest(GArray *table_data, GArray *hardware_errors,
> .oem_id = oem_id, .oem_table_id = oem_table_id };
> uint32_t hest_offset;
> int i;
> + AcpiGedState *acpi_ged_state;
> + AcpiGhesState *ags = NULL;
> +
> + acpi_ged_state = ACPI_GED(object_resolve_path_type("", TYPE_ACPI_GED,
> + NULL));
> + if (acpi_ged_state) {
> + ags = &acpi_ged_state->ghes_state;
> + }
>
> hest_offset = table_data->len;
>
> @@ -371,10 +396,12 @@ void acpi_build_hest(GArray *table_data, GArray *hardware_errors,
> * Tell firmware to write into GPA the address of HEST via fw_cfg,
> * once initialized.
> */
> - bios_linker_loader_write_pointer(linker,
> - ACPI_HEST_ADDR_FW_CFG_FILE, 0,
> - sizeof(uint64_t),
> - ACPI_BUILD_TABLE_FILE, hest_offset);
> + if (ags->use_hest_addr) {
> + bios_linker_loader_write_pointer(linker,
> + ACPI_HEST_ADDR_FW_CFG_FILE, 0,
> + sizeof(uint64_t),
> + ACPI_BUILD_TABLE_FILE, hest_offset);
> + }
I'd move this patch before 2/14, to avoid issues during bisection.
Also legacy variant is hidden in build_ghes_error_table()
/*
* tell firmware to write hardware_errors GPA into
* hardware_errors_addr fw_cfg, once the former has been initialized.
*/
bios_linker_loader_write_pointer()
and after this patch we end up with scattered code that should pick
only one of them (but doesn't).
As a prerequisite, I'd move the legacy variant into acpi_build_hest() as a
separate patch, then have this patch add the above 'if' gate,
and have a follow-up patch [2/14 currently] add bios_linker_loader_write_pointer(ACPI_HEST_ADDR_FW_CFG_FILE).
> }
>
> void acpi_ghes_add_fw_cfg(AcpiGhesState *ags, FWCfgState *s,
Shouldn't we do the same for the fw_cfg_add_file_callback() hunk added
in the previous patch and the related 'fw_cfg_add_file_callback(s, ACPI_HW_ERROR_ADDR_FW_CFG_FILE, ...)'?
We need only one of them.
> @@ -420,6 +447,78 @@ static void get_hw_error_offsets(uint64_t ghes_addr,
> *read_ack_register_addr = ghes_addr + sizeof(uint64_t);
> }
>
> +static void get_ghes_source_offsets(uint16_t source_id,
> + uint64_t hest_addr,
> + uint64_t *cper_addr,
> + uint64_t *read_ack_start_addr,
> + Error **errp)
> +{
> + uint64_t hest_err_block_addr, hest_read_ack_addr;
> + uint64_t err_source_entry, error_block_addr;
> + uint32_t num_sources, i;
> +
> + hest_addr += ACPI_DESC_HEADER_OFFSET;
> +
> + cpu_physical_memory_read(hest_addr, &num_sources,
> + sizeof(num_sources));
> + num_sources = le32_to_cpu(num_sources);
> +
> + err_source_entry = hest_addr + sizeof(num_sources);
> +
> + /*
> + * Currently, HEST Error source navigates only for GHESv2 tables
> + */
> +
This newline is not needed.
> + for (i = 0; i < num_sources; i++) {
> + uint64_t addr = err_source_entry;
> + uint16_t type, src_id;
> +
> + cpu_physical_memory_read(addr, &type, sizeof(type));
> + type = le16_to_cpu(type);
> +
> + /* For now, we only know the size of GHESv2 table */
> + if (type != ACPI_GHES_SOURCE_GENERIC_ERROR_V2) {
> + error_setg(errp, "HEST: type %d not supported.", type);
> + return;
> + }
> +
> + /* Compare CPER source address at the GHESv2 structure */
> + addr += sizeof(type);
> + cpu_physical_memory_read(addr, &src_id, sizeof(src_id));
> + if (le16_to_cpu(src_id) == source_id) {
> + break;
> + }
> +
> + err_source_entry += HEST_GHES_V2_TABLE_SIZE;
> + }
> + if (i == num_sources) {
> + error_setg(errp, "HEST: Source %d not found.", source_id);
> + return;
> + }
> +
> + /* Navigate though table address pointers */
> + hest_err_block_addr = err_source_entry + GHES_ERR_STATUS_ADDR_OFF +
> + GAS_ADDR_OFFSET;
> +
> + cpu_physical_memory_read(hest_err_block_addr, &error_block_addr,
> + sizeof(error_block_addr));
> +
I'd drop newlines for related read/processing
> + error_block_addr = le64_to_cpu(error_block_addr);
> +
> + cpu_physical_memory_read(error_block_addr, cper_addr,
> + sizeof(*cper_addr));
> +
ditto
> + *cper_addr = le64_to_cpu(*cper_addr);
> +
> + hest_read_ack_addr = err_source_entry + GHES_READ_ACK_ADDR_OFF +
> + GAS_ADDR_OFFSET;
> +
> + cpu_physical_memory_read(hest_read_ack_addr, read_ack_start_addr,
> + sizeof(*read_ack_start_addr));
> +
ditto
> + *read_ack_start_addr = le64_to_cpu(*read_ack_start_addr);
> +}
> +
> void ghes_record_cper_errors(const void *cper, size_t len,
> uint16_t source_id, Error **errp)
> {
> @@ -440,8 +539,13 @@ void ghes_record_cper_errors(const void *cper, size_t len,
> }
> ags = &acpi_ged_state->ghes_state;
>
> - get_hw_error_offsets(le64_to_cpu(ags->hw_error_le),
> - &cper_addr, &read_ack_register_addr);
> + if (!ags->hest_addr_le) {
> + get_hw_error_offsets(le64_to_cpu(ags->hw_error_le),
> + &cper_addr, &read_ack_register_addr);
> + } else {
> + get_ghes_source_offsets(source_id, le64_to_cpu(ags->hest_addr_le),
> + &cper_addr, &read_ack_register_addr, errp);
> + }
>
> if (!cper_addr) {
> error_setg(errp, "can not find Generic Error Status Block");
> diff --git a/include/hw/acpi/ghes.h b/include/hw/acpi/ghes.h
> index 237721fec0a2..6c2e57af0456 100644
> --- a/include/hw/acpi/ghes.h
> +++ b/include/hw/acpi/ghes.h
> @@ -61,6 +61,7 @@ typedef struct AcpiGhesState {
> uint64_t hest_addr_le;
> uint64_t hw_error_le;
> bool present; /* True if GHES is present at all on this board */
> + bool use_hest_addr; /* True if HEST address is present */
> } AcpiGhesState;
>
> /*
an