Re: [PATCH 3/3] mce: acpi/apei: trace: Enable ghes memory error traceevent
From: Mauro Carvalho Chehab
Date: Thu Aug 08 2013 - 15:38:38 EST
Em Thu, 08 Aug 2013 23:57:51 +0530
"Naveen N. Rao" <naveen.n.rao@xxxxxxxxxxxxxxxxxx> escreveu:
> Enable memory error trace event in cper.c
Why do we need to do that? Memory error events are already handled
via the edac_ghes module, in a standard way that allows the same
tracing to be used whether the error is reported by the BIOS or by
the direct hardware error reporting mechanism.
Adding an extra trace event for the same thing here will just make
userspace more complex, and will create duplicate error events
for the very same error.
OK, if the interface there is poor, let's improve it, but adding
yet another interface to report the same error doesn't sound
reasonable to me.
Regards,
Mauro
>
> Signed-off-by: Naveen N. Rao <naveen.n.rao@xxxxxxxxxxxxxxxxxx>
> ---
> drivers/acpi/apei/cper.c | 21 ++++++++++++++++-----
> 1 file changed, 16 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/acpi/apei/cper.c b/drivers/acpi/apei/cper.c
> index 33dc6a0..19a9c0b 100644
> --- a/drivers/acpi/apei/cper.c
> +++ b/drivers/acpi/apei/cper.c
> @@ -32,6 +32,10 @@
> #include <linux/pci.h>
> #include <linux/aer.h>
>
> +#define CREATE_TRACE_POINTS
> +#define TRACE_EVENT_GHES
> +#include <trace/events/ras.h>
> +
> /*
> * CPER record ID need to be unique even after reboot, because record
> * ID is used as index for ERST storage, while CPER records from
> @@ -193,8 +197,13 @@ static const char *cper_mem_err_type_strs[] = {
> "scrub uncorrected error",
> };
>
> -static void cper_print_mem(const char *pfx, const struct cper_sec_mem_err *mem)
> +static void cper_print_mem(const char *pfx,
> + const struct acpi_hest_generic_status *estatus,
> + const struct acpi_hest_generic_data *gdata,
> + const struct cper_sec_mem_err *mem)
> {
> + trace_ghes_platform_memory_event(estatus, gdata, mem);
> +
> if (mem->validation_bits & CPER_MEM_VALID_ERROR_STATUS)
> printk("%s""error_status: 0x%016llx\n", pfx, mem->error_status);
> if (mem->validation_bits & CPER_MEM_VALID_PHYSICAL_ADDRESS)
> @@ -292,8 +301,10 @@ static const char *apei_estatus_section_flag_strs[] = {
> "latent error",
> };
>
> -static void apei_estatus_print_section(
> - const char *pfx, const struct acpi_hest_generic_data *gdata, int sec_no)
> +static void apei_estatus_print_section(const char *pfx,
> + const struct acpi_hest_generic_status *estatus,
> + const struct acpi_hest_generic_data *gdata,
> + int sec_no)
> {
> uuid_le *sec_type = (uuid_le *)gdata->section_type;
> __u16 severity;
> @@ -320,7 +331,7 @@ static void apei_estatus_print_section(
> struct cper_sec_mem_err *mem_err = (void *)(gdata + 1);
> printk("%s""section_type: memory error\n", pfx);
> if (gdata->error_data_length >= sizeof(*mem_err))
> - cper_print_mem(pfx, mem_err);
> + cper_print_mem(pfx, estatus, gdata, mem_err);
> else
> goto err_section_too_small;
> } else if (!uuid_le_cmp(*sec_type, CPER_SEC_PCIE)) {
> @@ -355,7 +366,7 @@ void apei_estatus_print(const char *pfx,
> gdata = (struct acpi_hest_generic_data *)(estatus + 1);
> while (data_len > sizeof(*gdata)) {
> gedata_len = gdata->error_data_length;
> - apei_estatus_print_section(pfx, gdata, sec_no);
> + apei_estatus_print_section(pfx, estatus, gdata, sec_no);
> data_len -= gedata_len + sizeof(*gdata);
> gdata = (void *)(gdata + 1) + gedata_len;
> sec_no++;
--
Cheers,
Mauro
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/