RE: [PATCH v2 13/16] x86/mce: Unify AMD DFR handler with MCA Polling
From: Zhuo, Qiuxu
Date: Tue Feb 18 2025 - 02:37:35 EST
> From: Yazen Ghannam <yazen.ghannam@xxxxxxx>
> [...]
> +static bool smca_should_log_poll_error(enum mcp_flags flags, struct mce_hw_err *err)
> +{
> + struct mce *m = &err->m;
> +
> + /*
> + * If this is a deferred error found in MCA_STATUS, then clear
> + * the redundant data from the MCA_DESTAT register.
> + */
> + if (m->status & MCI_STATUS_VAL) {
> + if (m->status & MCI_STATUS_DEFERRED)
> + mce_wrmsrl(MSR_AMD64_SMCA_MCx_DESTAT(m->bank), 0);
> +
> + return true;
> + }
> +
> + /*
> + * If the MCA_DESTAT register has valid data, then use
> + * it as the status register.
> + */
> + m->status = mce_rdmsrl(MSR_AMD64_SMCA_MCx_DESTAT(m->bank));
> +
> + if (!(m->status & MCI_STATUS_VAL))
> + return false;
> +
> + /*
> + * Gather all relevant data now and log the record before clearing
> + * the deferred status register. This avoids needing to go back to
> + * the polling function for these actions.
> + */
> + mce_read_aux(err, m->bank);
> +
> + if (m->status & MCI_STATUS_ADDRV)
> + m->addr = mce_rdmsrl(MSR_AMD64_SMCA_MCx_DEADDR(m->bank));
> +
> + smca_extract_err_addr(m);
> + m->severity = mce_severity(m, NULL, NULL, false);
> +
Is the following check from machine_check_poll() also needed here, before
queuing/logging AMD's deferred error?

    if (mca_cfg.dont_log_ce && !mce_usable_address(m))
            // Just clear MCA_STATUS, but do not queue/log the error.

> + if (flags & MCP_QUEUE_LOG)
> + mce_gen_pool_add(err);
> + else
> + mce_log(err);
> +
> + mce_wrmsrl(MSR_AMD64_SMCA_MCx_DESTAT(m->bank), 0);
> + return false;
> +}
[...]