retry:
- rc = erst_get_record_id_next(&pos, record_id);
- if (rc)
- goto out;
+ /*
+ * Some hardware is broken and doesn't actually advance the record id
I'd blame this on firmware rather than hardware.
+ * returned by ACPI_ERST_GET_RECORD_ID when we read a record like the
+ * spec says it is supposed to. So instead use record_id == 0 to just
+ * grab the first record in the erst, and fall back only if we trip over
+ * a record that isn't a MCE record.
+ */
+ if (lookup_record) {
+ rc = erst_get_record_id_next(&pos, record_id);
+ if (rc)
+ goto out;
+ } else {
+ *record_id = 0;
+ }
/* no more record */
if (*record_id == APEI_ERST_INVALID_RECORD_ID)
goto out;
rc = erst_read(*record_id, &rcd.hdr, sizeof(rcd));
- /* someone else has cleared the record, try next one */
- if (rc == -ENOENT)
- goto retry;
- else if (rc < 0)
+ /*
+ * someone else has cleared the record, try next one if we are looking
+ * up records. If we aren't looking up the record id's then just bail
+ * since this means we have an empty table.
+ */
+ if (rc == -ENOENT) {
+ if (lookup_record)
+ goto retry;
+ rc = 0;
+ goto out;
+ } else if (rc < 0) {
goto out;
- /* try to skip other type records in storage */
- else if (rc != sizeof(rcd) ||
- uuid_le_cmp(rcd.hdr.creator_id, CPER_CREATOR_MCE))
+ } else if (rc != sizeof(rcd) ||
+ uuid_le_cmp(rcd.hdr.creator_id, CPER_CREATOR_MCE)) {
+ /* try to skip other type records in storage */
+ lookup_record = true;
goto retry;
Are you still doomed by the buggy firmware if we take this "goto"?
You'd be back at the top of the loop expecting erst_get_record_id_next()
to move on. Does this just not happen in practice (finding non-MCE
records in amongst the MCE ones)?