[PATCH v5] ACPI: APEI: fix missing erst record id

From: Liu Xinpeng
Date: Sat Apr 09 2022 - 08:25:27 EST


When a record has been cleared by someone else, a cache entry for the
deleted record is still created by erst_get_record_id_next(). On the next
enumeration of the records, the cached ID of the deleted record is
returned, erst_read() returns -ENOENT and the caller tries to get the next
record. Looping back to the first ID makes
__erst_record_id_cache_add_one() return 0, record_id is then set to
APEI_ERST_INVALID_RECORD_ID, and the read operation finishes.
From then on, only the records that are already in the cache can be read.

This patch clears the cache entry of the deleted record, fixing the issue
that the number of records shown by "./erst-inject -p" is not equal to the
count reported by "./erst-inject -n".

A reproducer of the problem (it may need to be retried many times):

[root@localhost erst-inject]# ./erst-inject -c 0xaaaaa00011
[root@localhost erst-inject]# ./erst-inject -p
rc: 273
rcd sig: CPER
rcd id: 0xaaaaa00012
rc: 273
rcd sig: CPER
rcd id: 0xaaaaa00013
rc: 273
rcd sig: CPER
rcd id: 0xaaaaa00014
[root@localhost erst-inject]# ./erst-inject -i 0xaaaaa000006
[root@localhost erst-inject]# ./erst-inject -i 0xaaaaa000007
[root@localhost erst-inject]# ./erst-inject -i 0xaaaaa000008
[root@localhost erst-inject]# ./erst-inject -p
rc: 273
rcd sig: CPER
rcd id: 0xaaaaa00012
rc: 273
rcd sig: CPER
rcd id: 0xaaaaa00013
rc: 273
rcd sig: CPER
rcd id: 0xaaaaa00014
[root@localhost erst-inject]# ./erst-inject -n
total error record count: 6

Changelog:
v1->v2 fix style problems
v2->v3 fix the apei_read_mce() path that calls erst_get_record_id_next(),
and modify the commit message.
v3->v4 add erst_clear_cache in another retry.
v4->v5 implement a new function for looking up records of a specific
type, as suggested by Tony Luck

Signed-off-by: Liu Xinpeng <liuxp11@xxxxxxxxxxxxxxx>
---
arch/x86/kernel/cpu/mce/apei.c | 8 ++---
drivers/acpi/apei/erst-dbg.c | 3 +-
drivers/acpi/apei/erst.c | 62 ++++++++++++++++++++++++++++++++--
include/acpi/apei.h | 2 ++
4 files changed, 66 insertions(+), 9 deletions(-)

diff --git a/arch/x86/kernel/cpu/mce/apei.c b/arch/x86/kernel/cpu/mce/apei.c
index 0e3ae64d3b76..717192915f28 100644
--- a/arch/x86/kernel/cpu/mce/apei.c
+++ b/arch/x86/kernel/cpu/mce/apei.c
@@ -177,16 +177,14 @@ ssize_t apei_read_mce(struct mce *m, u64 *record_id)
/* no more record */
if (*record_id == APEI_ERST_INVALID_RECORD_ID)
goto out;
- rc = erst_read(*record_id, &rcd.hdr, sizeof(rcd));
+ rc = erst_read_record(*record_id, &rcd.hdr, sizeof(rcd), sizeof(rcd),
+ &CPER_CREATOR_MCE);
/* someone else has cleared the record, try next one */
if (rc == -ENOENT)
goto retry;
else if (rc < 0)
goto out;
- /* try to skip other type records in storage */
- else if (rc != sizeof(rcd) ||
- !guid_equal(&rcd.hdr.creator_id, &CPER_CREATOR_MCE))
- goto retry;
+
memcpy(m, &rcd.mce, sizeof(*m));
rc = sizeof(*m);
out:
diff --git a/drivers/acpi/apei/erst-dbg.c b/drivers/acpi/apei/erst-dbg.c
index c740f0faad39..8bc71cdc2270 100644
--- a/drivers/acpi/apei/erst-dbg.c
+++ b/drivers/acpi/apei/erst-dbg.c
@@ -111,7 +111,8 @@ static ssize_t erst_dbg_read(struct file *filp, char __user *ubuf,
goto out;
}
retry:
- rc = len = erst_read(id, erst_dbg_buf, erst_dbg_buf_len);
+ rc = len = erst_read_record(id, erst_dbg_buf, erst_dbg_buf_len,
+ erst_dbg_buf_len, NULL);
/* The record may be cleared by others, try read next record */
if (rc == -ENOENT)
goto retry_next;
diff --git a/drivers/acpi/apei/erst.c b/drivers/acpi/apei/erst.c
index 698d67cee052..1dc238783f63 100644
--- a/drivers/acpi/apei/erst.c
+++ b/drivers/acpi/apei/erst.c
@@ -856,6 +856,63 @@ ssize_t erst_read(u64 record_id, struct cper_record_header *record,
}
EXPORT_SYMBOL_GPL(erst_read);

+/* Forget @record_id: invalidate its slots in the ERST record ID cache. */
+static int erst_clear_cache(u64 record_id)
+{
+	u64 *ids;
+	int idx, err;
+
+	if (erst_disable)
+		return -ENODEV;
+
+	err = mutex_lock_interruptible(&erst_record_id_cache.lock);
+	if (err)
+		return err;
+
+	/* Mark every matching slot invalid, then run the cache compaction. */
+	ids = erst_record_id_cache.entries;
+	for (idx = 0; idx < erst_record_id_cache.len; idx++) {
+		if (ids[idx] == record_id)
+			ids[idx] = APEI_ERST_INVALID_RECORD_ID;
+	}
+	__erst_record_id_cache_compact();
+	mutex_unlock(&erst_record_id_cache.lock);
+	return 0;	/* the lock was taken, so err is known to be zero here */
+}
+
+/*
+ * Read record @record_id, optionally requiring length @recordlen and creator
+ * @creatorid (NULL accepts any record, as erst-dbg needs). Returns the length
+ * from erst_read(), -ENOENT if the record is gone or unwanted, or the
+ * erst_read() error unchanged.
+ */
+ssize_t erst_read_record(u64 record_id, struct cper_record_header *record,
+		size_t buflen, size_t recordlen, const guid_t *creatorid)
+{
+	ssize_t len = erst_read(record_id, record, buflen);
+
+	/* Only a record that no longer exists is dropped from the ID cache. */
+	if (len == -ENOENT)
+		erst_clear_cache(record_id);
+
+	/*
+	 * Hand other errors back untouched: comparing a negative ssize_t with
+	 * the size_t @recordlen would otherwise turn them all into -ENOENT.
+	 */
+	if (len < 0 || creatorid == NULL)
+		return len;
+
+	/*
+	 * The read succeeded, so the record is still present: keep its cache
+	 * entry and just report it as not found to this caller.
+	 */
+	if ((size_t)len != recordlen ||
+	    !guid_equal(&record->creator_id, creatorid))
+		return -ENOENT;
+	return len;
+}
+EXPORT_SYMBOL_GPL(erst_read_record);
+
int erst_clear(u64 record_id)
{
int rc, i;
@@ -996,7 +1053,8 @@ static ssize_t erst_reader(struct pstore_record *record)
goto out;
}

- len = erst_read(record_id, &rcd->hdr, rcd_len);
+ len = erst_read_record(record_id, &rcd->hdr, rcd_len, rcd_len,
+ &CPER_CREATOR_PSTORE);
/* The record may be cleared by others, try read next record */
if (len == -ENOENT)
goto skip;
@@ -1004,8 +1062,6 @@ static ssize_t erst_reader(struct pstore_record *record)
rc = -EIO;
goto out;
}
- if (!guid_equal(&rcd->hdr.creator_id, &CPER_CREATOR_PSTORE))
- goto skip;

record->buf = kmalloc(len, GFP_KERNEL);
if (record->buf == NULL) {
diff --git a/include/acpi/apei.h b/include/acpi/apei.h
index afaca3a075e8..dc60f7db5524 100644
--- a/include/acpi/apei.h
+++ b/include/acpi/apei.h
@@ -46,6 +46,8 @@ int erst_get_record_id_next(int *pos, u64 *record_id);
void erst_get_record_id_end(void);
ssize_t erst_read(u64 record_id, struct cper_record_header *record,
size_t buflen);
+ssize_t erst_read_record(u64 record_id, struct cper_record_header *record,
+ size_t buflen, size_t recordlen, const guid_t *creatorid);
int erst_clear(u64 record_id);

int arch_apei_enable_cmcff(struct acpi_hest_header *hest_hdr, void *data);
--
2.23.0