[PATCH v2] ACPI: APEI: fix missing erst record id

From: Liu Xinpeng
Date: Tue Apr 05 2022 - 02:14:54 EST


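When a record_id is in erst_record_id_cache but no longer in storage,
erst_read() returns -ENOENT and the caller jumps to retry_next (or skip),
where erst_get_record_id_next() moves on to the next record_id. The stale
record_id is never removed from the cache, so this can result in printing
only the records that are in the cache.

Fix this by adding erst_clear_cache(), which removes a record_id from
erst_record_id_cache, and calling it when erst_read() returns -ENOENT
(and, in erst_reader(), when the record was not created by pstore).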

A reproducer of the problem (retry many times):

[root@localhost erst-inject]# ./erst-inject -c 0xaaaaa00011
[root@localhost erst-inject]# ./erst-inject -p
rc: 273
rcd sig: CPER
rcd id: 0xaaaaa00012
rc: 273
rcd sig: CPER
rcd id: 0xaaaaa00013
rc: 273
rcd sig: CPER
rcd id: 0xaaaaa00014
[root@localhost erst-inject]# ./erst-inject -i 0xaaaaa000006
[root@localhost erst-inject]# ./erst-inject -i 0xaaaaa000007
[root@localhost erst-inject]# ./erst-inject -i 0xaaaaa000008
[root@localhost erst-inject]# ./erst-inject -p
rc: 273
rcd sig: CPER
rcd id: 0xaaaaa00012
rc: 273
rcd sig: CPER
rcd id: 0xaaaaa00013
rc: 273
rcd sig: CPER
rcd id: 0xaaaaa00014
[root@localhost erst-inject]# ./erst-inject -n
total error record count: 6

Signed-off-by: Liu Xinpeng <liuxp11@xxxxxxxxxxxxxxx>
---
drivers/acpi/apei/erst-dbg.c | 4 +++-
drivers/acpi/apei/erst.c | 34 +++++++++++++++++++++++++++++++---
include/acpi/apei.h | 1 +
3 files changed, 35 insertions(+), 4 deletions(-)

diff --git a/drivers/acpi/apei/erst-dbg.c b/drivers/acpi/apei/erst-dbg.c
index c740f0faad39..5b8164280a17 100644
--- a/drivers/acpi/apei/erst-dbg.c
+++ b/drivers/acpi/apei/erst-dbg.c
@@ -113,8 +113,10 @@ static ssize_t erst_dbg_read(struct file *filp, char __user *ubuf,
retry:
rc = len = erst_read(id, erst_dbg_buf, erst_dbg_buf_len);
/* The record may be cleared by others, try read next record */
- if (rc == -ENOENT)
+ if (rc == -ENOENT) {
+ erst_clear_cache(id);
goto retry_next;
+ }
if (rc < 0)
goto out;
if (len > ERST_DBG_RECORD_LEN_MAX) {
diff --git a/drivers/acpi/apei/erst.c b/drivers/acpi/apei/erst.c
index 698d67cee052..07d69dc7fd62 100644
--- a/drivers/acpi/apei/erst.c
+++ b/drivers/acpi/apei/erst.c
@@ -856,6 +856,31 @@ ssize_t erst_read(u64 record_id, struct cper_record_header *record,
}
EXPORT_SYMBOL_GPL(erst_read);

+int erst_clear_cache(u64 record_id)
+{
+ int rc, i;
+ u64 *entries;
+
+ if (erst_disable)
+ return -ENODEV;
+
+ rc = mutex_lock_interruptible(&erst_record_id_cache.lock);
+ if (rc)
+ return rc;
+
+ entries = erst_record_id_cache.entries;
+ for (i = 0; i < erst_record_id_cache.len; i++) {
+ if (entries[i] == record_id)
+ entries[i] = APEI_ERST_INVALID_RECORD_ID;
+ }
+ __erst_record_id_cache_compact();
+
+ mutex_unlock(&erst_record_id_cache.lock);
+
+ return rc;
+}
+EXPORT_SYMBOL_GPL(erst_clear_cache);
+
int erst_clear(u64 record_id)
{
int rc, i;
@@ -998,14 +1023,17 @@ static ssize_t erst_reader(struct pstore_record *record)

len = erst_read(record_id, &rcd->hdr, rcd_len);
/* The record may be cleared by others, try read next record */
- if (len == -ENOENT)
+ if (len == -ENOENT) {
+ erst_clear_cache(record_id);
goto skip;
- else if (len < 0 || len < sizeof(*rcd)) {
+ } else if (len < 0 || len < sizeof(*rcd)) {
rc = -EIO;
goto out;
}
- if (!guid_equal(&rcd->hdr.creator_id, &CPER_CREATOR_PSTORE))
+ if (!guid_equal(&rcd->hdr.creator_id, &CPER_CREATOR_PSTORE)) {
+ erst_clear_cache(record_id);
goto skip;
+ }

record->buf = kmalloc(len, GFP_KERNEL);
if (record->buf == NULL) {
diff --git a/include/acpi/apei.h b/include/acpi/apei.h
index afaca3a075e8..f8c11ff4115a 100644
--- a/include/acpi/apei.h
+++ b/include/acpi/apei.h
@@ -47,6 +47,7 @@ void erst_get_record_id_end(void);
ssize_t erst_read(u64 record_id, struct cper_record_header *record,
size_t buflen);
int erst_clear(u64 record_id);
+int erst_clear_cache(u64 record_id);

int arch_apei_enable_cmcff(struct acpi_hest_header *hest_hdr, void *data);
void arch_apei_report_mem_error(int sev, struct cper_sec_mem_err *mem_err);
--
2.23.0