[RFC][PATCH v2 3/3] Check if existing entry is erasable

From: Seiji Aguchi
Date: Thu Jul 19 2012 - 17:14:49 EST


[Problem]
When efi_pstore holds just one log and it doesn't overwrite an exisiting entry,
we lose a critical message if kernel panics while system is rebooting.

[Solution]
If users decide that NVRAM size is not big enough to hold multiple logs,
efi_pstore has to handle just one log and avoid a critical messages by
overwriting existing entry.

This patch adds some logic checking if an existing entry is erasable in case of
holding just one log.

The rule is as follows.

- In case where an existing entry is panic or emergency
- It is not erasable because if panic/emergency event is lost, we have no way
to detect the root cause. We shouldn't overwrite them for any reason.

- In case where an existing entry is oops/shutdown/halt/poweroff
- It is erasable if an error ,panic, emergency or oops, happens in new event
because we will probably get messages of multiple events by erasing
an existing entry.


ex)
Even though reboot message is overwritten by panic one, we will probably save both
final part of reboot message and panic message as follows.

Example of kmsg in NVRAM
<snip>
Panic#1 <- header supplied by pstore
<6>kvm: exiting hardware virtualization
<5>sd 0:0:0:0: [sda] Synchronizing SCSI cache
<0>Restarting system. <- reboot message
<0>BUG: soft lockup - CPU#0 stuck for 22s! [swapper/0:0]
<0> Kernel panic - not syncing: softlockup: hung tasks <- panic message
<0>Pid: 0, comm: swapper/0 Not tainted 3.3.8 #4 Call Trace:
<0><IRQ> [<ffffffff8136bdd5>] panic+0xb8/0x1c4
<0>[<ffffffff81071f37>] watchdog_timer_fn+0x139/0x15d
<0>[<ffffffff81071dfe>] ? __touch_watchdog+0x1f/0x1f
<snip>

Signed-off-by: Seiji Aguchi <seiji.aguchi@xxxxxxx>
---
drivers/firmware/Makefile | 2 +
drivers/firmware/efivars.c | 111 ++++++++++++++++++++++++++++++++++++++++++-
fs/pstore/platform.c | 4 +-
include/linux/kmsg_dump.h | 8 ++--
include/linux/pstore.h | 5 ++
5 files changed, 121 insertions(+), 9 deletions(-)

diff --git a/drivers/firmware/Makefile b/drivers/firmware/Makefile
index 5a7e273..8f7724c 100644
--- a/drivers/firmware/Makefile
+++ b/drivers/firmware/Makefile
@@ -14,3 +14,5 @@ obj-$(CONFIG_ISCSI_IBFT) += iscsi_ibft.o
obj-$(CONFIG_FIRMWARE_MEMMAP) += memmap.o

obj-$(CONFIG_GOOGLE_FIRMWARE) += google/
+
+CFLAGS_efivars.o += -Wswitch-enum
diff --git a/drivers/firmware/efivars.c b/drivers/firmware/efivars.c
index 55188d7..29628a2 100644
--- a/drivers/firmware/efivars.c
+++ b/drivers/firmware/efivars.c
@@ -730,6 +730,85 @@ static unsigned long get_current_log_num(efi_char16_t *efi_name,
return current_log_num;
}

+static bool can_erase_entry(struct efivar_entry *entry, enum kmsg_dump_reason
+ new_reason)
+{
+ enum kmsg_dump_reason prev_reason = 0;
+ const char *prev_why;
+ bool is_erasable = 0;
+
+ /* Get a reason of previous message */
+ while (prev_reason < INT_MAX) {
+ prev_why = pstore_get_reason_str(prev_reason);
+ if (!strncmp(entry->var.Data, prev_why, strlen(prev_why)))
+ break;
+ prev_reason++;
+ }
+
+ /* check if existing message is erasable */
+
+ switch (prev_reason) {
+ case KMSG_DUMP_PANIC:
+ case KMSG_DUMP_EMERG:
+ /* Never erase panic or emergency message */
+ break;
+ case KMSG_DUMP_OOPS:
+ case KMSG_DUMP_RESTART:
+ case KMSG_DUMP_HALT:
+ case KMSG_DUMP_POWEROFF:
+ /* Can erase if new one is error message */
+ if (new_reason <= KMSG_DUMP_EMERG)
+ is_erasable = 1;
+ break;
+ /*
+ * Default is not specified to complain if a new KMSG_DUMP enum
+ * value is added without considering this logic.
+ */
+ }
+
+ return is_erasable;
+}
+
+static int clean_up_existing_entry(efi_char16_t *efi_name, ssize_t size,
+ enum kmsg_dump_reason reason,
+ struct pstore_info *psi,
+ struct efivar_entry **found)
+{
+ efi_guid_t vendor = LINUX_EFI_CRASH_GUID;
+ struct efivars *efivars = psi->data;
+ struct efivar_entry *entry;
+ int ret = -EEXIST;
+
+ list_for_each_entry(entry, &efivars->list, list) {
+ get_var_data_locked(efivars, &entry->var);
+
+ if (efi_guidcmp(entry->var.VendorGuid, vendor))
+ continue;
+ if (utf16_strncmp(entry->var.VariableName, efi_name,
+ utf16_strlen(efi_name)))
+ continue;
+ /* Needs to be a prefix */
+ if (entry->var.VariableName[utf16_strlen(efi_name)] == 0)
+ continue;
+
+ if (!can_erase_entry(entry, reason))
+ break;
+ /* Erasable entry is found */
+ *found = entry;
+ efivars->ops->set_variable(entry->var.VariableName,
+ &entry->var.VendorGuid,
+ PSTORE_EFI_ATTRIBUTES,
+ 0, NULL);
+ ret = 0;
+ break;
+ }
+
+ if (*found)
+ list_del(&(*found)->list);
+
+ return ret;
+}
+
static int efi_pstore_write(enum pstore_type_id type,
enum kmsg_dump_reason reason, u64 *id,
unsigned int part, size_t size, struct pstore_info *psi)
@@ -739,6 +818,7 @@ static int efi_pstore_write(enum pstore_type_id type,
efi_char16_t efi_name[DUMP_NAME_LEN];
efi_guid_t vendor = LINUX_EFI_CRASH_GUID;
struct efivars *efivars = psi->data;
+ struct efivar_entry *found = NULL;
int i, ret = 0;
unsigned long current_log_num;

@@ -753,9 +833,31 @@ static int efi_pstore_write(enum pstore_type_id type,
current_log_num = get_current_log_num(efi_name, efivars);

if (current_log_num >= efi_pstore_log_num) {
- spin_unlock(&efivars->lock);
- *id = part;
- return -EEXIST;
+ switch (efi_pstore_log_num) {
+ case 1:
+ /*
+ * In case where efi_pstore handles just one log,
+ * it tries to overwrite an existing entry to avoid losing
+ * panic message.
+ */
+ ret = clean_up_existing_entry(efi_name, size, reason,
+ psi, &found);
+ if (ret) {
+ /* Can't clean up existing entry */
+ spin_unlock(&efivars->lock);
+ *id = part;
+ return ret;
+ }
+ break;
+ default:
+ /*
+ * In case where efi_pstore handles multiple logs,
+ * it doesn't erase any existing entries.
+ */
+ spin_unlock(&efivars->lock);
+ *id = part;
+ return -EEXIST;
+ }
}

for (i = 0; i < DUMP_NAME_LEN; i++)
@@ -766,6 +868,9 @@ static int efi_pstore_write(enum pstore_type_id type,

spin_unlock(&efivars->lock);

+ if (found)
+ efivar_unregister(found);
+
if (size)
ret = efivar_create_sysfs_entry(efivars,
utf16_strsize(efi_name,
diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c
index 03ce7a9..32715eb 100644
--- a/fs/pstore/platform.c
+++ b/fs/pstore/platform.c
@@ -68,7 +68,7 @@ void pstore_set_kmsg_bytes(int bytes)
/* Tag each group of saved records with a sequence number */
static int oopscount;

-static const char *get_reason_str(enum kmsg_dump_reason reason)
+const char *pstore_get_reason_str(enum kmsg_dump_reason reason)
{
switch (reason) {
case KMSG_DUMP_PANIC:
@@ -104,7 +104,7 @@ static void pstore_dump(struct kmsg_dumper *dumper,
int is_locked = 0;
int ret;

- why = get_reason_str(reason);
+ why = pstore_get_reason_str(reason);

if (in_nmi()) {
is_locked = spin_trylock(&psinfo->buf_lock);
diff --git a/include/linux/kmsg_dump.h b/include/linux/kmsg_dump.h
index d6bd501..c578812 100644
--- a/include/linux/kmsg_dump.h
+++ b/include/linux/kmsg_dump.h
@@ -16,12 +16,12 @@
#include <linux/list.h>

/*
- * Keep this list arranged in rough order of priority. Anything listed after
- * KMSG_DUMP_OOPS will not be logged by default unless printk.always_kmsg_dump
- * is passed to the kernel.
+ * Keep this list arranged in rough order of priority.
+ * - Anything listed after KMSG_DUMP_OOPS will not be logged by default unless
+ * printk.always_kmsg_dump is passed to the kernel.
+ * - In EFI driver, KMSG_DUMP_EMERG and up may overwrite existing entries.
*/
enum kmsg_dump_reason {
- KMSG_DUMP_UNDEF,
KMSG_DUMP_PANIC,
KMSG_DUMP_OOPS,
KMSG_DUMP_EMERG,
diff --git a/include/linux/pstore.h b/include/linux/pstore.h
index 92cb90e..af7b90a 100644
--- a/include/linux/pstore.h
+++ b/include/linux/pstore.h
@@ -54,12 +54,17 @@ struct pstore_info {

#ifdef CONFIG_PSTORE
extern int pstore_register(struct pstore_info *);
+extern const char *pstore_get_reason_str(enum kmsg_dump_reason reason);
#else
static inline int
pstore_register(struct pstore_info *psi)
{
return -ENODEV;
}
+static const char *pstore_get_reason_str(enum kmsg_dump_reason reason)
+{
+ return NULL;
+}
#endif

#endif /*_LINUX_PSTORE_H*/
-- 1.7.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/