[PATCH 30/50] ACPI, APEI, Printk queued error record before panic

From: Len Brown
Date: Tue Jan 17 2012 - 07:22:23 EST


From: Huang Ying <ying.huang@xxxxxxxxx>

Because printk is not safe inside NMI handler, the recoverable error
records received in NMI handler will be queued to be printked in a
delayed IRQ context via irq_work. If a fatal error occurs after the
recoverable error and before the irq_work processed, we lost a error
report.

To solve the issue, the queued error records are printked in NMI
handler if system will go panic.

Signed-off-by: Huang Ying <ying.huang@xxxxxxxxx>
Signed-off-by: Len Brown <len.brown@xxxxxxxxx>
---
drivers/acpi/apei/ghes.c | 53 ++++++++++++++++++++++++++++++++++++++-------
1 files changed, 44 insertions(+), 9 deletions(-)

diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
index 9dcb2d8..aaf3609 100644
--- a/drivers/acpi/apei/ghes.c
+++ b/drivers/acpi/apei/ghes.c
@@ -740,26 +740,34 @@ static int ghes_notify_sci(struct notifier_block *this,
return ret;
}

+static struct llist_node *llist_nodes_reverse(struct llist_node *llnode)
+{
+ struct llist_node *next, *tail = NULL;
+
+ while (llnode) {
+ next = llnode->next;
+ llnode->next = tail;
+ tail = llnode;
+ llnode = next;
+ }
+
+ return tail;
+}
+
static void ghes_proc_in_irq(struct irq_work *irq_work)
{
- struct llist_node *llnode, *next, *tail = NULL;
+ struct llist_node *llnode, *next;
struct ghes_estatus_node *estatus_node;
struct acpi_hest_generic *generic;
struct acpi_hest_generic_status *estatus;
u32 len, node_len;

+ llnode = llist_del_all(&ghes_estatus_llist);
/*
* Because the time order of estatus in list is reversed,
* revert it back to proper order.
*/
- llnode = llist_del_all(&ghes_estatus_llist);
- while (llnode) {
- next = llnode->next;
- llnode->next = tail;
- tail = llnode;
- llnode = next;
- }
- llnode = tail;
+ llnode = llist_nodes_reverse(llnode);
while (llnode) {
next = llnode->next;
estatus_node = llist_entry(llnode, struct ghes_estatus_node,
@@ -779,6 +787,32 @@ static void ghes_proc_in_irq(struct irq_work *irq_work)
}
}

+static void ghes_print_queued_estatus(void)
+{
+ struct llist_node *llnode;
+ struct ghes_estatus_node *estatus_node;
+ struct acpi_hest_generic *generic;
+ struct acpi_hest_generic_status *estatus;
+ u32 len, node_len;
+
+ llnode = llist_del_all(&ghes_estatus_llist);
+ /*
+ * Because the time order of estatus in list is reversed,
+ * revert it back to proper order.
+ */
+ llnode = llist_nodes_reverse(llnode);
+ while (llnode) {
+ estatus_node = llist_entry(llnode, struct ghes_estatus_node,
+ llnode);
+ estatus = GHES_ESTATUS_FROM_NODE(estatus_node);
+ len = apei_estatus_len(estatus);
+ node_len = GHES_ESTATUS_NODE_LEN(len);
+ generic = estatus_node->generic;
+ ghes_print_estatus(NULL, generic, estatus);
+ llnode = llnode->next;
+ }
+}
+
static int ghes_notify_nmi(unsigned int cmd, struct pt_regs *regs)
{
struct ghes *ghes, *ghes_global = NULL;
@@ -804,6 +838,7 @@ static int ghes_notify_nmi(unsigned int cmd, struct pt_regs *regs)

if (sev_global >= GHES_SEV_PANIC) {
oops_begin();
+ ghes_print_queued_estatus();
__ghes_print_estatus(KERN_EMERG, ghes_global->generic,
ghes_global->estatus);
/* reboot to log the error! */
--
1.7.9.rc1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/