[RFC Patch 2/2][slimdump][makedumpfile] Recognise PANIC_MCE crashes to generate slimdump

From: K.Prasad
Date: Mon Nov 21 2011 - 05:14:33 EST


Given that the kernel indicates the cause of crash through a new field
CRASH_REASON in the VMCOREINFO elf-note, recognise the same. For crashes
caused by PANIC_MCE, avoid capture of kernel memory, instead generate
only a slimdump.

Since 'slimdump' will be of very small size (containing only elf-headers and
elf-notes section), the resultant coredump will be of ELF type (and not
kdump-compressed format).

Signed-off-by: K.Prasad <prasad@xxxxxxxxxxxxxxxxxx>
---
elf_info.c | 67 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
elf_info.h | 2 +
makedumpfile.c | 13 ++++++++++-
makedumpfile.h | 1 +
4 files changed, 82 insertions(+), 1 deletions(-)

diff --git a/elf_info.c b/elf_info.c
index 114dd05..a925484 100644
--- a/elf_info.c
+++ b/elf_info.c
@@ -287,6 +287,73 @@ offset_note_desc(void *note)
return offset;
}

+#define CRASH_REASON_PANIC_MCE "CRASH_REASON=PANIC_MCE"
+
+/*
+ * Check whether the VMCOREINFO note contains CRASH_REASON=PANIC_MCE, which
+ * marks a crash caused by a hardware error. For such a crash it makes no
+ * sense to read/store the crashing kernel's memory, so only a 'slimdump' is
+ * captured. Returns TRUE if the marker is found, FALSE if not or on error.
+ */
+int
+is_crash_by_mce(void)
+{
+	int ret = FALSE;
+	off_t offset, desc_size;
+	char buf[VMCOREINFO_XEN_NOTE_NAME_BYTES];
+	char note[MAX_SIZE_NHDR];
+	char *desc = NULL;
+
+	offset = offset_pt_note_memory;
+	while (offset < offset_pt_note_memory + size_pt_note_memory) {
+		if (lseek(fd_memory, offset, SEEK_SET) < 0) {
+			ERRMSG("Can't seek the dump memory(%s). %s\n",
+			    name_memory, strerror(errno));
+			return FALSE;
+		}
+		if (read(fd_memory, note, sizeof(note)) != sizeof(note) ||
+		    read(fd_memory, &buf, sizeof(buf)) != sizeof(buf)) {
+			ERRMSG("Can't read the dump memory(%s). %s\n",
+			    name_memory, strerror(errno));
+			return FALSE;
+		}
+		if (strncmp(VMCOREINFO_NOTE_NAME, buf,
+		    VMCOREINFO_NOTE_NAME_BYTES)) {
+			offset += offset_next_note(note);
+			continue;
+		}
+		/*
+		 * Read just the descriptor, located via offset_note_desc(),
+		 * and NUL-terminate it so strstr() stays inside the buffer.
+		 */
+		desc_size = offset_next_note(note) - offset_note_desc(note);
+		desc = malloc(desc_size + 1);
+		if (!desc) {
+			ERRMSG("Can't allocate memory for the vmcoreinfo note."
+			    "%s\n", strerror(errno));
+			return FALSE;
+		}
+		offset += offset_note_desc(note);
+		if (lseek(fd_memory, offset, SEEK_SET) < 0) {
+			ERRMSG("Can't seek the dump memory(%s). %s\n",
+			    name_memory, strerror(errno));
+			goto exit;
+		}
+		if (read(fd_memory, desc, desc_size) != desc_size) {
+			ERRMSG("Can't read the dump memory(%s). %s\n",
+			    name_memory, strerror(errno));
+			goto exit;
+		}
+		desc[desc_size] = '\0';
+		if (strstr(desc, CRASH_REASON_PANIC_MCE))
+			ret = TRUE;
+		break;
+	}
+exit:
+	free(desc);
+	return ret;
+}
+
static int
get_pt_note_info(void)
{
diff --git a/elf_info.h b/elf_info.h
index 4dff9c1..0437481 100644
--- a/elf_info.h
+++ b/elf_info.h
@@ -34,6 +34,8 @@ unsigned long long get_max_paddr(void);
int get_elf64_ehdr(int fd, char *filename, Elf64_Ehdr *ehdr);
int get_elf32_ehdr(int fd, char *filename, Elf32_Ehdr *ehdr);
int get_elf_info(int fd, char *filename);
+int is_crash_by_mce(void);
+
void free_elf_info(void);

int is_elf64_memory(void);
diff --git a/makedumpfile.c b/makedumpfile.c
index 7b7c266..15efa90 100644
--- a/makedumpfile.c
+++ b/makedumpfile.c
@@ -4173,7 +4173,11 @@ write_elf_pages(struct cache_data *cd_header, struct cache_data *cd_page)
if (!get_phdr_memory(i, &load))
return FALSE;

- if (load.p_type != PT_LOAD)
+ /*
+ * Do not capture the kernel's memory if flag_nocoredump is
+ * turned on. Reading it may be dangerous to the system stability.
+ */
+ if ((load.p_type != PT_LOAD) || (info->flag_nocoredump))
continue;

off_memory= load.p_offset;
@@ -5760,6 +5764,13 @@ create_dumpfile(void)
if (!get_elf_info(info->fd_memory, info->name_memory))
return FALSE;
}
+	/*
+	 * A crash flagged as PANIC_MCE yields a slimdump, which is always
+	 * written in ELF format; otherwise keep the user's format choice.
+	 */
+	info->flag_nocoredump = is_crash_by_mce();
+	info->flag_elf_dumpfile |= info->flag_nocoredump;
+
if (is_xen_memory()) {
if (!initial_xen())
return FALSE;
diff --git a/makedumpfile.h b/makedumpfile.h
index f0e5da8..faf1c65 100644
--- a/makedumpfile.h
+++ b/makedumpfile.h
@@ -778,6 +778,7 @@ struct DumpInfo {
int flag_exclude_xen_dom;/* exclude Domain-U from xen-kdump */
int flag_dmesg; /* dump the dmesg log out of the vmcore file */
int flag_nospace; /* the flag of "No space on device" error */
+ int flag_nocoredump; /* coredump not collected */
unsigned long vaddr_for_vtop; /* virtual address for debugging */
long page_size; /* size of page */
long page_shift;

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/