[PATCH 3/3 -mm] kexec jump -v8 : access memory image of kexec_image

From: Huang, Ying
Date: Fri Dec 21 2007 - 02:43:35 EST


This patch adds a file in the proc file system to access the loaded
kexec_image, which may contain the memory image of the kexeced
system. This can be used to:

- Communicate between the original kernel and the kexeced kernel by
writing to some pages in the original kernel.

- Communicate between the original kernel and the kexeced kernel by
reading the memory image of the kexeced kernel, amending the image, and
reloading the amended image.

- Accelerate the boot of the kexeced kernel. If you have a memory image
of the kexeced kernel, you do not need a normal boot process to jump to
the kexeced kernel; just load the memory image and jump to the point
where you left off last time in the kexeced kernel.

Signed-off-by: Huang Ying <ying.huang@xxxxxxxxx>

---
fs/proc/Makefile | 1
fs/proc/kimgcore.c | 277 ++++++++++++++++++++++++++++++++++++++++++++++++++
fs/proc/proc_misc.c | 6 +
include/linux/kexec.h | 7 +
kernel/kexec.c | 5
5 files changed, 291 insertions(+), 5 deletions(-)

--- /dev/null
+++ b/fs/proc/kimgcore.c
@@ -0,0 +1,277 @@
+/*
+ * fs/proc/kimgcore.c - Interface for accessing the loaded
+ * kexec_image, which may contain the memory image of the kexeced system.
+ * Heavily borrowed from fs/proc/kcore.c
+ *
+ * Copyright (C) 2007, Intel Corp.
+ * Huang Ying <ying.huang@xxxxxxxxx>
+ *
+ * This file is released under the GPLv2
+ */
+
+#include <linux/mm.h>
+#include <linux/proc_fs.h>
+#include <linux/user.h>
+#include <linux/elf.h>
+#include <linux/init.h>
+#include <linux/kexec.h>
+#include <linux/io.h>
+#include <linux/highmem.h>
+#include <linux/page-flags.h>
+#include <asm/uaccess.h>
+
+/* /proc entry backing the image file; set up in proc_misc_init(). */
+struct proc_dir_entry *proc_root_kimgcore;
+
+/*
+ * Total size of the virtual file: ELF headers followed by every
+ * segment of the loaded image.  Kept as size_t so that the Elf64_Off
+ * total computed in init_kimgcore() is not truncated to 32 bits on
+ * 64-bit kernels.
+ */
+static size_t kimgcore_size;
+
+/* ELF file header plus program headers, rebuilt on every open. */
+static char *elfcorebuf;
+static size_t elfcorebuf_sz;
+
+/* Bounce page for highmem source pages; allocated only with CONFIG_HIGHMEM. */
+static void *buf_page;
+
+/*
+ * kimage_copy_to_user - copy part of an image's source pages to user space
+ * @image:  kexec image whose entry list is walked
+ * @buf:    user-space destination buffer
+ * @offset: byte offset into the concatenation of all IND_SOURCE pages
+ * @count:  number of bytes requested
+ *
+ * Returns the number of bytes that were NOT copied because the entry
+ * list ended first (0 when the whole request was satisfied), or
+ * -EFAULT if copy_to_user() fails.
+ */
+static ssize_t kimage_copy_to_user(struct kimage *image, char __user *buf,
+				   unsigned long offset, size_t count)
+{
+	kimage_entry_t *slot, ent;
+	unsigned long page_start = 0;	/* offset of the current source page */
+	unsigned long in_page, len;
+	struct page *pg;
+	void *src;
+
+	for_each_kimage_entry(image, slot, ent) {
+		/* Only source entries carry image data. */
+		if (!(ent & IND_SOURCE))
+			continue;
+
+		/* This page ends before the requested offset: skip it. */
+		if (page_start + PAGE_SIZE <= offset) {
+			page_start += PAGE_SIZE;
+			continue;
+		}
+
+		/* Copy at most up to the end of the current page. */
+		in_page = offset - page_start;
+		len = (count > PAGE_SIZE - in_page) ? PAGE_SIZE - in_page : count;
+
+		pg = pfn_to_page(ent >> PAGE_SHIFT);
+		if (!PageHighMem(pg)) {
+			src = __va(ent & PAGE_MASK) + in_page;
+		} else {
+			/* Stage highmem data in buf_page before copy_to_user(). */
+			void *mapped = kmap(pg);
+
+			memcpy(buf_page, mapped + in_page, len);
+			kunmap(pg);
+			src = buf_page;
+		}
+
+		if (copy_to_user(buf, src, len))
+			return -EFAULT;
+
+		buf += len;
+		offset += len;
+		count -= len;
+		if (!count)
+			break;
+
+		page_start += PAGE_SIZE;
+	}
+
+	return count;
+}
+
+/*
+ * kimage_copy_from_user - overwrite part of an image's source pages
+ * @image:  kexec image whose entry list is walked
+ * @buf:    user-space source buffer
+ * @offset: byte offset into the concatenation of all IND_SOURCE pages
+ * @count:  number of bytes to write
+ *
+ * Returns the number of bytes that were NOT written because the entry
+ * list ended first (0 when the whole request was satisfied), or
+ * -EFAULT if copy_from_user() fails.
+ */
+static ssize_t kimage_copy_from_user(struct kimage *image,
+				     const char __user *buf,
+				     unsigned long offset,
+				     size_t count)
+{
+	kimage_entry_t *slot, ent;
+	unsigned long page_start = 0;	/* offset of the current source page */
+	unsigned long in_page, len;
+	struct page *pg;
+	void *dst;
+	int highmem;
+
+	for_each_kimage_entry(image, slot, ent) {
+		/* Only source entries carry image data. */
+		if (!(ent & IND_SOURCE))
+			continue;
+
+		/* This page ends before the requested offset: skip it. */
+		if (page_start + PAGE_SIZE <= offset) {
+			page_start += PAGE_SIZE;
+			continue;
+		}
+
+		/* Write at most up to the end of the current page. */
+		in_page = offset - page_start;
+		len = (count > PAGE_SIZE - in_page) ? PAGE_SIZE - in_page : count;
+
+		pg = pfn_to_page(ent >> PAGE_SHIFT);
+		highmem = PageHighMem(pg);
+
+		/* Highmem pages are filled through the bounce page. */
+		dst = highmem ? buf_page : __va(ent & PAGE_MASK) + in_page;
+		if (copy_from_user(dst, buf, len))
+			return -EFAULT;
+
+		if (highmem) {
+			void *mapped = kmap(pg);
+
+			memcpy(mapped + in_page, buf_page, len);
+			kunmap(pg);
+		}
+
+		buf += len;
+		offset += len;
+		count -= len;
+		if (!count)
+			break;
+
+		page_start += PAGE_SIZE;
+	}
+
+	return count;
+}
+
+/*
+ * read_kimgcore - read handler of the kimgcore proc file
+ * @file:   unused
+ * @buffer: user-space destination
+ * @buflen: number of bytes requested
+ * @fpos:   file position, advanced by the number of bytes delivered
+ *
+ * The file consists of the ELF headers in elfcorebuf followed by the
+ * source pages of kexec_image.  Returns the number of bytes read, 0
+ * for an empty request or a position at/after EOF, or a negative
+ * error code if a copy to user space fails.
+ */
+static ssize_t read_kimgcore(struct file *file, char __user *buffer,
+			     size_t buflen, loff_t *fpos)
+{
+	size_t done = 0;
+	size_t hdr_len, img_len;
+	ssize_t left;
+
+	if (buflen == 0 || *fpos >= kimgcore_size)
+		return 0;
+
+	/* Clamp the request so that it ends at EOF at the latest. */
+	if (buflen > kimgcore_size - *fpos)
+		buflen = kimgcore_size - *fpos;
+
+	/* First serve whatever falls inside the ELF header area. */
+	if (*fpos < elfcorebuf_sz) {
+		hdr_len = elfcorebuf_sz - *fpos;
+		if (hdr_len > buflen)
+			hdr_len = buflen;
+		if (copy_to_user(buffer, elfcorebuf + *fpos, hdr_len))
+			return -EFAULT;
+
+		buffer += hdr_len;
+		buflen -= hdr_len;
+		*fpos += hdr_len;
+		done = hdr_len;
+
+		/* The headers alone satisfied the request. */
+		if (buflen == 0)
+			return done;
+	}
+
+	/* The remainder comes from the image's source pages. */
+	left = kimage_copy_to_user(kexec_image, buffer,
+				   *fpos - elfcorebuf_sz, buflen);
+	if (left < 0)
+		return left;
+
+	img_len = buflen - left;
+	*fpos += img_len;
+
+	return done + img_len;
+}
+
+/*
+ * write_kimgcore - write handler of the kimgcore proc file
+ * @file:   unused
+ * @buffer: user-space source
+ * @count:  number of bytes to write
+ * @fpos:   file position, advanced by the number of bytes written
+ *
+ * Only the image data behind the ELF headers may be written.  Returns
+ * the number of bytes written, 0 for an empty request, -EFAULT if the
+ * position lies inside the headers or at/after EOF, or the negative
+ * error code reported by kimage_copy_from_user().
+ */
+static ssize_t write_kimgcore(struct file *file, const char __user *buffer,
+			      size_t count, loff_t *fpos)
+{
+	ssize_t ssz;
+
+	if (count == 0)
+		return 0;
+
+	/* Can not write to ELF core header or write beyond EOF */
+	if (*fpos < elfcorebuf_sz || *fpos >= kimgcore_size)
+		return -EFAULT;
+	/* trim count to not go beyond EOF */
+	if (count > kimgcore_size - *fpos)
+		count = kimgcore_size - *fpos;
+
+	ssz = kimage_copy_from_user(kexec_image, buffer,
+				    *fpos - elfcorebuf_sz, count);
+	/*
+	 * A negative value is an error code, not a remaining byte count:
+	 * hand it back unchanged instead of folding it into the result,
+	 * which would report a bogus positive length to the caller.
+	 */
+	if (ssz < 0)
+		return ssz;
+
+	*fpos += count - ssz;
+
+	return count - ssz;
+}
+
+/*
+ * init_kimgcore - build the ELF headers describing the loaded image
+ *
+ * Allocates elfcorebuf and fills it with an ELF64 core-file header
+ * followed by one PT_LOAD program header per segment of kexec_image.
+ * Segment data is laid out back to back right after the headers, so
+ * kimgcore_size ends up as the header size plus the sum of all memsz.
+ * With CONFIG_HIGHMEM a bounce page is allocated as well.
+ *
+ * Returns 0 on success or -ENOMEM if an allocation fails; on failure
+ * no buffer is left allocated and the sizes are reset to 0.
+ */
+static int init_kimgcore(void)
+{
+	Elf64_Ehdr *ehdr;
+	Elf64_Phdr *phdr;
+	struct kexec_segment *seg;
+	Elf64_Off off;
+	unsigned long i;
+
+	elfcorebuf_sz = sizeof(Elf64_Ehdr) +
+		kexec_image->nr_segments * sizeof(Elf64_Phdr);
+	elfcorebuf = kzalloc(elfcorebuf_sz, GFP_KERNEL);
+	if (!elfcorebuf)
+		return -ENOMEM;
+	ehdr = (Elf64_Ehdr *)elfcorebuf;
+	memcpy(ehdr->e_ident, ELFMAG, SELFMAG);
+	ehdr->e_ident[EI_CLASS] = ELFCLASS64;
+	/* Use the architecture's byte order rather than assuming LSB. */
+	ehdr->e_ident[EI_DATA] = ELF_DATA;
+	ehdr->e_ident[EI_VERSION] = EV_CURRENT;
+	ehdr->e_ident[EI_OSABI] = ELFOSABI_NONE;
+	memset(ehdr->e_ident+EI_PAD, 0, EI_NIDENT-EI_PAD);
+	ehdr->e_type = ET_CORE;
+	ehdr->e_machine = ELF_ARCH;
+	ehdr->e_version = EV_CURRENT;
+	ehdr->e_entry = kexec_image->start;
+	ehdr->e_phoff = sizeof(Elf64_Ehdr);
+	ehdr->e_shoff = 0;
+	ehdr->e_flags = 0;
+	ehdr->e_ehsize = sizeof(Elf64_Ehdr);
+	ehdr->e_phentsize = sizeof(Elf64_Phdr);
+	ehdr->e_phnum = kexec_image->nr_segments;
+	ehdr->e_shentsize = 0;
+	ehdr->e_shnum = 0;
+	ehdr->e_shstrndx = 0;
+
+	/* Segment data starts right behind the headers. */
+	off = elfcorebuf_sz;
+	phdr = (Elf64_Phdr *)(elfcorebuf + sizeof(Elf64_Ehdr));
+	seg = kexec_image->segment;
+	for (i = 0; i < kexec_image->nr_segments; i++, phdr++, seg++) {
+		phdr->p_type = PT_LOAD;
+		phdr->p_flags = PF_R|PF_W|PF_X;
+		phdr->p_offset = off;
+		phdr->p_paddr = seg->mem;
+		phdr->p_filesz = seg->memsz;
+		phdr->p_memsz = seg->memsz;
+		phdr->p_align = PAGE_SIZE;
+		/*
+		 * seg->mem is a physical address while high_memory is a
+		 * virtual one, so convert before comparing.  Segments
+		 * outside the direct mapping keep p_vaddr == 0 from
+		 * kzalloc().
+		 */
+		if (seg->mem < __pa(high_memory))
+			phdr->p_vaddr = (unsigned long)__va(seg->mem);
+		off += seg->memsz;
+	}
+	kimgcore_size = off;
+
+#ifdef CONFIG_HIGHMEM
+	buf_page = (void *)__get_free_page(GFP_KERNEL);
+	if (!buf_page) {
+		/* Undo everything so no stale pointer or size survives. */
+		kfree(elfcorebuf);
+		elfcorebuf = NULL;
+		elfcorebuf_sz = 0;
+		kimgcore_size = 0;
+		return -ENOMEM;
+	}
+#endif
+	return 0;
+}
+
+/*
+ * destroy_kimgcore - release what init_kimgcore() set up
+ *
+ * Resets the recorded sizes to 0 and frees the ELF header buffer and,
+ * with CONFIG_HIGHMEM, the bounce page.
+ */
+static void destroy_kimgcore(void)
+{
+	kimgcore_size = 0;
+	elfcorebuf_sz = 0;
+
+#ifdef CONFIG_HIGHMEM
+	free_page((unsigned long)buf_page);
+#endif
+	kfree(elfcorebuf);
+}
+
+/*
+ * open_kimgcore - open handler of the kimgcore proc file
+ *
+ * Takes kexec_lock and builds the ELF headers for the loaded image.
+ * On success the lock stays taken and 0 is returned.  Returns -EBUSY
+ * if the lock is already held, -ENOENT if no image is loaded, or the
+ * error from init_kimgcore(); in the latter two cases the lock is
+ * dropped again.
+ */
+static int open_kimgcore(struct inode *inode, struct file *filp)
+{
+	int err;
+
+	if (xchg(&kexec_lock, 1))
+		return -EBUSY;
+
+	err = kexec_image ? init_kimgcore() : -ENOENT;
+	if (err)
+		xchg(&kexec_lock, 0);
+
+	return err;
+}
+
+/*
+ * release_kimgcore - release handler of the kimgcore proc file
+ *
+ * Frees the buffers via destroy_kimgcore() and then clears kexec_lock,
+ * which open_kimgcore() left set.  Always returns 0.
+ */
+static int release_kimgcore(struct inode *inode, struct file *filp)
+{
+ destroy_kimgcore();
+ xchg(&kexec_lock, 0);
+ return 0;
+}
+
+/* File operations installed on the "kimgcore" proc entry. */
+const struct file_operations proc_kimgcore_operations = {
+	.open		= open_kimgcore,
+	.release	= release_kimgcore,
+	.read		= read_kimgcore,
+	.write		= write_kimgcore,
+};
--- a/include/linux/kexec.h
+++ b/include/linux/kexec.h
@@ -9,6 +9,7 @@
#include <linux/ioport.h>
#include <linux/elfcore.h>
#include <linux/elf.h>
+#include <linux/proc_fs.h>
#include <asm/kexec.h>

/* Verify architecture specific macros are defined */
@@ -103,6 +104,10 @@ struct kimage {
};


+/*
+ * Walk the entry list of @image.  @ptr is the cursor and @entry holds
+ * the value it points at; iteration stops at a zero entry or at one
+ * flagged IND_DONE, and IND_INDIRECTION entries redirect the cursor to
+ * the page they name.  Arguments are evaluated more than once, so they
+ * must be free of side effects.
+ */
+#define for_each_kimage_entry(image, ptr, entry) \
+	for ((ptr) = &(image)->head; \
+	     ((entry) = *(ptr)) && !((entry) & IND_DONE); \
+	     (ptr) = ((entry) & IND_INDIRECTION) ? \
+			phys_to_virt((entry) & PAGE_MASK) : (ptr) + 1)

/* kexec interface functions */
extern void machine_kexec(struct kimage *image);
@@ -200,6 +205,8 @@ extern size_t vmcoreinfo_max_size;
int __init parse_crashkernel(char *cmdline, unsigned long long system_ram,
unsigned long long *crash_size, unsigned long long *crash_base);

+extern const struct file_operations proc_kimgcore_operations;
+extern struct proc_dir_entry *proc_root_kimgcore;
#else /* !CONFIG_KEXEC */
struct pt_regs;
struct task_struct;
--- a/fs/proc/Makefile
+++ b/fs/proc/Makefile
@@ -14,5 +14,6 @@ proc-$(CONFIG_PROC_SYSCTL) += proc_sysct
proc-$(CONFIG_NET) += proc_net.o
proc-$(CONFIG_PROC_KCORE) += kcore.o
proc-$(CONFIG_PROC_VMCORE) += vmcore.o
+proc-$(CONFIG_KEXEC) += kimgcore.o
proc-$(CONFIG_PROC_DEVICETREE) += proc_devtree.o
proc-$(CONFIG_PRINTK) += kmsg.o
--- a/fs/proc/proc_misc.c
+++ b/fs/proc/proc_misc.c
@@ -48,6 +48,7 @@
#include <linux/crash_dump.h>
#include <linux/pid_namespace.h>
#include <linux/bootmem.h>
+#include <linux/kexec.h>
#include <asm/uaccess.h>
#include <asm/pgtable.h>
#include <asm/io.h>
@@ -1034,6 +1035,11 @@ void __init proc_misc_init(void)
if (proc_vmcore)
proc_vmcore->proc_fops = &proc_vmcore_operations;
#endif
+#ifdef CONFIG_KEXEC
+ proc_root_kimgcore = create_proc_entry("kimgcore", S_IRUSR, NULL);
+ if (proc_root_kimgcore)
+ proc_root_kimgcore->proc_fops = &proc_kimgcore_operations;
+#endif
#ifdef CONFIG_MAGIC_SYSRQ
{
struct proc_dir_entry *entry;
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -611,11 +611,6 @@ static int kimage_terminate(struct kimag
return 0;
}

-#define for_each_kimage_entry(image, ptr, entry) \
- for (ptr = &image->head; (entry = *ptr) && !(entry & IND_DONE); \
- ptr = (entry & IND_INDIRECTION)? \
- phys_to_virt((entry & PAGE_MASK)): ptr +1)
-
static void kimage_free_entry(kimage_entry_t entry)
{
struct page *page;

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/