[PATCH 23/38] C/R: x86_64 support

From: Alexey Dobriyan
Date: Fri May 22 2009 - 01:01:33 EST


In theory and in practice, x86_64 COMPAT=y kernel will restore i386 images
and in other direction. There are small problems still and it doesn't work,
but mentioning anyway.

Right now x86_64 kernel restores only x86_64 images and 64-bit tasks.

Signed-off-by: Alexey Dobriyan <adobriyan@xxxxxxxxx>
---
arch/x86/ia32/ia32entry.S | 2 +
arch/x86/include/asm/unistd_64.h | 4 +
include/linux/kstate-image.h | 36 ++++
include/linux/kstate.h | 2 +-
kernel/kstate/Makefile | 1 +
kernel/kstate/kstate-x86_64.c | 336 ++++++++++++++++++++++++++++++++++++++
6 files changed, 380 insertions(+), 1 deletions(-)
create mode 100644 kernel/kstate/kstate-x86_64.c

diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index a505202..b12e911 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -830,4 +830,6 @@ ia32_sys_call_table:
.quad sys_inotify_init1
.quad compat_sys_preadv
.quad compat_sys_pwritev
+ .quad sys_checkpoint /* 335 */
+ .quad sys_restart
ia32_syscall_end:
diff --git a/arch/x86/include/asm/unistd_64.h b/arch/x86/include/asm/unistd_64.h
index f818294..a839c66 100644
--- a/arch/x86/include/asm/unistd_64.h
+++ b/arch/x86/include/asm/unistd_64.h
@@ -657,6 +657,10 @@ __SYSCALL(__NR_inotify_init1, sys_inotify_init1)
__SYSCALL(__NR_preadv, sys_preadv)
#define __NR_pwritev 296
__SYSCALL(__NR_pwritev, sys_pwritev)
+#define __NR_checkpoint 297
+__SYSCALL(__NR_checkpoint, sys_checkpoint)
+#define __NR_restart 298
+__SYSCALL(__NR_restart, sys_restart)


#ifndef __NO_STUBS
diff --git a/include/linux/kstate-image.h b/include/linux/kstate-image.h
index 3c93432..d697d97 100644
--- a/include/linux/kstate-image.h
+++ b/include/linux/kstate-image.h
@@ -28,6 +28,7 @@ struct kstate_image_header {
/* Mutable part. */
/* Arch of the kernel which dumped the image. */
#define KSTATE_ARCH_I386 1
+#define KSTATE_ARCH_X86_64 2
__le32 kernel_arch;
/*
* Distributions are expected to leave image version alone and
@@ -74,6 +75,8 @@ struct kstate_image_task_struct {
#define KSTATE_SEG_NULL 0
#define KSTATE_SEG_USER32_CS 1
#define KSTATE_SEG_USER32_DS 2
+#define KSTATE_SEG_USER64_CS 3
+#define KSTATE_SEG_USER64_DS 4
#define KSTATE_SEG_TLS 0x4000 /* 0100 0000 0000 00xx */
#define KSTATE_SEG_LDT 0x8000 /* 100x xxxx xxxx xxxx */

@@ -110,6 +113,39 @@ struct kstate_image_task_struct_i386 {
/* __u8 xstate[len_xstate]; */
} __packed;

+struct kstate_image_task_struct_x86_64 {
+ __u64 r15;
+ __u64 r14;
+ __u64 r13;
+ __u64 r12;
+ __u64 rbp;
+ __u64 rbx;
+ __u64 r11;
+ __u64 r10;
+ __u64 r9;
+ __u64 r8;
+ __u64 rax;
+ __u64 rcx;
+ __u64 rdx;
+ __u64 rsi;
+ __u64 rdi;
+ __u64 orig_rax;
+ __u64 rip;
+ __u64 rflags;
+ __u64 rsp;
+
+ __u64 fs;
+ __u64 gs;
+ __u16 cs;
+ __u16 ds;
+ __u16 es;
+ __u16 fsindex;
+ __u16 gsindex;
+ __u16 ss;
+
+ __u64 tls_array[3];
+} __packed;
+
struct kstate_image_mm_struct {
struct kstate_object_header hdr;

diff --git a/include/linux/kstate.h b/include/linux/kstate.h
index c4b55b6..95898ec 100644
--- a/include/linux/kstate.h
+++ b/include/linux/kstate.h
@@ -67,7 +67,7 @@ int kstate_collect_all_file(struct kstate_context *ctx);
int kstate_dump_all_file(struct kstate_context *ctx);
int kstate_restore_file(struct kstate_context *ctx, kstate_ref_t *ref);

-#if defined(CONFIG_X86_32)
+#if defined(CONFIG_X86_32) || defined(CONFIG_X86_64)
extern const __u32 kstate_kernel_arch;
int kstate_arch_check_image_header(struct kstate_image_header *i);

diff --git a/kernel/kstate/Makefile b/kernel/kstate/Makefile
index ca19a22..0678fc9 100644
--- a/kernel/kstate/Makefile
+++ b/kernel/kstate/Makefile
@@ -7,3 +7,4 @@ kstate-y += kstate-mm.o
kstate-y += kstate-object.o
kstate-y += kstate-task.o
kstate-$(CONFIG_X86_32) += kstate-x86_32.o
+kstate-$(CONFIG_X86_64) += kstate-x86_64.o
diff --git a/kernel/kstate/kstate-x86_64.c b/kernel/kstate/kstate-x86_64.c
new file mode 100644
index 0000000..0d85704
--- /dev/null
+++ b/kernel/kstate/kstate-x86_64.c
@@ -0,0 +1,336 @@
+/* Copyright (C) 2000-2009 Parallels Holdings, Ltd. */
+#include <linux/sched.h>
+
+#include <linux/kstate.h>
+#include <linux/kstate-image.h>
+
+const __u32 kstate_kernel_arch = KSTATE_ARCH_X86_64;
+
+int kstate_arch_check_image_header(struct kstate_image_header *i)
+{
+ if (i->kernel_arch == cpu_to_le32(KSTATE_ARCH_X86_64))
+ return 0;
+ return -EINVAL;
+}
+
+__u32 kstate_task_struct_arch(struct task_struct *tsk)
+{
+ return KSTATE_ARCH_X86_64;
+}
+
+static int check_rflags(__u64 rflags)
+{
+ rflags &= ~X86_EFLAGS_CF;
+ rflags &= ~X86_EFLAGS_PF;
+ rflags &= ~X86_EFLAGS_AF;
+ rflags &= ~X86_EFLAGS_ZF;
+ rflags &= ~X86_EFLAGS_SF;
+ rflags &= ~X86_EFLAGS_TF;
+ rflags &= ~X86_EFLAGS_DF;
+ rflags &= ~X86_EFLAGS_OF;
+ rflags &= ~X86_EFLAGS_NT;
+ rflags &= ~X86_EFLAGS_AC;
+ rflags &= ~X86_EFLAGS_ID;
+ if (rflags != (X86_EFLAGS_IF|0x2)) {
+ pr_debug("%s: rflags %016llx\n", __func__, (unsigned long long)rflags);
+ return -EINVAL;
+ }
+ return 0;
+}
+
+static int check_segment64(__u16 seg)
+{
+ switch (seg) {
+ case KSTATE_SEG_NULL:
+ case KSTATE_SEG_USER64_CS:
+ case KSTATE_SEG_USER64_DS:
+ return 0;
+ }
+ if (seg & KSTATE_SEG_TLS) {
+ if ((seg & ~KSTATE_SEG_TLS) > GDT_ENTRY_TLS_MAX - GDT_ENTRY_TLS_MIN) {
+ pr_debug("%s: seg %04x, GDT_ENTRY_TLS_MIN %u, GDT_ENTRY_TLS_MAX %u\n", __func__, seg, GDT_ENTRY_TLS_MIN, GDT_ENTRY_TLS_MAX);
+ return -EINVAL;
+ }
+ return 0;
+ }
+ if (seg & KSTATE_SEG_LDT) {
+ if ((seg & ~KSTATE_SEG_LDT) > 0x1fff) {
+ pr_debug("%s: seg %04x\n", __func__, seg);
+ return -EINVAL;
+ }
+ return 0;
+ }
+ pr_debug("%s: seg %04x\n", __func__, seg);
+ return -EINVAL;
+}
+
+static int check_tls(struct desc_struct *desc)
+{
+ if (desc->l != 0 || desc->s != 1 || desc->dpl != 3)
+ return -EINVAL;
+ return 0;
+}
+
+static int check_image_task_struct_x86_64(struct kstate_image_task_struct *tsk_i)
+{
+ struct kstate_image_task_struct_x86_64 *i = (void *)(tsk_i + 1);
+ int rv;
+
+ if (tsk_i->hdr.obj_len < sizeof(*tsk_i) + sizeof(*i))
+ return -EINVAL;
+
+ rv = check_rflags(i->rflags);
+ if (rv < 0)
+ return rv;
+
+ if (i->fs >= TASK_SIZE_MAX)
+ return -EINVAL;
+ if (i->gs >= TASK_SIZE_MAX)
+ return -EINVAL;
+
+ if (i->cs == KSTATE_SEG_NULL)
+ return -EINVAL;
+ rv = check_segment64(i->cs);
+ if (rv < 0)
+ return rv;
+ rv = check_segment64(i->ds);
+ if (rv < 0)
+ return rv;
+ rv = check_segment64(i->es);
+ if (rv < 0)
+ return rv;
+ rv = check_segment64(i->fsindex);
+ if (rv < 0)
+ return rv;
+ rv = check_segment64(i->gsindex);
+ if (rv < 0)
+ return rv;
+ rv = check_segment64(i->ss);
+ if (rv < 0)
+ return rv;
+
+ if (i->tls_array[0]) {
+ rv = check_tls((struct desc_struct *)&i->tls_array[0]);
+ if (rv < 0)
+ return rv;
+ }
+ if (i->tls_array[1]) {
+ rv = check_tls((struct desc_struct *)&i->tls_array[1]);
+ if (rv < 0)
+ return rv;
+ }
+ if (i->tls_array[2]) {
+ rv = check_tls((struct desc_struct *)&i->tls_array[2]);
+ if (rv < 0)
+ return rv;
+ }
+
+ return 0;
+}
+
+int kstate_arch_check_image_task_struct(struct kstate_image_task_struct *i)
+{
+ if (i->tsk_arch == KSTATE_ARCH_X86_64)
+ return check_image_task_struct_x86_64(i);
+ return -EINVAL;
+}
+
+unsigned int kstate_arch_len_task_struct(struct task_struct *tsk)
+{
+ return sizeof(struct kstate_image_task_struct_x86_64);
+}
+
+int kstate_arch_check_task_struct(struct task_struct *tsk)
+{
+ struct restart_block *rb;
+
+#ifdef CONFIG_COMPAT
+ if (test_tsk_thread_flag(tsk, TIF_IA32)) {
+ WARN_ON(1);
+ return -EINVAL;
+ }
+#endif
+ if (test_tsk_thread_flag(tsk, TIF_DEBUG)) {
+ WARN_ON(1);
+ return -EINVAL;
+ }
+ if (tsk->thread.xstate) {
+ WARN_ON(1);
+ return -EINVAL;
+ }
+ rb = &task_thread_info(tsk)->restart_block;
+ if (rb->fn != current_thread_info()->restart_block.fn) {
+ WARN(1, "rb->fn = %pF\n", rb->fn);
+ return -EINVAL;
+ }
+ if (tsk->thread.io_bitmap_ptr) {
+ WARN_ON(1);
+ return -EINVAL;
+ }
+#ifdef CONFIG_X86_DS
+ if (tsk->thread.ds_ctx) {
+ WARN_ON(1);
+ return -EINVAL;
+ }
+#endif
+ return 0;
+}
+
+static __u16 encode_segment(u16 seg)
+{
+ if (seg == 0)
+ return KSTATE_SEG_NULL;
+ BUG_ON((seg & 3) != 3);
+ if (seg & 4)
+ return KSTATE_SEG_LDT | (seg >> 3);
+
+ if (seg == __USER_CS)
+ return KSTATE_SEG_USER64_CS;
+ if (seg == __USER_DS)
+ return KSTATE_SEG_USER64_DS;
+
+ if (GDT_ENTRY_TLS_MIN <= (seg >> 3) && (seg >> 3) <= GDT_ENTRY_TLS_MAX)
+ return KSTATE_SEG_TLS | ((seg >> 3) - GDT_ENTRY_TLS_MIN);
+ BUG();
+}
+
+static u16 decode_segment(__u16 seg)
+{
+ if (seg == KSTATE_SEG_NULL)
+ return 0;
+ if (seg == KSTATE_SEG_USER64_CS)
+ return __USER_CS;
+ if (seg == KSTATE_SEG_USER64_DS)
+ return __USER_DS;
+
+ BUILD_BUG_ON(GDT_ENTRY_TLS_MAX - GDT_ENTRY_TLS_MIN + 1 != 3);
+ if ((seg & KSTATE_SEG_TLS) == KSTATE_SEG_TLS) {
+ seg &= ~KSTATE_SEG_TLS;
+ if (seg <= GDT_ENTRY_TLS_MAX - GDT_ENTRY_TLS_MIN)
+ return ((GDT_ENTRY_TLS_MIN + seg) << 3) | 3;
+ }
+ if ((seg & KSTATE_SEG_LDT) == KSTATE_SEG_LDT) {
+ seg &= ~KSTATE_SEG_LDT;
+ return (seg << 3) | 7;
+ }
+ BUG();
+}
+
+static int dump_task_struct_x86_64(struct kstate_context *ctx, struct task_struct *tsk, void *arch_i)
+{
+ struct kstate_image_task_struct_x86_64 *i = arch_i;
+ struct pt_regs *regs = task_pt_regs(tsk);
+
+ i->r15 = regs->r15;
+ i->r14 = regs->r14;
+ i->r13 = regs->r13;
+ i->r12 = regs->r12;
+ i->rbp = regs->bp;
+ i->rbx = regs->bx;
+ i->r11 = regs->r11;
+ i->r10 = regs->r10;
+ i->r9 = regs->r9;
+ i->r8 = regs->r8;
+ i->rax = regs->ax;
+ i->rcx = regs->cx;
+ i->rdx = regs->dx;
+ i->rsi = regs->si;
+ i->rdi = regs->di;
+ i->orig_rax = regs->orig_ax;
+ i->rip = regs->ip;
+ i->rflags = regs->flags;
+ i->rsp = regs->sp;
+
+ i->fs = tsk->thread.fs;
+ i->gs = tsk->thread.gs;
+ i->cs = encode_segment(regs->cs);
+ i->ds = encode_segment(tsk->thread.ds);
+ i->es = encode_segment(tsk->thread.es);
+ i->fsindex = encode_segment(tsk->thread.fsindex);
+ i->gsindex = encode_segment(tsk->thread.gsindex);
+ i->ss = encode_segment(regs->ss);
+
+ BUILD_BUG_ON(sizeof(tsk->thread.tls_array[0]) != 8);
+ BUILD_BUG_ON(sizeof(tsk->thread.tls_array) != 3 * 8);
+ memcpy(i->tls_array, tsk->thread.tls_array, sizeof(i->tls_array));
+
+ return 0;
+}
+
+int kstate_arch_dump_task_struct(struct kstate_context *ctx, struct task_struct *tsk, void *arch_i)
+{
+ return dump_task_struct_x86_64(ctx, tsk, arch_i);
+}
+
+static int restore_task_struct_x86_64(struct task_struct *tsk, struct kstate_image_task_struct_x86_64 *i)
+{
+ struct pt_regs *regs = task_pt_regs(tsk);
+
+ tsk->thread.sp = (unsigned long)regs;
+ tsk->thread.sp0 = (unsigned long)(regs + 1);
+
+ regs->r15 = i->r15;
+ regs->r14 = i->r14;
+ regs->r13 = i->r13;
+ regs->r12 = i->r12;
+ regs->bp = i->rbp;
+ regs->bx = i->rbx;
+ regs->r11 = i->r11;
+ regs->r10 = i->r10;
+ regs->r9 = i->r9;
+ regs->r8 = i->r8;
+ regs->ax = i->rax;
+ regs->cx = i->rcx;
+ regs->dx = i->rdx;
+ regs->si = i->rsi;
+ regs->di = i->rdi;
+ regs->orig_ax = i->orig_rax;
+ regs->ip = i->rip;
+ regs->flags = i->rflags;
+ regs->sp = i->rsp;
+ tsk->thread.usersp = regs->sp;
+
+ tsk->thread.fs = i->fs;
+ tsk->thread.gs = i->gs;
+ regs->cs = decode_segment(i->cs);
+ tsk->thread.ds = decode_segment(i->ds);
+ tsk->thread.es = decode_segment(i->es);
+ tsk->thread.fsindex = decode_segment(i->fsindex);
+ tsk->thread.gsindex = decode_segment(i->gsindex);
+ regs->ss = decode_segment(i->ss);
+
+ memcpy(tsk->thread.tls_array, i->tls_array, sizeof(i->tls_array));
+
+ set_tsk_thread_flag(tsk, TIF_FORK);
+ return 0;
+}
+
+int kstate_arch_restore_task_struct(struct task_struct *tsk, struct kstate_image_task_struct *i)
+{
+ if (i->tsk_arch == KSTATE_ARCH_X86_64) {
+ return restore_task_struct_x86_64(tsk, (void *)(i + 1));
+ }
+ BUG();
+}
+
+int kstate_arch_check_mm_struct(struct mm_struct *mm)
+{
+ mutex_lock(&mm->context.lock);
+ if (mm->context.ldt || mm->context.size != 0) {
+ mutex_unlock(&mm->context.lock);
+ WARN_ON(1);
+ return -EINVAL;
+ }
+ mutex_unlock(&mm->context.lock);
+ return 0;
+}
+
+unsigned int kstate_arch_len_mm_struct(struct mm_struct *mm)
+{
+ return 0;
+}
+
+int kstate_arch_dump_mm_struct(struct kstate_context *ctx, struct mm_struct *mm, void *arch_i)
+{
+ return 0;
+}
--
1.5.6.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/