[Devel] [PATCH 23/38] C/R: x86_64 support

Alexey Dobriyan adobriyan at gmail.com
Thu May 21 21:55:17 PDT 2009


In theory (and eventually in practice), an x86_64 kernel built with COMPAT=y
will restore i386 images, and vice versa. There are still some small problems,
so this does not work yet, but it is worth mentioning anyway.

Right now, an x86_64 kernel restores only x86_64 images and only 64-bit tasks.

Signed-off-by: Alexey Dobriyan <adobriyan at gmail.com>
---
 arch/x86/ia32/ia32entry.S        |    2 +
 arch/x86/include/asm/unistd_64.h |    4 +
 include/linux/kstate-image.h     |   36 ++++
 include/linux/kstate.h           |    2 +-
 kernel/kstate/Makefile           |    1 +
 kernel/kstate/kstate-x86_64.c    |  336 ++++++++++++++++++++++++++++++++++++++
 6 files changed, 380 insertions(+), 1 deletions(-)
 create mode 100644 kernel/kstate/kstate-x86_64.c

diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index a505202..b12e911 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -830,4 +830,6 @@ ia32_sys_call_table:
 	.quad sys_inotify_init1
 	.quad compat_sys_preadv
 	.quad compat_sys_pwritev
+	.quad sys_checkpoint		/* 335 */
+	.quad sys_restart
 ia32_syscall_end:
diff --git a/arch/x86/include/asm/unistd_64.h b/arch/x86/include/asm/unistd_64.h
index f818294..a839c66 100644
--- a/arch/x86/include/asm/unistd_64.h
+++ b/arch/x86/include/asm/unistd_64.h
@@ -657,6 +657,10 @@ __SYSCALL(__NR_inotify_init1, sys_inotify_init1)
 __SYSCALL(__NR_preadv, sys_preadv)
 #define __NR_pwritev				296
 __SYSCALL(__NR_pwritev, sys_pwritev)
+#define __NR_checkpoint				297
+__SYSCALL(__NR_checkpoint, sys_checkpoint)
+#define __NR_restart				298
+__SYSCALL(__NR_restart, sys_restart)
 
 
 #ifndef __NO_STUBS
diff --git a/include/linux/kstate-image.h b/include/linux/kstate-image.h
index 3c93432..d697d97 100644
--- a/include/linux/kstate-image.h
+++ b/include/linux/kstate-image.h
@@ -28,6 +28,7 @@ struct kstate_image_header {
 	/* Mutable part. */
 	/* Arch of the kernel which dumped the image. */
 #define KSTATE_ARCH_I386	1
+#define KSTATE_ARCH_X86_64	2
 	__le32	kernel_arch;
 	/*
 	 * Distributions are expected to leave image version alone and
@@ -74,6 +75,8 @@ struct kstate_image_task_struct {
 #define KSTATE_SEG_NULL		0
 #define KSTATE_SEG_USER32_CS	1
 #define KSTATE_SEG_USER32_DS	2
+#define KSTATE_SEG_USER64_CS	3
+#define KSTATE_SEG_USER64_DS	4
 #define KSTATE_SEG_TLS		0x4000	/* 0100 0000 0000 00xx */
 #define KSTATE_SEG_LDT		0x8000	/* 100x xxxx xxxx xxxx */
 
@@ -110,6 +113,39 @@ struct kstate_image_task_struct_i386 {
 	/* __u8	xstate[len_xstate]; */
 } __packed;
 
+struct kstate_image_task_struct_x86_64 {	/* x86_64 arch-specific part of a task image */
+	__u64		r15;		/* r15 .. rsp: register frame, copied to/from struct pt_regs */
+	__u64		r14;
+	__u64		r13;
+	__u64		r12;
+	__u64		rbp;
+	__u64		rbx;
+	__u64		r11;
+	__u64		r10;
+	__u64		r9;
+	__u64		r8;
+	__u64		rax;
+	__u64		rcx;
+	__u64		rdx;
+	__u64		rsi;
+	__u64		rdi;
+	__u64		orig_rax;	/* pt_regs orig_ax */
+	__u64		rip;
+	__u64		rflags;		/* validated by check_rflags() */
+	__u64		rsp;
+
+	__u64		fs;		/* thread.fs; must be below TASK_SIZE_MAX */
+	__u64		gs;		/* thread.gs; must be below TASK_SIZE_MAX */
+	__u16		cs;		/* cs .. ss: selectors in KSTATE_SEG_* encoding, not raw values */
+	__u16		ds;
+	__u16		es;
+	__u16		fsindex;
+	__u16		gsindex;
+	__u16		ss;
+
+	__u64		tls_array[3];	/* byte copy of thread.tls_array (3 descriptors, 8 bytes each) */
+} __packed;
+
 struct kstate_image_mm_struct {
 	struct kstate_object_header hdr;
 
diff --git a/include/linux/kstate.h b/include/linux/kstate.h
index c4b55b6..95898ec 100644
--- a/include/linux/kstate.h
+++ b/include/linux/kstate.h
@@ -67,7 +67,7 @@ int kstate_collect_all_file(struct kstate_context *ctx);
 int kstate_dump_all_file(struct kstate_context *ctx);
 int kstate_restore_file(struct kstate_context *ctx, kstate_ref_t *ref);
 
-#if defined(CONFIG_X86_32)
+#if defined(CONFIG_X86_32) || defined(CONFIG_X86_64)
 extern const __u32 kstate_kernel_arch;
 int kstate_arch_check_image_header(struct kstate_image_header *i);
 
diff --git a/kernel/kstate/Makefile b/kernel/kstate/Makefile
index ca19a22..0678fc9 100644
--- a/kernel/kstate/Makefile
+++ b/kernel/kstate/Makefile
@@ -7,3 +7,4 @@ kstate-y += kstate-mm.o
 kstate-y += kstate-object.o
 kstate-y += kstate-task.o
 kstate-$(CONFIG_X86_32) += kstate-x86_32.o
+kstate-$(CONFIG_X86_64) += kstate-x86_64.o
diff --git a/kernel/kstate/kstate-x86_64.c b/kernel/kstate/kstate-x86_64.c
new file mode 100644
index 0000000..0d85704
--- /dev/null
+++ b/kernel/kstate/kstate-x86_64.c
@@ -0,0 +1,336 @@
+/* Copyright (C) 2000-2009 Parallels Holdings, Ltd. */
+#include <linux/sched.h>
+
+#include <linux/kstate.h>
+#include <linux/kstate-image.h>
+
+const __u32 kstate_kernel_arch = KSTATE_ARCH_X86_64;	/* arch id of this kernel; declared extern in <linux/kstate.h> */
+
+int kstate_arch_check_image_header(struct kstate_image_header *i)
+{
+	if (i->kernel_arch != cpu_to_le32(KSTATE_ARCH_X86_64))
+		return -EINVAL;	/* header names a kernel arch other than x86_64 */
+	return 0;
+}
+
+__u32 kstate_task_struct_arch(struct task_struct *tsk)
+{
+	return KSTATE_ARCH_X86_64;	/* tsk is not examined: every task is reported as x86_64 */
+}
+
+static int check_rflags(__u64 rflags)	/* 0 if the image's rflags is acceptable, -EINVAL otherwise */
+{
+	rflags &= ~X86_EFLAGS_CF;	/* mask out every flag the image may carry in either state */
+	rflags &= ~X86_EFLAGS_PF;
+	rflags &= ~X86_EFLAGS_AF;
+	rflags &= ~X86_EFLAGS_ZF;
+	rflags &= ~X86_EFLAGS_SF;
+	rflags &= ~X86_EFLAGS_TF;
+	rflags &= ~X86_EFLAGS_DF;
+	rflags &= ~X86_EFLAGS_OF;
+	rflags &= ~X86_EFLAGS_NT;
+	rflags &= ~X86_EFLAGS_AC;
+	rflags &= ~X86_EFLAGS_ID;
+	if (rflags != (X86_EFLAGS_IF|0x2)) {	/* what is left must be exactly IF plus bit 1 (0x2) */
+		pr_debug("%s: rflags %016llx\n", __func__, (unsigned long long)rflags);
+		return -EINVAL;
+	}
+	return 0;
+}
+
+static int check_segment64(__u16 seg)	/* validate one KSTATE_SEG_* encoded selector read from an image */
+{
+	switch (seg) {
+	case KSTATE_SEG_NULL:	/* the three fixed encodings are accepted as-is */
+	case KSTATE_SEG_USER64_CS:
+	case KSTATE_SEG_USER64_DS:
+		return 0;
+	}
+	if (seg & KSTATE_SEG_TLS) {	/* remaining bits: TLS slot number counted from GDT_ENTRY_TLS_MIN */
+		if ((seg & ~KSTATE_SEG_TLS) > GDT_ENTRY_TLS_MAX - GDT_ENTRY_TLS_MIN) {
+			pr_debug("%s: seg %04x, GDT_ENTRY_TLS_MIN %u, GDT_ENTRY_TLS_MAX %u\n", __func__, seg, GDT_ENTRY_TLS_MIN, GDT_ENTRY_TLS_MAX);
+			return -EINVAL;
+		}
+		return 0;
+	}
+	if (seg & KSTATE_SEG_LDT) {	/* remaining bits: LDT entry index, at most 0x1fff */
+		if ((seg & ~KSTATE_SEG_LDT) > 0x1fff) {
+			pr_debug("%s: seg %04x\n", __func__, seg);
+			return -EINVAL;
+		}
+		return 0;
+	}
+	pr_debug("%s: seg %04x\n", __func__, seg);	/* none of the known encodings matched */
+	return -EINVAL;
+}
+
+static int check_tls(struct desc_struct *desc)
+{
+	if (desc->l == 0 && desc->s == 1 && desc->dpl == 3)
+		return 0;	/* only descriptors with l == 0, s == 1, dpl == 3 pass */
+	return -EINVAL;
+}
+
+static int check_image_task_struct_x86_64(struct kstate_image_task_struct *tsk_i)
+{
+	struct kstate_image_task_struct_x86_64 *i = (void *)(tsk_i + 1);	/* arch part starts right after the generic header */
+	int rv;
+
+	if (tsk_i->hdr.obj_len < sizeof(*tsk_i) + sizeof(*i))	/* object too short to hold both parts */
+		return -EINVAL;
+
+	rv = check_rflags(i->rflags);
+	if (rv < 0)
+		return rv;
+
+	if (i->fs >= TASK_SIZE_MAX)	/* fs and gs values must lie below TASK_SIZE_MAX */
+		return -EINVAL;
+	if (i->gs >= TASK_SIZE_MAX)
+		return -EINVAL;
+
+	if (i->cs == KSTATE_SEG_NULL)	/* unlike the other selectors, cs may not be null */
+		return -EINVAL;
+	rv = check_segment64(i->cs);
+	if (rv < 0)
+		return rv;
+	rv = check_segment64(i->ds);
+	if (rv < 0)
+		return rv;
+	rv = check_segment64(i->es);
+	if (rv < 0)
+		return rv;
+	rv = check_segment64(i->fsindex);
+	if (rv < 0)
+		return rv;
+	rv = check_segment64(i->gsindex);
+	if (rv < 0)
+		return rv;
+	rv = check_segment64(i->ss);
+	if (rv < 0)
+		return rv;
+
+	if (i->tls_array[0]) {	/* all-zero TLS slots are skipped, non-zero ones go through check_tls() */
+		rv = check_tls((struct desc_struct *)&i->tls_array[0]);
+		if (rv < 0)
+			return rv;
+	}
+	if (i->tls_array[1]) {
+		rv = check_tls((struct desc_struct *)&i->tls_array[1]);
+		if (rv < 0)
+			return rv;
+	}
+	if (i->tls_array[2]) {
+		rv = check_tls((struct desc_struct *)&i->tls_array[2]);
+		if (rv < 0)
+			return rv;
+	}
+
+	return 0;	/* NOTE(review): rip, rsp and the general registers are not validated here - confirm that is intended */
+}
+
+int kstate_arch_check_image_task_struct(struct kstate_image_task_struct *i)
+{
+	if (i->tsk_arch != KSTATE_ARCH_X86_64)
+		return -EINVAL;	/* only x86_64 task images are understood here */
+	return check_image_task_struct_x86_64(i);
+}
+
+unsigned int kstate_arch_len_task_struct(struct task_struct *tsk)
+{
+	return sizeof(struct kstate_image_task_struct_x86_64);	/* tsk is unused: the arch part has a fixed size */
+}
+
+int kstate_arch_check_task_struct(struct task_struct *tsk)	/* 0 if tsk passes every check below, else WARN and -EINVAL */
+{
+	struct restart_block *rb;
+
+#ifdef CONFIG_COMPAT
+	if (test_tsk_thread_flag(tsk, TIF_IA32)) {	/* TIF_IA32 tasks are rejected */
+		WARN_ON(1);
+		return -EINVAL;
+	}
+#endif
+	if (test_tsk_thread_flag(tsk, TIF_DEBUG)) {	/* presumably hardware debug registers in use - TODO confirm */
+		WARN_ON(1);
+		return -EINVAL;
+	}
+	if (tsk->thread.xstate) {	/* the x86_64 image part has no field for xstate */
+		WARN_ON(1);
+		return -EINVAL;
+	}
+	rb = &task_thread_info(tsk)->restart_block;
+	if (rb->fn != current_thread_info()->restart_block.fn) {	/* fn must match the calling task's; presumably the default handler - verify */
+		WARN(1, "rb->fn = %pF\n", rb->fn);
+		return -EINVAL;
+	}
+	if (tsk->thread.io_bitmap_ptr) {	/* the x86_64 image part has no field for an I/O bitmap */
+		WARN_ON(1);
+		return -EINVAL;
+	}
+#ifdef CONFIG_X86_DS
+	if (tsk->thread.ds_ctx) {	/* the x86_64 image part has no field for ds_ctx */
+		WARN_ON(1);
+		return -EINVAL;
+	}
+#endif
+	return 0;
+}
+
+static __u16 encode_segment(u16 seg)	/* raw selector -> KSTATE_SEG_* image encoding */
+{
+	if (seg == 0)
+		return KSTATE_SEG_NULL;
+	BUG_ON((seg & 3) != 3);	/* every non-null selector is required to have its low two bits set */
+	if (seg & 4)	/* bit 2 set: stored as an LDT reference carrying the selector's index */
+		return KSTATE_SEG_LDT | (seg >> 3);
+
+	if (seg == __USER_CS)
+		return KSTATE_SEG_USER64_CS;
+	if (seg == __USER_DS)
+		return KSTATE_SEG_USER64_DS;
+
+	if (GDT_ENTRY_TLS_MIN <= (seg >> 3) && (seg >> 3) <= GDT_ENTRY_TLS_MAX)	/* TLS slot, stored relative to GDT_ENTRY_TLS_MIN */
+		return KSTATE_SEG_TLS | ((seg >> 3) - GDT_ENTRY_TLS_MIN);
+	BUG();	/* NOTE(review): any other selector is fatal rather than a dump error - confirm user space cannot load one */
+}
+
+static u16 decode_segment(__u16 seg)	/* KSTATE_SEG_* image encoding -> raw selector */
+{
+	if (seg == KSTATE_SEG_NULL)
+		return 0;
+	if (seg == KSTATE_SEG_USER64_CS)
+		return __USER_CS;
+	if (seg == KSTATE_SEG_USER64_DS)
+		return __USER_DS;
+
+	BUILD_BUG_ON(GDT_ENTRY_TLS_MAX - GDT_ENTRY_TLS_MIN + 1 != 3);	/* the image format carries exactly 3 TLS slots */
+	if ((seg & KSTATE_SEG_TLS) == KSTATE_SEG_TLS) {
+		seg &= ~KSTATE_SEG_TLS;
+		if (seg <= GDT_ENTRY_TLS_MAX - GDT_ENTRY_TLS_MIN)
+			return ((GDT_ENTRY_TLS_MIN + seg) << 3) | 3;	/* rebuild the selector with low bits 011 */
+	}
+	if ((seg & KSTATE_SEG_LDT) == KSTATE_SEG_LDT) {
+		seg &= ~KSTATE_SEG_LDT;
+		return (seg << 3) | 7;	/* rebuild the selector with low bits 111 */
+	}
+	BUG();	/* presumably unreachable once check_segment64() has accepted the value - verify call order */
+}
+
+static int dump_task_struct_x86_64(struct kstate_context *ctx, struct task_struct *tsk, void *arch_i)
+{
+	struct kstate_image_task_struct_x86_64 *i = arch_i;	/* destination: arch part of the task image */
+	struct pt_regs *regs = task_pt_regs(tsk);	/* source of the register values below */
+
+	i->r15 = regs->r15;	/* registers are copied verbatim from pt_regs */
+	i->r14 = regs->r14;
+	i->r13 = regs->r13;
+	i->r12 = regs->r12;
+	i->rbp = regs->bp;
+	i->rbx = regs->bx;
+	i->r11 = regs->r11;
+	i->r10 = regs->r10;
+	i->r9 = regs->r9;
+	i->r8 = regs->r8;
+	i->rax = regs->ax;
+	i->rcx = regs->cx;
+	i->rdx = regs->dx;
+	i->rsi = regs->si;
+	i->rdi = regs->di;
+	i->orig_rax = regs->orig_ax;
+	i->rip = regs->ip;
+	i->rflags = regs->flags;
+	i->rsp = regs->sp;
+
+	i->fs = tsk->thread.fs;	/* fs/gs values come from thread_struct, not pt_regs */
+	i->gs = tsk->thread.gs;
+	i->cs = encode_segment(regs->cs);	/* selectors are stored in KSTATE_SEG_* form; cs/ss from pt_regs, the rest from thread_struct */
+	i->ds = encode_segment(tsk->thread.ds);
+	i->es = encode_segment(tsk->thread.es);
+	i->fsindex = encode_segment(tsk->thread.fsindex);
+	i->gsindex = encode_segment(tsk->thread.gsindex);
+	i->ss = encode_segment(regs->ss);
+
+	BUILD_BUG_ON(sizeof(tsk->thread.tls_array[0]) != 8);	/* the memcpy below relies on 3 descriptors of 8 bytes */
+	BUILD_BUG_ON(sizeof(tsk->thread.tls_array) != 3 * 8);
+	memcpy(i->tls_array, tsk->thread.tls_array, sizeof(i->tls_array));
+
+	return 0;	/* no failure path; ctx is not used */
+}
+
+int kstate_arch_dump_task_struct(struct kstate_context *ctx, struct task_struct *tsk, void *arch_i)
+{
+	return dump_task_struct_x86_64(ctx, tsk, arch_i);	/* no per-task arch dispatch: always the x86_64 dumper */
+}
+
+static int restore_task_struct_x86_64(struct task_struct *tsk, struct kstate_image_task_struct_x86_64 *i)
+{
+	struct pt_regs *regs = task_pt_regs(tsk);	/* register frame of tsk that receives the image values */
+
+	tsk->thread.sp = (unsigned long)regs;	/* thread.sp: address of the pt_regs frame */
+	tsk->thread.sp0 = (unsigned long)(regs + 1);	/* thread.sp0: first byte past that frame */
+
+	regs->r15 = i->r15;	/* registers are copied verbatim from the image */
+	regs->r14 = i->r14;
+	regs->r13 = i->r13;
+	regs->r12 = i->r12;
+	regs->bp = i->rbp;
+	regs->bx = i->rbx;
+	regs->r11 = i->r11;
+	regs->r10 = i->r10;
+	regs->r9 = i->r9;
+	regs->r8 = i->r8;
+	regs->ax = i->rax;
+	regs->cx = i->rcx;
+	regs->dx = i->rdx;
+	regs->si = i->rsi;
+	regs->di = i->rdi;
+	regs->orig_ax = i->orig_rax;
+	regs->ip = i->rip;
+	regs->flags = i->rflags;
+	regs->sp = i->rsp;
+	tsk->thread.usersp = regs->sp;	/* usersp is kept equal to the restored rsp */
+
+	tsk->thread.fs = i->fs;
+	tsk->thread.gs = i->gs;
+	regs->cs = decode_segment(i->cs);	/* KSTATE_SEG_* encodings are turned back into raw selectors */
+	tsk->thread.ds = decode_segment(i->ds);
+	tsk->thread.es = decode_segment(i->es);
+	tsk->thread.fsindex = decode_segment(i->fsindex);
+	tsk->thread.gsindex = decode_segment(i->gsindex);
+	regs->ss = decode_segment(i->ss);
+
+	memcpy(tsk->thread.tls_array, i->tls_array, sizeof(i->tls_array));	/* byte copy of all 3 TLS descriptors */
+
+	set_tsk_thread_flag(tsk, TIF_FORK);	/* presumably so tsk first runs via the fork return path - TODO confirm */
+	return 0;
+}
+
+int kstate_arch_restore_task_struct(struct task_struct *tsk, struct kstate_image_task_struct *i)
+{
+	if (i->tsk_arch == KSTATE_ARCH_X86_64) {
+		return restore_task_struct_x86_64(tsk, (void *)(i + 1));	/* arch part sits right after the generic task header */
+	}
+	BUG();	/* any other tsk_arch is fatal here; presumably rejected earlier by kstate_arch_check_image_task_struct() - verify */
+}
+
+int kstate_arch_check_mm_struct(struct mm_struct *mm)	/* 0 if mm has no LDT, else WARN and -EINVAL */
+{
+	mutex_lock(&mm->context.lock);	/* context.lock is held while the LDT fields are read */
+	if (mm->context.ldt || mm->context.size != 0) {	/* an LDT pointer or a non-zero size both count as "has LDT" */
+		mutex_unlock(&mm->context.lock);
+		WARN_ON(1);
+		return -EINVAL;
+	}
+	mutex_unlock(&mm->context.lock);
+	return 0;
+}
+
+unsigned int kstate_arch_len_mm_struct(struct mm_struct *mm)
+{
+	return 0;	/* mm is unused: the reported arch-specific mm length is always zero */
+}
+
+int kstate_arch_dump_mm_struct(struct kstate_context *ctx, struct mm_struct *mm, void *arch_i)
+{
+	return 0;	/* writes nothing to arch_i and always succeeds */
+}
-- 
1.5.6.5

_______________________________________________
Containers mailing list
Containers at lists.linux-foundation.org
https://lists.linux-foundation.org/mailman/listinfo/containers




More information about the Devel mailing list