[CRIU] [PATCH] arch/ppc64: Add PowerPC 64 LE support

Laurent Dufour ldufour at linux.vnet.ibm.com
Wed Apr 29 08:47:15 PDT 2015


This patch initiates the ppc64le architecture support in CRIU.

Note that ppc64 (Big Endian) architecture is not yet supported since there
are still several issues to address with this architecture. However, in the
long term, the two architectures should be addressed using almost the
same code, sharing the ppc64 directory.

Major ppc64 issues:

Loader is not involved when the parasite code is loaded. So no relocation
is done for the parasite code. As a consequence r2 must be set manually
when entering the parasite code, and GOT is not filled.

Furthermore, the r2 fixup code at the service's global entry point, which
has not been fixed up by the loader, must not be run. Branching to the
local entry point, as the assembly code does, jumps over it.

In the long term, relocation should be done when loading the parasite code.

We are introducing 2 trampolines for the 2 entry points of the restorer
blob.  These entry points are dealing with r2. These ppc64 specific entry
points are overriding the standard ones in sigreturn_restore() from
cr-restore.c.  Instead of using #ifdef, we may introduce a per arch wrapper
here.

CRIU needs 2 kernel patches to run on powerpc, which are not yet upstream:
 - Tracking the vDSO remapping
 - Enabling the kcmp system call on powerpc

Feature not yet supported:
- Altivec registers C/R
- VSX registers C/R
- TM support
- a lot of things I missed..

Signed-off-by: Laurent Dufour <ldufour at linux.vnet.ibm.com>
---
 Makefile                                  |  18 +-
 arch/ppc64/Makefile                       |  55 +++
 arch/ppc64/cpu.c                          |  45 +++
 arch/ppc64/crtools.c                      | 293 +++++++++++++++
 arch/ppc64/include/asm/atomic.h           | 112 ++++++
 arch/ppc64/include/asm/bitops.h           |  11 +
 arch/ppc64/include/asm/bitsperlong.h      |   6 +
 arch/ppc64/include/asm/cmpxchg.h          |  96 +++++
 arch/ppc64/include/asm/cpu.h              |   1 +
 arch/ppc64/include/asm/dump.h             |  11 +
 arch/ppc64/include/asm/fpu.h              |   4 +
 arch/ppc64/include/asm/int.h              |   6 +
 arch/ppc64/include/asm/linkage.h          |  20 +
 arch/ppc64/include/asm/page.h             |  23 ++
 arch/ppc64/include/asm/parasite-syscall.h |  17 +
 arch/ppc64/include/asm/parasite.h         |   7 +
 arch/ppc64/include/asm/prlimit.h          |  14 +
 arch/ppc64/include/asm/processor-flags.h  |   4 +
 arch/ppc64/include/asm/restore.h          |  33 ++
 arch/ppc64/include/asm/restorer.h         | 136 +++++++
 arch/ppc64/include/asm/string.h           |  11 +
 arch/ppc64/include/asm/types.h            | 111 ++++++
 arch/ppc64/include/asm/vdso.h             | 172 +++++++++
 arch/ppc64/parasite-head.S                |  44 +++
 arch/ppc64/restorer-trampoline.S          |  33 ++
 arch/ppc64/restorer.c                     |  14 +
 arch/ppc64/syscall-common-ppc64.S         |  32 ++
 arch/ppc64/syscall-ppc64.def              |  99 +++++
 arch/ppc64/syscalls-ppc64.sh              |  54 +++
 arch/ppc64/vdso-pie.c                     | 594 ++++++++++++++++++++++++++++++
 arch/ppc64/vdso-trampoline.S              |  11 +
 arch/ppc64/vdso.c                         | 309 ++++++++++++++++
 cr-restore.c                              |   5 +
 include/image.h                           |   4 +
 pie/Makefile                              |   6 +
 pie/pie.lds.S.in                          |   2 +
 protobuf/Makefile                         |   1 +
 protobuf/core-ppc64.proto                 |  23 ++
 protobuf/core.proto                       |   3 +
 39 files changed, 2439 insertions(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index ed2a1992efd5..208557e33b21 100644
--- a/Makefile
+++ b/Makefile
@@ -43,7 +43,6 @@ ARCH ?= $(shell uname -m | sed		\
 		-e s/sun4u/sparc64/	\
 		-e s/s390x/s390/	\
 		-e s/parisc64/parisc/	\
-		-e s/ppc.*/powerpc/	\
 		-e s/mips.*/mips/	\
 		-e s/sh[234].*/sh/)
 
@@ -85,6 +84,20 @@ ifeq ($(SRCARCH),arm)
 	export PROTOUFIX
 endif
 
+#
+# The PowerPC 64 bits architecture could be big or little endian.
+# They are handled in the same way.
+#
+ifeq ($(shell echo $(ARCH) | sed -e 's/ppc64.*/ppc64/'),ppc64)
+	ifeq ($(ARCH),ppc64)
+		error	:= $(error ppc64 big endian not yet supported)
+	endif
+	SRCARCH	:= ppc64
+	DEFINES := -DCONFIG_PPC64
+	LDARCH	:= powerpc:common64
+	VDSO	:= y
+endif
+
 SRCARCH		?= $(ARCH)
 LDARCH		?= $(SRCARCH)
 
@@ -193,6 +206,9 @@ PROGRAM-BUILTINS	+= $(ARCH_DIR)/vdso-pie.o
 ifeq ($(SRCARCH),aarch64)
 PROGRAM-BUILTINS	+= $(ARCH_DIR)/intraprocedure.o
 endif
+ifeq ($(SRCARCH),ppc64)
+PROGRAM-BUILTINS	+= $(ARCH_DIR)/vdso-trampoline.o
+endif
 endif
 
 PROGRAM-BUILTINS	+= pie/util-fd.o
diff --git a/arch/ppc64/Makefile b/arch/ppc64/Makefile
new file mode 100644
index 000000000000..c5d332364aa2
--- /dev/null
+++ b/arch/ppc64/Makefile
@@ -0,0 +1,55 @@
+targets		+= syscalls
+targets		+= crtools
+
+SYS-ASM		:= syscalls.S
+
+syscalls-asm-y	+= $(SYS-ASM:.S=).o
+crtools-obj-y	+= crtools.o
+crtools-obj-y	+= cpu.o
+
+SYS-DEF		:= syscall-ppc64.def
+SYS-ASM-COMMON	:= syscall-common-ppc64.S
+
+SYS-TYPES	:= include/syscall-types.h
+SYS-CODES	:= include/syscall-codes.h
+SYS-PROTO	:= include/syscall.h
+
+SYS-GEN		:= syscalls-ppc64.sh
+
+SYS-EXEC-TBL	:= sys-exec-tbl.c
+
+syscalls-asm-y-asmflags	:= -fpie -Wstrict-prototypes -Wa,--noexecstack
+syscalls-asm-y-asmflags += -nostdlib -fomit-frame-pointer -I$(obj)
+
+ASMFLAGS	+= -D__ASSEMBLY__
+
+$(obj)/$(SYS-ASM): $(obj)/$(SYS-GEN) $(obj)/$(SYS-DEF) $(obj)/$(SYS-ASM-COMMON) $(SYS-TYPES)
+	$(E) "  GEN     " $@
+	$(Q) $(SH)				\
+		$(obj)/$(SYS-GEN) --asm		\
+		$(obj)/$(SYS-DEF)		\
+		$(SYS-CODES)			\
+		$(SYS-PROTO)			\
+		$(obj)/$(SYS-ASM)		\
+		$(SYS-ASM-COMMON)		\
+		$(SYS-TYPES)
+
+$(obj)/syscalls.o: $(obj)/$(SYS-ASM)
+
+$(obj)/$(SYS-EXEC-TBL): $(obj)/$(SYS-GEN) $(obj)/$(SYS-DEF)
+	$(E) "  GEN     " $@
+	$(Q) $(SH)				\
+		$(obj)/$(SYS-GEN) --exec	\
+		$(obj)/$(SYS-DEF)		\
+		$(obj)/$(SYS-EXEC-TBL)
+
+_all += $(obj)/$(SYS-EXEC-TBL)
+
+cleanup-y += $(obj)/$(SYS-EXEC-TBL) $(obj)/$(SYS-ASM)
+cleanup-y += $(SYS-CODES)
+cleanup-y += $(SYS-PROTO)
+
+ifneq ($(MAKECMDGOALS),clean)
+deps-after := $(obj)/$(SYS-ASM)
+incdeps := y
+endif
diff --git a/arch/ppc64/cpu.c b/arch/ppc64/cpu.c
new file mode 100644
index 000000000000..040fe14fcfb7
--- /dev/null
+++ b/arch/ppc64/cpu.c
@@ -0,0 +1,45 @@
+#undef	LOG_PREFIX
+#define LOG_PREFIX "cpu: "
+
+#include <errno.h>
+#include "cpu.h"
+
+bool cpu_has_feature(unsigned int feature)
+{
+	return false;
+}
+
+int cpu_init(void)
+{
+	return 0;
+}
+
+int cpu_dump_cpuinfo(void)
+{
+	return 0;
+}
+
+int cpu_validate_cpuinfo(void)
+{
+	return 0;
+}
+
+int cpu_dump_cpuinfo_single(void)
+{
+	return -ENOTSUP;
+}
+
+int cpu_validate_image_cpuinfo_single(void)
+{
+	return -ENOTSUP;
+}
+
+int cpuinfo_dump(void)
+{
+	return -ENOTSUP;
+}
+
+int cpuinfo_check(void)
+{
+	return -ENOTSUP;
+}
diff --git a/arch/ppc64/crtools.c b/arch/ppc64/crtools.c
new file mode 100644
index 000000000000..31cef5d222d8
--- /dev/null
+++ b/arch/ppc64/crtools.c
@@ -0,0 +1,293 @@
+#include <string.h>
+#include <unistd.h>
+#include <elf.h>
+#include <sys/user.h>
+
+#include "asm/types.h"
+#include "asm/fpu.h"
+#include "asm/restorer.h"
+
+#include "cr_options.h"
+#include "compiler.h"
+#include "ptrace.h"
+#include "parasite-syscall.h"
+#include "syscall.h"
+#include "log.h"
+#include "util.h"
+#include "cpu.h"
+#include "errno.h"
+
+#include "protobuf.h"
+#include "protobuf/core.pb-c.h"
+#include "protobuf/creds.pb-c.h"
+
+/*
+ * Injected syscall instruction
+ */
+const u32 code_syscall[] = {
+	0x44000002,		/* sc 		*/
+	0x0fe00000		/* twi 31,0,0	*/
+};
+
+const int code_syscall_size = sizeof(code_syscall);
+
+static inline void __check_code_syscall(void)
+{
+	BUILD_BUG_ON(sizeof(code_syscall) != BUILTIN_SYSCALL_SIZE);
+	BUILD_BUG_ON(!is_log2(sizeof(code_syscall)));
+}
+
+void parasite_setup_regs(unsigned long new_ip, void *stack, user_regs_struct_t *regs)
+{
+	regs->nip = new_ip;
+	if (stack)
+		regs->gpr[1] = (unsigned long) stack;
+	regs->trap = 0;
+}
+
+bool arch_can_dump_task(pid_t pid)
+{
+	/*
+	 * TODO: We should detect 32bit task when BE support is done.
+	 */
+	return true;
+}
+
+int syscall_seized(struct parasite_ctl *ctl, int nr, unsigned long *ret,
+		unsigned long arg1,
+		unsigned long arg2,
+		unsigned long arg3,
+		unsigned long arg4,
+		unsigned long arg5,
+		unsigned long arg6)
+{
+	user_regs_struct_t regs = ctl->orig.regs;
+	int err;
+
+	regs.gpr[0] = (unsigned long)nr;
+	regs.gpr[3] = arg1;
+	regs.gpr[4] = arg2;
+	regs.gpr[5] = arg3;
+	regs.gpr[6] = arg4;
+	regs.gpr[7] = arg5;
+	regs.gpr[8] = arg6;
+
+	err = __parasite_execute_syscall(ctl, &regs);
+
+	*ret = regs.gpr[3];
+	return err;
+}
+
+/* This is the layout of the POWER7 VSX registers and the way they
+ * overlap with the existing FPR and VMX registers.
+ *
+ *                 VSR doubleword 0               VSR doubleword 1
+ *         ----------------------------------------------------------------
+ * VSR[0]  |             FPR[0]            |                              |
+ *         ----------------------------------------------------------------
+ * VSR[1]  |             FPR[1]            |                              |
+ *         ----------------------------------------------------------------
+ *         |              ...              |                              |
+ *         ----------------------------------------------------------------
+ * VSR[30] |             FPR[30]           |                              |
+ *         ----------------------------------------------------------------
+ * VSR[31] |             FPR[31]           |                              |
+ *         ----------------------------------------------------------------
+ * VSR[32] |                             VR[0]                            |
+ *         ----------------------------------------------------------------
+ * VSR[33] |                             VR[1]                            |
+ *         ----------------------------------------------------------------
+ *         |                              ...                             |
+ *         ----------------------------------------------------------------
+ * VSR[62] |                             VR[30]                           |
+ *         ----------------------------------------------------------------
+ * VSR[63] |                             VR[31]                           |
+ *         ----------------------------------------------------------------
+ *
+ * PTRACE_GETFPREGS returns FPR[0..31] + FPSCR
+ * PTRACE_GETVRREGS returns VR[0..31] + VSCR + VRSAVE
+ * PTRACE_GETVSRREGS returns VSR[0..31]
+ *
+ * PTRACE_GETVSRREGS and PTRACE_GETFPREGS are required since we need
+ * to save FPSCR too.
+ */
+static int get_fpu_regs(pid_t pid, CoreEntry *core)
+{
+	elf_fpregset_t fpregs;
+	UserPpc64FpstateEntry *fpe;
+	int i;
+
+	if (ptrace(PTRACE_GETFPREGS, pid, 0, (void *)&fpregs) < 0) {
+		pr_err("Couldn't get floating-point registers.");
+		return -1;
+	}
+
+	fpe = xmalloc(sizeof(UserPpc64FpstateEntry));
+	if (!fpe)
+		return -1;
+	user_ppc64_fpstate_entry__init(fpe);
+
+	fpe->n_fpregs = NFPREG;
+	fpe->fpregs = xmalloc(fpe->n_fpregs * sizeof(fpe->fpregs[0]));
+	if (!fpe->fpregs) {
+		xfree(fpe);
+		return -1;
+	}
+
+	/* FPSRC is the last (33th) register in the set */
+	for (i=0; i<NFPREG; i++)
+		fpe->fpregs[i] = fpregs[i];
+
+	core->ti_ppc64->fpstate = fpe;
+	return 0;
+}
+
+static void put_fpu_regs(mcontext_t *mc, UserPpc64FpstateEntry *fpe)
+{
+	int i;
+
+	for (i=0; i<fpe->n_fpregs; i++)
+		mc->fp_regs[i] = (double)(fpe->fpregs[i]);
+}
+
+int get_task_regs(pid_t pid, user_regs_struct_t regs, CoreEntry *core)
+{
+	int i;
+
+	pr_info("Dumping GP/FPU registers for %d\n", pid);
+
+	/*
+	 * This is inspired by kernel function check_syscall_restart in
+	 * arch/powerpc/kernel/signal.c
+	 */
+#ifndef TRAP
+#define TRAP(r)              ((r).trap & ~0xF)
+#endif
+
+	if (TRAP(regs) == 0x0C00 && regs.ccr & 0x10000000) {
+		/* Restart the system call */
+		switch (regs.gpr[3]) {
+		case ERESTARTNOHAND:
+		case ERESTARTSYS:
+		case ERESTARTNOINTR:
+			regs.gpr[3] = regs.orig_gpr3;
+			regs.nip -= 4;
+			break;
+		case ERESTART_RESTARTBLOCK:
+			regs.gpr[0] = __NR_restart_syscall;
+			regs.nip -= 4;
+			break;
+		}
+	}
+
+	/* Resetting trap since we are now coming from user space. */
+	regs.trap = 0;
+
+#define assign_reg(dst, src, e) do {			\
+		dst->e = (__typeof__(dst->e))src.e;	\
+} while (0)
+
+	for (i=0; i<32; i++)
+		assign_reg(core->ti_ppc64->gpregs, regs, gpr[i]);
+
+	assign_reg(core->ti_ppc64->gpregs, regs, nip);
+	assign_reg(core->ti_ppc64->gpregs, regs, msr);
+	assign_reg(core->ti_ppc64->gpregs, regs, orig_gpr3);
+	assign_reg(core->ti_ppc64->gpregs, regs, ctr);
+	assign_reg(core->ti_ppc64->gpregs, regs, link);
+	assign_reg(core->ti_ppc64->gpregs, regs, xer);
+	assign_reg(core->ti_ppc64->gpregs, regs, ccr);
+	assign_reg(core->ti_ppc64->gpregs, regs, trap);
+#undef assign_reg
+
+	if (get_fpu_regs(pid, core))
+		return -1;
+
+	return 0;
+}
+
+int arch_alloc_thread_info(CoreEntry *core)
+{
+	ThreadInfoPpc64 *ti_ppc64;
+	UserPpc64RegsEntry *regs;
+
+	ti_ppc64 = xmalloc(sizeof(*ti_ppc64));
+	if(!ti_ppc64)
+		goto err;
+	thread_info_ppc64__init(ti_ppc64);
+	CORE_THREAD_ARCH_INFO(core) = ti_ppc64;
+
+	/* user_ppc64_regs_entry */
+	regs = xmalloc(sizeof(*regs));
+	if (!regs)
+		goto err;
+	user_ppc64_regs_entry__init(regs);
+
+	regs->gpr = xmalloc(32*sizeof(uint64_t));
+	if (!regs->gpr)
+		goto err;
+	regs->n_gpr = 32;
+
+	ti_ppc64->gpregs = regs;
+
+	return 0;
+err:
+	return -1;
+}
+
+void arch_free_thread_info(CoreEntry *core)
+{
+        if (CORE_THREAD_ARCH_INFO(core)) {
+		if (CORE_THREAD_ARCH_INFO(core)->fpstate) {
+			xfree(CORE_THREAD_ARCH_INFO(core)->fpstate->fpregs);
+			xfree(CORE_THREAD_ARCH_INFO(core)->fpstate);
+		}
+                xfree(CORE_THREAD_ARCH_INFO(core)->gpregs->gpr);
+                xfree(CORE_THREAD_ARCH_INFO(core)->gpregs);
+                xfree(CORE_THREAD_ARCH_INFO(core));
+                CORE_THREAD_ARCH_INFO(core) = NULL;
+        }
+}
+
+int restore_fpu(struct rt_sigframe *sigframe, CoreEntry *core)
+{
+	if (CORE_THREAD_ARCH_INFO(core)->fpstate)
+		put_fpu_regs(&sigframe->uc.uc_mcontext,
+			     CORE_THREAD_ARCH_INFO(core)->fpstate);
+	return 0;
+}
+
+int restore_gpregs(struct rt_sigframe *f, UserPpc64RegsEntry *r)
+{
+	int i;
+
+	/* r0 to r31 */
+	for (i=0; i<32; i++)
+		f->uc.uc_mcontext.gp_regs[i] = r->gpr[i];
+
+	f->uc.uc_mcontext.gp_regs[PT_NIP] = r->nip;
+	f->uc.uc_mcontext.gp_regs[PT_MSR] = r->msr;
+	f->uc.uc_mcontext.gp_regs[PT_ORIG_R3] = r->orig_gpr3;
+	f->uc.uc_mcontext.gp_regs[PT_CTR] = r->ctr;
+	f->uc.uc_mcontext.gp_regs[PT_LNK] = r->link;
+	f->uc.uc_mcontext.gp_regs[PT_XER] = r->xer;
+	f->uc.uc_mcontext.gp_regs[PT_CCR] = r->ccr;
+	f->uc.uc_mcontext.gp_regs[PT_TRAP] = r->trap;
+
+	return 0;
+}
+
+void *mmap_seized(struct parasite_ctl *ctl,
+		  void *addr, size_t length, int prot,
+		  int flags, int fd, off_t offset)
+{
+	unsigned long map = 0;
+	int err;
+
+	err = syscall_seized(ctl, __NR_mmap, &map,
+			(unsigned long)addr, length, prot, flags, fd, offset);
+	if (err < 0 || (long)map < 0)
+		map = 0;
+
+	return (void *)map;
+}
diff --git a/arch/ppc64/include/asm/atomic.h b/arch/ppc64/include/asm/atomic.h
new file mode 100644
index 000000000000..4fa33b1c7005
--- /dev/null
+++ b/arch/ppc64/include/asm/atomic.h
@@ -0,0 +1,112 @@
+#ifndef __CR_ATOMIC_H__
+#define __CR_ATOMIC_H__
+
+/*
+ * PowerPC atomic operations
+ *
+ * Copied from kernel header file arch/powerpc/include/asm/atomic.h
+ */
+
+typedef struct {
+        int counter;
+} atomic_t;
+
+#include "asm/cmpxchg.h"
+
+#define PPC_ATOMIC_ENTRY_BARRIER	"lwsync \n"
+#define PPC_ATOMIC_EXIT_BARRIER		"sync  	\n"
+
+#define ATOMIC_INIT(i)		{ (i) }
+
+static __inline__ int atomic_read(const atomic_t *v)
+{
+	int t;
+
+	__asm__ __volatile__("lwz%U1%X1 %0,%1" : "=r"(t) : "m"(v->counter));
+
+	return t;
+}
+
+static __inline__ void atomic_set(atomic_t *v, int i)
+{
+	__asm__ __volatile__("stw%U0%X0 %1,%0" : "=m"(v->counter) : "r"(i));
+}
+
+#define ATOMIC_OP(op, asm_op)						\
+static __inline__ void atomic_##op(int a, atomic_t *v)			\
+{									\
+	int t;								\
+									\
+	__asm__ __volatile__(						\
+"1:	lwarx	%0,0,%3		# atomic_" #op "\n"			\
+	#asm_op " %0,%2,%0\n"						\
+"	stwcx.	%0,0,%3 \n"						\
+"	bne-	1b\n"							\
+	: "=&r" (t), "+m" (v->counter)					\
+	: "r" (a), "r" (&v->counter)					\
+	: "cc");							\
+}									\
+
+ATOMIC_OP(add, add)
+ATOMIC_OP(sub, subf)
+
+#undef ATOMIC_OP
+
+static __inline__ void atomic_inc(atomic_t *v)
+{
+	int t;
+
+	__asm__ __volatile__(
+"1:	lwarx	%0,0,%2		# atomic_inc\n\
+	addic	%0,%0,1\n"
+"	stwcx.	%0,0,%2 \n\
+	bne-	1b"
+	: "=&r" (t), "+m" (v->counter)
+	: "r" (&v->counter)
+	: "cc", "xer");
+}
+
+static __inline__ int atomic_inc_return(atomic_t *v)
+{
+	int t;
+
+	__asm__ __volatile__(
+	PPC_ATOMIC_ENTRY_BARRIER \
+"1:	lwarx	%0,0,%1		# atomic_inc_return\n\
+	addic	%0,%0,1\n"
+"	stwcx.	%0,0,%1 \n\
+	bne-	1b \n" \
+	PPC_ATOMIC_EXIT_BARRIER
+	: "=&r" (t)
+	: "r" (&v->counter)
+	: "cc", "xer", "memory");
+
+	return t;
+}
+
+/*
+ * atomic_inc_and_test - increment and test
+ * @v: pointer of type atomic_t
+ *
+ * Atomically increments @v by 1
+ * and returns true if the result is zero, or false for all
+ * other cases.
+ */
+
+static __inline__ void atomic_dec(atomic_t *v)
+{
+	int t;
+
+	__asm__ __volatile__(
+"1:	lwarx	%0,0,%2		# atomic_dec\n\
+	addic	%0,%0,-1\n"
+"	stwcx.	%0,0,%2\n\
+	bne-	1b"
+	: "=&r" (t), "+m" (v->counter)
+	: "r" (&v->counter)
+	: "cc", "xer");
+}
+
+#define atomic_cmpxchg(v, o, n) (cmpxchg(&((v)->counter), (o), (n)))
+
+#endif /* __CR_ATOMIC_H__ */
diff --git a/arch/ppc64/include/asm/bitops.h b/arch/ppc64/include/asm/bitops.h
new file mode 100644
index 000000000000..f310c5284a2f
--- /dev/null
+++ b/arch/ppc64/include/asm/bitops.h
@@ -0,0 +1,11 @@
+#ifndef __CR_BITOPS_H__
+#define __CR_BITOPS_H__
+
+#include "compiler.h"
+/*
+ * TODO: create some optimized version instead of falling back to the
+ *  generic ones.
+ */
+#include "asm-generic/bitops.h"
+
+#endif /* __CR_BITOPS_H__ */
diff --git a/arch/ppc64/include/asm/bitsperlong.h b/arch/ppc64/include/asm/bitsperlong.h
new file mode 100644
index 000000000000..d95727d193e8
--- /dev/null
+++ b/arch/ppc64/include/asm/bitsperlong.h
@@ -0,0 +1,6 @@
+#ifndef __CR_BITSPERLONG_H__
+#define __CR_BITSPERLONG_H__
+
+#define BITS_PER_LONG 64
+
+#endif /* __CR_BITSPERLONG_H__ */
diff --git a/arch/ppc64/include/asm/cmpxchg.h b/arch/ppc64/include/asm/cmpxchg.h
new file mode 100644
index 000000000000..b93fbdef06c7
--- /dev/null
+++ b/arch/ppc64/include/asm/cmpxchg.h
@@ -0,0 +1,96 @@
+#ifndef __CR_CMPXCHG_H__
+#define __CR_CMPXCHG_H__
+
+/*
+ * Copied from kernel header file arch/powerpc/include/asm/cmpxchg.h
+ */
+
+#define PPC_ACQUIRE_BARRIER		"isync	\n"
+#define PPC_RELEASE_BARRIER		"lwsync	\n"
+
+/*
+ * Compare and exchange - if *p == old, set it to new,
+ * and return the old value of *p.
+ */
+
+static __always_inline unsigned long
+__cmpxchg_u32(volatile unsigned int *p, unsigned long old, unsigned long new)
+{
+	unsigned int prev;
+
+	__asm__ __volatile__ (
+	PPC_RELEASE_BARRIER \
+"1:	lwarx	%0,0,%2		# __cmpxchg_u32\n\
+	cmpw	0,%0,%3\n\
+	bne-	2f\n"
+"	stwcx.	%4,0,%2\n\
+	bne-	1b \n" \
+	PPC_ACQUIRE_BARRIER
+	"\n\
+2:"
+	: "=&r" (prev), "+m" (*p)
+	: "r" (p), "r" (old), "r" (new)
+	: "cc", "memory");
+
+	return prev;
+}
+
+static __always_inline unsigned long
+__cmpxchg_u64(volatile unsigned long *p, unsigned long old, unsigned long new)
+{
+	unsigned long prev;
+
+	__asm__ __volatile__ (
+	PPC_RELEASE_BARRIER \
+"1:	ldarx	%0,0,%2		# __cmpxchg_u64\n\
+	cmpd	0,%0,%3\n\
+	bne-	2f\n\
+	stdcx.	%4,0,%2\n\
+	bne-	1b \n" \
+	PPC_ACQUIRE_BARRIER
+	"\n\
+2:"
+	: "=&r" (prev), "+m" (*p)
+	: "r" (p), "r" (old), "r" (new)
+	: "cc", "memory");
+
+	return prev;
+}
+
+/* This function doesn't exist, so you'll get a linker error
+   if something tries to do an invalid cmpxchg().  */
+#ifdef CR_DEBUG
+static inline void __cmpxchg_called_with_bad_pointer(void)
+{
+	__asm__ __volatile__ (
+		"1:	twi 	31,0,0	# trap\n"
+		"	b 	1b"
+		: : : "memory");
+}
+#else
+extern void __cmpxchg_called_with_bad_pointer(void);
+#endif
+
+static __always_inline unsigned long
+__cmpxchg(volatile void *ptr, unsigned long old, unsigned long new,
+	  unsigned int size)
+{
+	switch (size) {
+	case 4:
+		return __cmpxchg_u32(ptr, old, new);
+	case 8:
+		return __cmpxchg_u64(ptr, old, new);
+	}
+	__cmpxchg_called_with_bad_pointer();
+	return old;
+}
+
+#define cmpxchg(ptr, o, n)						 \
+  ({									 \
+     __typeof__(*(ptr)) _o_ = (o);					 \
+     __typeof__(*(ptr)) _n_ = (n);					 \
+     (__typeof__(*(ptr))) __cmpxchg((ptr), (unsigned long)_o_,		 \
+				    (unsigned long)_n_, sizeof(*(ptr))); \
+  })
+
+#endif /* __CR_CMPXCHG_H__ */
diff --git a/arch/ppc64/include/asm/cpu.h b/arch/ppc64/include/asm/cpu.h
new file mode 100644
index 000000000000..59118c211d10
--- /dev/null
+++ b/arch/ppc64/include/asm/cpu.h
@@ -0,0 +1 @@
+#include <stdbool.h>
diff --git a/arch/ppc64/include/asm/dump.h b/arch/ppc64/include/asm/dump.h
new file mode 100644
index 000000000000..1505fd2983b0
--- /dev/null
+++ b/arch/ppc64/include/asm/dump.h
@@ -0,0 +1,11 @@
+#ifndef __CR_ASM_DUMP_H__
+#define __CR_ASM_DUMP_H__
+
+extern int get_task_regs(pid_t pid, user_regs_struct_t regs, CoreEntry *core);
+extern int arch_alloc_thread_info(CoreEntry *core);
+extern void arch_free_thread_info(CoreEntry *core);
+
+
+#define core_put_tls(core, tls)
+
+#endif
diff --git a/arch/ppc64/include/asm/fpu.h b/arch/ppc64/include/asm/fpu.h
new file mode 100644
index 000000000000..7f476d541a7d
--- /dev/null
+++ b/arch/ppc64/include/asm/fpu.h
@@ -0,0 +1,4 @@
+#ifndef __CR_ASM_FPU_H__
+#define __CR_ASM_FPU_H__
+
+#endif /* __CR_ASM_FPU_H__ */
diff --git a/arch/ppc64/include/asm/int.h b/arch/ppc64/include/asm/int.h
new file mode 100644
index 000000000000..642804e9b485
--- /dev/null
+++ b/arch/ppc64/include/asm/int.h
@@ -0,0 +1,6 @@
+#ifndef __CR_ASM_INT_H__
+#define __CR_ASM_INT_H__
+
+#include "asm-generic/int.h"
+
+#endif /* __CR_ASM_INT_H__ */
diff --git a/arch/ppc64/include/asm/linkage.h b/arch/ppc64/include/asm/linkage.h
new file mode 100644
index 000000000000..03e01dc96543
--- /dev/null
+++ b/arch/ppc64/include/asm/linkage.h
@@ -0,0 +1,20 @@
+#ifndef __CR_LINKAGE_H__
+#define __CR_LINKAGE_H__
+
+#ifdef __ASSEMBLY__
+
+#define GLOBAL(name)		\
+	.globl name;		\
+	name:
+
+#define ENTRY(name)		\
+	.globl name;		\
+	.type name, @function;	\
+	name:
+
+#define END(sym)		\
+	.size sym, . - sym
+
+#endif  /* __ASSEMBLY__ */
+
+#endif /* __CR_LINKAGE_H__ */
diff --git a/arch/ppc64/include/asm/page.h b/arch/ppc64/include/asm/page.h
new file mode 100644
index 000000000000..169c6943d844
--- /dev/null
+++ b/arch/ppc64/include/asm/page.h
@@ -0,0 +1,23 @@
+#ifndef __CR_ASM_PAGE_H__
+#define __CR_ASM_PAGE_H__
+
+/*
+ * Default config for Pseries is to use 64K pages.
+ * See kernel file arch/powerpc/configs/pseries_*defconfig
+ */
+#ifndef PAGE_SHIFT
+# define PAGE_SHIFT	16
+#endif
+
+#ifndef PAGE_SIZE
+# define PAGE_SIZE	(1UL << PAGE_SHIFT)
+#endif
+
+#ifndef PAGE_MASK
+# define PAGE_MASK	(~(PAGE_SIZE - 1))
+#endif
+
+#define PAGE_PFN(addr)	((addr) / PAGE_SIZE)
+#define page_size()	PAGE_SIZE
+
+#endif /* __CR_ASM_PAGE_H__ */
diff --git a/arch/ppc64/include/asm/parasite-syscall.h b/arch/ppc64/include/asm/parasite-syscall.h
new file mode 100644
index 000000000000..7665e207b75e
--- /dev/null
+++ b/arch/ppc64/include/asm/parasite-syscall.h
@@ -0,0 +1,17 @@
+#ifndef __CR_ASM_PARASITE_SYSCALL_H__
+#define __CR_ASM_PARASITE_SYSCALL_H__
+
+struct parasite_ctl;
+
+#define ARCH_SI_TRAP TRAP_BRKPT
+
+extern const char code_syscall[];
+extern const int code_syscall_size;
+
+void parasite_setup_regs(unsigned long new_ip, void *stack, user_regs_struct_t *regs);
+
+void *mmap_seized(struct parasite_ctl *ctl,
+		  void *addr, size_t length, int prot,
+		  int flags, int fd, off_t offset);
+
+#endif
diff --git a/arch/ppc64/include/asm/parasite.h b/arch/ppc64/include/asm/parasite.h
new file mode 100644
index 000000000000..fdbc340b05e2
--- /dev/null
+++ b/arch/ppc64/include/asm/parasite.h
@@ -0,0 +1,7 @@
+#ifndef __ASM_PARASITE_H__
+#define __ASM_PARASITE_H__
+
+/* TLS is accessed through r13, which is already processed */
+static inline void arch_get_tls(tls_t *ptls) { (void)ptls; }
+
+#endif
diff --git a/arch/ppc64/include/asm/prlimit.h b/arch/ppc64/include/asm/prlimit.h
new file mode 100644
index 000000000000..6746ba0e6f19
--- /dev/null
+++ b/arch/ppc64/include/asm/prlimit.h
@@ -0,0 +1,14 @@
+#ifndef __CR_PRLIMIT_H__
+#define __CR_PRLIMIT_H__
+
+#include <sys/types.h>
+#include <sys/time.h>
+#include <sys/resource.h>
+
+#include "config.h"
+
+#ifndef CONFIG_HAS_PRLIMIT
+extern int prlimit(pid_t pid, int resource, const struct rlimit *new_rlimit, struct rlimit *old_rlimit);
+#endif
+
+#endif /* __CR_PRLIMIT_H__ */
diff --git a/arch/ppc64/include/asm/processor-flags.h b/arch/ppc64/include/asm/processor-flags.h
new file mode 100644
index 000000000000..c1888af36fa0
--- /dev/null
+++ b/arch/ppc64/include/asm/processor-flags.h
@@ -0,0 +1,4 @@
+#ifndef __CR_PROCESSOR_FLAGS_H__
+#define __CR_PROCESSOR_FLAGS_H__
+
+#endif
diff --git a/arch/ppc64/include/asm/restore.h b/arch/ppc64/include/asm/restore.h
new file mode 100644
index 000000000000..3ca0c534d843
--- /dev/null
+++ b/arch/ppc64/include/asm/restore.h
@@ -0,0 +1,33 @@
+#ifndef __CR_ASM_RESTORE_H__
+#define __CR_ASM_RESTORE_H__
+
+#include "asm/restorer.h"
+
+#include "protobuf/core.pb-c.h"
+
+/*
+ * Set R2 to blob + 8000 which is the default value
+ * Jump to restore_task_exec_start + 8 since R2 is already set (local call)
+ */
+#define JUMP_TO_RESTORER_BLOB(new_sp, restore_task_exec_start,		\
+			      task_args)				\
+	asm volatile(							\
+		"mr	1,%0		\n"				\
+		"mr	3,%1		\n"				\
+		"mtctr	3		\n"				\
+		"mr   	3,%2		\n"				\
+	        "mr	2,%3		\n"				\
+		"bctr			\n"				\
+		:							\
+		: "r"(new_sp),						\
+		  "r"((unsigned long)restore_task_exec_start),		\
+		  "r"(task_args),					\
+		  "r"((unsigned long)task_args->bootstrap_start + 0x8000) \
+		: "sp", "1", "2", "3", "memory")
+
+/* There is nothing to do since TLS is accessed through r13 */
+#define core_get_tls(pcore, ptls)
+
+int restore_fpu(struct rt_sigframe *sigframe, CoreEntry *core);
+
+#endif /* __CR_ASM_RESTORE_H__ */
diff --git a/arch/ppc64/include/asm/restorer.h b/arch/ppc64/include/asm/restorer.h
new file mode 100644
index 000000000000..0549992def65
--- /dev/null
+++ b/arch/ppc64/include/asm/restorer.h
@@ -0,0 +1,136 @@
+#ifndef __CR_ASM_RESTORER_H__
+#define __CR_ASM_RESTORER_H__
+
+#include <asm/ptrace.h>
+#include <asm/elf.h>
+#include <asm/types.h>
+
+/*
+ * sigcontext structure defined in file
+ *	/usr/include/powerpc64le-linux-gnu/bits/sigcontext.h,
+ * included from /usr/include/signal.h
+ *
+ * Kernel definition can be found in arch/powerpc/include/uapi/asm/sigcontext.h
+ */
+#include <signal.h>
+
+// XXX: the identifier rt_sigcontext is expected to be a struct by the CRIU code
+#define rt_sigcontext sigcontext
+
+#include "sigframe.h"
+#define SIGFRAME_OFFSET 0
+
+/* Copied from the Linux kernel header arch/powerpc/include/asm/ptrace.h */
+#define USER_REDZONE_SIZE       512
+
+/* Copied from the Linux kernel source file arch/powerpc/kernel/signal_64.c */
+#define TRAMP_SIZE      	6
+
+/*
+ * ucontext defined in /usr/include/powerpc64le-linux-gnu/sys/ucontext.h
+ */
+struct rt_sigframe {
+        /* sys_rt_sigreturn requires the ucontext be the first field */
+        struct ucontext uc;
+#if 1
+	/*
+	 * XXX: Assuming that transactional is turned on by default in
+	 * most of the Linux distribution.
+	 */
+        struct ucontext uc_transact;
+#endif
+        unsigned long _unused[2];
+        unsigned int tramp[TRAMP_SIZE];
+        struct rt_siginfo *pinfo;
+        void *puc;
+        struct rt_siginfo info;
+        /* New 64 bit little-endian ABI allows redzone of 512 bytes below sp */
+        char abigap[USER_REDZONE_SIZE];
+} __attribute__ ((aligned (16)));
+
+#define ARCH_RT_SIGRETURN(new_sp)				\
+        asm volatile(						\
+		"mr 1, %0 \n"					\
+		"li 0, "__stringify(__NR_rt_sigreturn)" \n"	\
+		"sc \n"						\
+		:						\
+		: "r"(new_sp)					\
+		: "1", "memory")
+
+/*
+ * Clone trampoline
+ *
+ * See glibc sysdeps/powerpc/powerpc64/sysdep.h for FRAME_MIN_SIZE defines
+ */
+#if _CALL_ELF != 2
+#error Only supporting ABIv2.
+#else
+#define FRAME_MIN_SIZE_PARM     96
+#endif
+#define RUN_CLONE_RESTORE_FN(ret, clone_flags, new_sp, parent_tid, 	\
+			     thread_args, clone_restore_fn)		\
+	asm volatile( 							\
+		"clone_emul:					\n"	\
+		"/* Save fn, args, stack across syscall. */ 	\n"	\
+		"mr	14, %5	/* clone_restore_fn in r14 */ 	\n"	\
+		"mr	15, %6	/* &thread_args[i] in r15 */ 	\n"	\
+		"mr	3, %1	/* clone_flags */ 		\n"	\
+		"ld	4, %2	/* new_sp */ 			\n"	\
+		"mr	5, %3	/* &parent_tid */ 		\n"	\
+		"li	6, 0	/* tls = 0 ? */ 		\n"	\
+		"mr	7, %4	/* &thread_args[i].pid */ 	\n"	\
+		"li	0,"__stringify(__NR_clone)" 		\n"	\
+		"sc 						\n"	\
+		"/* Check for child process.  */		\n"	\
+		"cmpdi   cr1,3,0 				\n"	\
+		"crandc  cr1*4+eq,cr1*4+eq,cr0*4+so 		\n"	\
+		"bne-    cr1,clone_end 				\n"	\
+		"/* child */					\n"	\
+		"addi 14, 14, 8 /* jump over r2 fixup */	\n"	\
+		"mtctr	14					\n"	\
+		"mr	3,15 					\n"	\
+		"bctr 						\n"	\
+		"clone_end:					\n"	\
+		"mr	%0,3 \n"					\
+		: "=r"(ret)			/* %0 */		\
+		: "r"(clone_flags),		/* %1 */		\
+		  "m"(new_sp),			/* %2 */		\
+		  "r"(&parent_tid),		/* %3 */		\
+		  "r"(&thread_args[i].pid),	/* %4 */		\
+		  "r"(clone_restore_fn),	/* %5 */		\
+		  "r"(&thread_args[i])		/* %6 */		\
+		: "memory","0","3","4","5","6","7","14","15")
+
+#define RT_SIGFRAME_UC(rt_sigframe) rt_sigframe->uc
+#define RT_SIGFRAME_REGIP(rt_sigframe) ((long unsigned int)(rt_sigframe)->uc.uc_mcontext.gp_regs[PT_NIP])
+#define RT_SIGFRAME_HAS_FPU(rt_sigframe) (1)
+#define RT_SIGFRAME_FPU(rt_sigframe) ((rt_sigframe)->uc.uc_mcontext)
+
+int restore_gpregs(struct rt_sigframe *f, UserPpc64RegsEntry *r);
+int restore_nonsigframe_gpregs(UserPpc64RegsEntry *r);
+
+/* Nothing to do, TLS is accessed through r13 */
+static inline void restore_tls(tls_t *ptls) { (void)ptls; }
+
+static inline int ptrace_set_breakpoint(pid_t pid, void *addr)
+{
+        return 0;
+}
+
+static inline int ptrace_flush_breakpoints(pid_t pid)
+{
+        return 0;
+}
+
+static inline int sigreturn_prep_fpu_frame(struct rt_sigframe *sigframe,
+					   mcontext_t *sigcontext)
+{
+	return 0;
+}
+
+/*
+ * Defined in arch/ppc64/syscall-common-ppc64.S
+ */
+int sys_shmat(int shmid, const void *shmaddr, int shmflg);
+
+#endif /*__CR_ASM_RESTORER_H__*/
diff --git a/arch/ppc64/include/asm/string.h b/arch/ppc64/include/asm/string.h
new file mode 100644
index 000000000000..034442781678
--- /dev/null
+++ b/arch/ppc64/include/asm/string.h
@@ -0,0 +1,11 @@
+#ifndef __CR_ASM_STRING_H__
+#define __CR_ASM_STRING_H__
+
+#include "compiler.h"
+
+/*
+ * TODO : We may optimize some code here instead of using the generic ones.
+ */
+#include "asm-generic/string.h"
+
+#endif /* __CR_ASM_STRING_H__ */
diff --git a/arch/ppc64/include/asm/types.h b/arch/ppc64/include/asm/types.h
new file mode 100644
index 000000000000..67b7fe2ec4c6
--- /dev/null
+++ b/arch/ppc64/include/asm/types.h
@@ -0,0 +1,111 @@
+#ifndef __CR_ASM_TYPES_H__
+#define __CR_ASM_TYPES_H__
+
+#include <stdbool.h>
+#include <signal.h>
+#include "protobuf/core.pb-c.h"
+
+#include "asm/page.h"
+#include "asm/bitops.h"
+#include "asm/int.h"
+
+/*
+ * Copied from kernel header include/uapi/asm-generic/signal-defs.h
+ */
+typedef void rt_signalfn_t(int, siginfo_t *, void *);
+typedef rt_signalfn_t *rt_sighandler_t;
+
+typedef void rt_restorefn_t(void);
+typedef rt_restorefn_t *rt_sigrestore_t;
+
+#define SIGMAX_OLD	31
+#define SIGMAX		64
+
+/*Copied from the Linux kernel arch/powerpc/include/uapi/asm/signal.h */
+#define _KNSIG		64
+#define _NSIG_BPW       64
+#define _KNSIG_WORDS     (_KNSIG / _NSIG_BPW)
+
+typedef struct {
+        uint64_t sig[_KNSIG_WORDS];
+} k_rtsigset_t;
+
+static inline void ksigfillset(k_rtsigset_t *set)
+{
+        int i;
+        for (i = 0; i < _KNSIG_WORDS; i++)
+                set->sig[i] = (unsigned long)-1;
+}
+
+/* Copied from the Linux kernel arch/powerpc/include/uapi/asm/signal.h */
+#define SA_RESTORER     0x04000000U
+
+typedef struct {
+        rt_sighandler_t rt_sa_handler;
+        unsigned long rt_sa_flags;
+        rt_sigrestore_t rt_sa_restorer;
+        k_rtsigset_t rt_sa_mask;               /* mask last for extensibility */
+} rt_sigaction_t;
+
+/*
+ * Copied from kernel header arch/powerpc/include/uapi/asm/ptrace.h
+ */
+typedef struct {
+        unsigned long gpr[32];
+        unsigned long nip;
+        unsigned long msr;
+        unsigned long orig_gpr3;        /* Used for restarting system calls */
+        unsigned long ctr;
+        unsigned long link;
+        unsigned long xer;
+        unsigned long ccr;
+        unsigned long softe;            /* Soft enabled/disabled */
+        unsigned long trap;             /* Reason for being here */
+        /* N.B. for critical exceptions on 4xx, the dar and dsisr
+           fields are overloaded to hold srr0 and srr1. */
+        unsigned long dar;              /* Fault registers */
+        unsigned long dsisr;            /* on 4xx/Book-E used for ESR */
+        unsigned long result;           /* Result of a system call */
+} user_regs_struct_t;
+
+typedef UserPpc64RegsEntry UserRegsEntry;
+
+#define CORE_ENTRY__MARCH	CORE_ENTRY__MARCH__PPC64
+
+#define ASSIGN_TYPED(a, b) do { a = (typeof(a))b; } while (0)
+#define ASSIGN_MEMBER(a,b,m) do { ASSIGN_TYPED((a)->m, (b)->m); } while (0)
+
+#define REG_RES(regs)           ((u64)(regs).gpr[3])
+#define REG_IP(regs)            ((u64)(regs).nip)
+#define REG_SYSCALL_NR(regs)    ((u64)(regs).gpr[0])
+
+
+#define CORE_THREAD_ARCH_INFO(core) core->ti_ppc64
+
+/*
+ * Copied from the following kernel header files :
+ * 	include/linux/auxvec.h
+ *	arch/powerpc/include/uapi/asm/auxvec.h
+ *	include/linux/mm_types.h
+ */
+#define AT_VECTOR_SIZE_BASE 20
+#define AT_VECTOR_SIZE_ARCH 6
+#define AT_VECTOR_SIZE (2*(AT_VECTOR_SIZE_ARCH + AT_VECTOR_SIZE_BASE + 1))
+
+typedef uint64_t auxv_t;
+
+/* Not used but the structure parasite_dump_thread needs a tls_t field */
+typedef uint64_t tls_t;
+
+/*
+ * Copied from the Linux kernel arch/powerpc/include/asm/processor.h
+ *
+ * NOTE: 32bit tasks are not supported (TASK_SIZE is hard-coded to the 64-bit value).
+ */
+#define TASK_SIZE_USER64 (0x0000400000000000UL)
+#define TASK_SIZE TASK_SIZE_USER64
+
+static inline void *decode_pointer(uint64_t v) { return (void*)v; }
+static inline uint64_t encode_pointer(void *p) { return (uint64_t)p; }
+
+#endif /* __CR_ASM_TYPES_H__ */
diff --git a/arch/ppc64/include/asm/vdso.h b/arch/ppc64/include/asm/vdso.h
new file mode 100644
index 000000000000..8d089dde3a5d
--- /dev/null
+++ b/arch/ppc64/include/asm/vdso.h
@@ -0,0 +1,172 @@
+#ifndef __CR_ASM_VDSO_H__
+#define __CR_ASM_VDSO_H__
+
+#include <sys/types.h>
+
+#include "asm/int.h"
+#include "protobuf/vma.pb-c.h"
+
+struct parasite_ctl;
+struct vm_area_list;
+
+#define VDSO_PROT		(PROT_READ | PROT_EXEC)
+#define VVAR_PROT		(PROT_READ)
+
+#define VDSO_BAD_ADDR		(-1ul)
+#define VVAR_BAD_ADDR		VDSO_BAD_ADDR
+#define VDSO_BAD_PFN		(-1ull)
+#define VVAR_BAD_PFN		VDSO_BAD_PFN
+
+struct vdso_symbol {
+	char			name[32];
+	unsigned long		offset;
+};
+
+#define VDSO_SYMBOL_INIT	{ .offset = VDSO_BAD_ADDR, }
+
+/* Check whether a symbol is present in the symbol table */
+static inline bool vdso_symbol_empty(struct vdso_symbol *s)
+{
+	return s->offset == VDSO_BAD_ADDR && s->name[0] == '\0';
+}
+
+/*
+ * Pick from kernel file arch/powerpc/kernel/vdso64/vdso64.lds.S
+ *
+ * Note that '__kernel_datapage_offset' is not a service but rather data
+ * inside the text page which should not be used as is from user space.
+ */
+enum {
+	VDSO_SYMBOL_CLOCK_GETRES,
+	VDSO_SYMBOL_CLOCK_GETTIME,
+	VDSO_SYMBOL_GET_SYSCALL_MAP,
+	VDSO_SYMBOL_GET_TBFREQ,
+	VDSO_SYMBOL_GETCPU,
+	VDSO_SYMBOL_GETTIMEOFDAY,
+	VDSO_SYMBOL_SIGTRAMP_RT64,
+	VDSO_SYMBOL_SYNC_DICACHE,
+	VDSO_SYMBOL_SYNC_DICACHE_P5,
+	VDSO_SYMBOL_TIME,
+
+	VDSO_SYMBOL_MAX
+};
+
+#define VDSO_SYMBOL_CLOCK_GETRES_NAME		"__kernel_clock_getres"
+#define VDSO_SYMBOL_CLOCK_GETTIME_NAME		"__kernel_clock_gettime"
+#define VDSO_SYMBOL_GET_SYSCALL_MAP_NAME 	"__kernel_get_syscall_map"
+#define VDSO_SYMBOL_GET_TBFREQ_NAME		"__kernel_get_tbfreq"
+#define VDSO_SYMBOL_GETCPU_NAME			"__kernel_getcpu"
+#define VDSO_SYMBOL_GETTIMEOFDAY_NAME		"__kernel_gettimeofday"
+#define VDSO_SYMBOL_SIGTRAMP_RT64_NAME		"__kernel_sigtramp_rt64"
+#define VDSO_SYMBOL_SYNC_DICACHE_NAME		"__kernel_sync_dicache"
+#define VDSO_SYMBOL_SYNC_DICACHE_P5_NAME	"__kernel_sync_dicache_p5"
+#define VDSO_SYMBOL_TIME_NAME			"__kernel_time"
+
+struct vdso_symtable {
+	unsigned long		vma_start;
+	unsigned long		vma_end;
+	unsigned long		vvar_start;
+	unsigned long		vvar_end;
+	struct vdso_symbol	symbols[VDSO_SYMBOL_MAX];
+};
+
+#define VDSO_SYMTABLE_INIT						\
+	{								\
+		.vma_start	= VDSO_BAD_ADDR,			\
+		.vma_end	= VDSO_BAD_ADDR,			\
+		.vvar_start	= VVAR_BAD_ADDR,			\
+		.vvar_end	= VVAR_BAD_ADDR,			\
+		.symbols		= {				\
+			[0 ... VDSO_SYMBOL_MAX - 1] =			\
+				(struct vdso_symbol)VDSO_SYMBOL_INIT,	\
+			},						\
+	}
+
+/* Size of VMA associated with vdso */
+static inline unsigned long vdso_vma_size(struct vdso_symtable *t)
+{
+	return t->vma_end - t->vma_start;
+}
+
+static inline unsigned long vvar_vma_size(struct vdso_symtable *t)
+{
+	return t->vvar_end - t->vvar_start;
+}
+/*
+ * Special mark which allows to identify runtime vdso where
+ * calls from proxy vdso are redirected. This mark usually
+ * placed at the start of vdso area where Elf header lives.
+ * Since such runtime vdso is solely used by the proxy and
+ * nobody else is supposed to access it, it's more or less
+ * safe to overwrite the Elf header with @signature and
+ * @proxy_addr.
+ *
+ * The @proxy_addr deserves a few comments. When we redirect
+ * the calls from proxy to runtime vdso, on next checkpoint
+ * it won't be possible to find which VMA is proxy, thus
+ * we save its address in the member.
+ */
+struct vdso_mark {
+	u64			signature;
+	unsigned long		proxy_vdso_addr;
+
+	unsigned long		version;
+
+	/*
+	 * In case of the new vDSO format, the VVAR area address
+	 * is needed for easier discovery of where it lives without
+	 * relying on procfs output.
+	 */
+	unsigned long		proxy_vvar_addr;
+};
+
+#define VDSO_MARK_SIGNATURE	(0x6f73647675697263ULL)	/* Magic number (criuvdso) */
+#define VDSO_MARK_SIGNATURE_V2	(0x4f53447675697263ULL)	/* Magic number (criuvDSO) */
+#define VDSO_MARK_CUR_VERSION	(2)
+
+static inline void vdso_put_mark(void *where, unsigned long proxy_vdso_addr, unsigned long proxy_vvar_addr)
+{
+	struct vdso_mark *m = where;
+
+	m->signature		= VDSO_MARK_SIGNATURE_V2;
+	m->proxy_vdso_addr	= proxy_vdso_addr;
+	m->version		= VDSO_MARK_CUR_VERSION;
+	m->proxy_vvar_addr	= proxy_vvar_addr;
+}
+
+static inline bool is_vdso_mark(void *addr)
+{
+	struct vdso_mark *m = addr;
+
+	if (m->signature == VDSO_MARK_SIGNATURE_V2) {
+		/*
+		 * New format
+		 */
+		return true;
+	} else if (m->signature == VDSO_MARK_SIGNATURE) {
+		/*
+		 * Old format -- simply extend the mark up
+		 * to the version we support.
+		 */
+		vdso_put_mark(m, m->proxy_vdso_addr, VVAR_BAD_ADDR);
+		return true;
+	}
+	return false;
+}
+
+
+extern struct vdso_symtable vdso_sym_rt;
+extern u64 vdso_pfn;
+
+extern int vdso_init(void);
+extern int vdso_do_park(struct vdso_symtable *sym_rt, unsigned long park_at, unsigned long park_size);
+extern int vdso_fill_symtable(char *mem, size_t size, struct vdso_symtable *t);
+extern int vdso_proxify(char *who, struct vdso_symtable *sym_rt,
+			unsigned long vdso_rt_parked_at, size_t index,
+			VmaEntry *vmas, size_t nr_vmas);
+
+extern int parasite_fixup_vdso(struct parasite_ctl *ctl, pid_t pid,
+			       struct vm_area_list *vma_area_list);
+extern void write_intraprocedure_branch(void *to, void *from);
+
+#endif /* __CR_ASM_VDSO_H__ */
diff --git a/arch/ppc64/parasite-head.S b/arch/ppc64/parasite-head.S
new file mode 100644
index 000000000000..c7e5bdc66c52
--- /dev/null
+++ b/arch/ppc64/parasite-head.S
@@ -0,0 +1,44 @@
+#include "asm/linkage.h"
+#include "parasite.h"
+
+	.section .head.text
+	.align 8
+
+ENTRY(__export_parasite_head_start)
+
+	// int __used parasite_service(unsigned int cmd, void *args)
+	// cmd  = r3 = *__export_parasite_cmd (u32 ?)
+	// args = r4 = @parasite_args_ptr + @pc
+
+	bl	0f
+0:	mflr	2
+
+#define LOAD_REG_ADDR(reg, name)		\
+        addis   reg,2,(name - 0b)@ha;		\
+        addi    reg,reg,(name - 0b)@l;  /* must add @l to reg (holding @ha part), not to r2 */
+
+	LOAD_REG_ADDR(3,__export_parasite_cmd)
+	lwz	3,0(3)
+
+	LOAD_REG_ADDR(4,parasite_args_ptr)
+	lwz	4,0(4)
+	add	4,4,2		// Fix up ptr
+
+	// Set the TOC pointer
+	LOAD_REG_ADDR(5,parasite_toc_ptr)
+	ld	5,0(5)
+	add	2,2,5		// Fix up ptr
+
+	bl      parasite_service
+	twi 	31,0,0		// Should generate SIGTRAP
+
+parasite_args_ptr:
+        .long __export_parasite_args - (0b - __export_parasite_head_start)
+
+__export_parasite_cmd:
+	.long 0
+
+parasite_toc_ptr:
+	.quad .TOC. - (0b - __export_parasite_head_start)	/* read with 8-byte 'ld' above */
+
+END(__export_parasite_head_start)
diff --git a/arch/ppc64/restorer-trampoline.S b/arch/ppc64/restorer-trampoline.S
new file mode 100644
index 000000000000..5e15615ae1aa
--- /dev/null
+++ b/arch/ppc64/restorer-trampoline.S
@@ -0,0 +1,33 @@
+#include "asm/linkage.h"
+#include "parasite.h"
+
+	.section	.head.text
+	.align		8
+
+	// Called through parasite_unmap
+	// This trampoline is there to restore r2 before jumping back to the
+	// C code.
+#define LOAD_REG_ADDR(reg, name)                \
+        addis   reg,7,(name - 0b)@ha;           \
+        addi    reg,reg,(name - 0b)@l;  /* must add @l to reg (holding @ha part), not to r7 */
+
+ENTRY(__export_unmap_trampoline)
+        bl      0f
+0:      mflr    7
+	LOAD_REG_ADDR(8,restorer_r2)
+	ld	2,0(8)
+	b	__export_unmap
+	//END(__export_unmap_trampoline)
+
+	// Called from JUMP_TO_RESTORER_BLOB, ctr contains the address where
+	// to jump to, and r3 etc contains the parameter.
+	// Assuming up to 4 parameters here since we are using r7 and r8.
+ENTRY(__export_restore_task_trampoline)
+        bl      0f
+0:      mflr    7
+	LOAD_REG_ADDR(8,restorer_r2)
+	std	2,0(8)
+	b	__export_restore_task
+
+restorer_r2:
+	.quad	0	/* must hold a full 64-bit r2: accessed with std/ld above */
diff --git a/arch/ppc64/restorer.c b/arch/ppc64/restorer.c
new file mode 100644
index 000000000000..c5e19d9fb977
--- /dev/null
+++ b/arch/ppc64/restorer.c
@@ -0,0 +1,14 @@
+#include <unistd.h>
+
+#include "restorer.h"
+#include "asm/restorer.h"
+#include "asm/fpu.h"
+
+#include "syscall.h"
+#include "log.h"
+//#include "cpu.h"
+
+int restore_nonsigframe_gpregs(UserPpc64RegsEntry *r)
+{
+	return 0;
+}
diff --git a/arch/ppc64/syscall-common-ppc64.S b/arch/ppc64/syscall-common-ppc64.S
new file mode 100644
index 000000000000..78bc1b7e6e85
--- /dev/null
+++ b/arch/ppc64/syscall-common-ppc64.S
@@ -0,0 +1,32 @@
+#include "asm/linkage.h"
+#include <asm/unistd.h>		/* for __NR_ipc */
+
+#define SYSCALL(name, opcode)		\
+	ENTRY(name);			\
+	li	0, opcode;		\
+	b	__syscall_common;	\
+	END(name)
+
+	.text
+	.align	4
+
+ENTRY(__syscall_common)
+	sc
+	bnslr+		/* if no error return to LR */
+	neg	3,3	/* r3 = -r3 to return -errno value */
+	blr
+END(__syscall_common)
+
+ENTRY(__cr_restore_rt)
+	li	0, __NR_rt_sigreturn
+	b	__syscall_common
+END(__cr_restore_rt)
+
+	# On Power, shmat is done through the ipc system call.
+ENTRY(sys_shmat)
+	mr	7, 4	# shmaddr -> ptr
+	mr	4, 3	# shmid -> first
+	li	3, 21	# call = SHMAT
+	li	0, __NR_ipc
+	b	__syscall_common
+END(sys_shmat)
diff --git a/arch/ppc64/syscall-ppc64.def b/arch/ppc64/syscall-ppc64.def
new file mode 100644
index 000000000000..d8ae4491c679
--- /dev/null
+++ b/arch/ppc64/syscall-ppc64.def
@@ -0,0 +1,99 @@
+#
+# System calls table; please make sure the table consists only of the syscalls
+# really used somewhere in the project.
+#
+# The template is (name and arguments are optional if you need only __NR_x
+# defined, but no real entry point in the syscalls lib).
+#
+# name			code		name			arguments
+# -----------------------------------------------------------------------
+#
+__NR_read		3		sys_read		(int fd, void *buf, unsigned long count)
+__NR_write		4		sys_write		(int fd, const void *buf, unsigned long count)
+__NR_open		5		sys_open		(const char *filename, unsigned long flags, unsigned long mode)
+__NR_close		6		sys_close		(int fd)
+__NR_lseek		19		sys_lseek		(int fd, unsigned long offset, unsigned long origin)
+__NR_mmap		90		sys_mmap		(void *addr, unsigned long len, unsigned long prot, unsigned long flags, unsigned long fd, unsigned long offset)
+__NR_mprotect		125		sys_mprotect		(const void *addr, unsigned long len, unsigned long prot)
+__NR_munmap		91		sys_munmap		(void *addr, unsigned long len)
+__NR_brk		45		sys_brk			(void *addr)
+__NR_rt_sigaction	173		sys_sigaction		(int signum, const rt_sigaction_t *act, rt_sigaction_t *oldact, size_t sigsetsize)
+__NR_rt_sigprocmask	174		sys_sigprocmask		(int how, k_rtsigset_t *set, k_rtsigset_t *old, size_t sigsetsize)
+__NR_rt_sigreturn	172		sys_rt_sigreturn	(void)
+__NR_ioctl		54		sys_ioctl		(unsigned int fd, unsigned int cmd, unsigned long arg)
+__NR_pread64		179		sys_pread		(unsigned int fd, char *buf, size_t count, loff_t pos)
+__NR_mremap		163		sys_mremap		(unsigned long addr, unsigned long old_len, unsigned long new_len, unsigned long flags, unsigned long new_addr)
+__NR_mincore		206		sys_mincore		(void *addr, unsigned long size, unsigned char *vec)
+__NR_madvise		205		sys_madvise		(unsigned long start, size_t len, int behavior)
+__NR_pause		29		sys_pause		(void)
+__NR_nanosleep		162		sys_nanosleep		(struct timespec *req, struct timespec *rem)
+__NR_getitimer		105		sys_getitimer		(int which, const struct itimerval *val)
+__NR_setitimer		104		sys_setitimer		(int which, const struct itimerval *val, struct itimerval *old)
+__NR_getpid		20		sys_getpid		(void)
+__NR_socket		326		sys_socket		(int domain, int type, int protocol)
+__NR_connect		328		sys_connect		(int sockfd, struct sockaddr *addr, int addrlen)
+__NR_sendto		335		sys_sendto		(int sockfd, void *buff, size_t len, unsigned int flags, struct sockaddr *addr, int addr_len)
+__NR_recvfrom		337		sys_recvfrom		(int sockfd, void *ubuf, size_t size, unsigned int flags, struct sockaddr *addr, int *addr_len)
+__NR_sendmsg		341		sys_sendmsg		(int sockfd, const struct msghdr *msg, int flags)
+__NR_recvmsg		342		sys_recvmsg		(int sockfd, struct msghdr *msg, int flags)
+__NR_shutdown		338		sys_shutdown		(int sockfd, int how)
+__NR_bind		327		sys_bind		(int sockfd, const struct sockaddr *addr, int addrlen)
+__NR_setsockopt		339		sys_setsockopt		(int sockfd, int level, int optname, const void *optval, socklen_t optlen)
+__NR_getsockopt		340		sys_getsockopt		(int sockfd, int level, int optname, const void *optval, socklen_t *optlen)
+__NR_clone		120		sys_clone		(unsigned long flags, void *child_stack, void *parent_tid, void *child_tid)
+__NR_exit		1		sys_exit		(unsigned long error_code)
+__NR_wait4		114		sys_wait4		(int pid, int *status, int options, struct rusage *ru)
+__NR_kill		37		sys_kill		(long pid, int sig)
+__NR_fcntl		55		sys_fcntl		(int fd, int type, long arg)
+__NR_flock		143		sys_flock		(int fd, unsigned long cmd)
+__NR_mkdir		39		sys_mkdir		(const char *name, int mode)
+__NR_rmdir		40		sys_rmdir		(const char *name)
+__NR_unlink		10		sys_unlink		(char *pathname)
+__NR_readlink		85		sys_readlink		(const char *path, char *buf, int bufsize)
+__NR_umask		60		sys_umask		(int mask)
+__NR_getgroups		80		sys_getgroups		(int gsize, unsigned int *groups)
+__NR_setresuid		164		sys_setresuid		(int uid, int euid, int suid)
+__NR_getresuid		165		sys_getresuid		(int *uid, int *euid, int *suid)
+__NR_setresgid		169		sys_setresgid		(int gid, int egid, int sgid)
+__NR_getresgid		170		sys_getresgid		(int *gid, int *egid, int *sgid)
+__NR_getpgid		132		sys_getpgid		(pid_t pid)
+__NR_setfsuid		138		sys_setfsuid		(int fsuid)
+__NR_setfsgid		139		sys_setfsgid		(int fsgid)
+__NR_getsid		147		sys_getsid		(void)
+__NR_capget		183		sys_capget		(struct cap_header *h, struct cap_data *d)
+__NR_capset		184		sys_capset		(struct cap_header *h, struct cap_data *d)
+__NR_rt_sigqueueinfo	177		sys_rt_sigqueueinfo	(pid_t pid, int sig, siginfo_t *info)
+__NR_sigaltstack	185		sys_sigaltstack		(const void *uss, void *uoss)
+__NR_personality	136		sys_personality		(unsigned int personality)
+__NR_setpriority	97		sys_setpriority		(int which, int who, int nice)
+__NR_sched_setscheduler	156		sys_sched_setscheduler	(int pid, int policy, struct sched_param *p)
+__NR_prctl		171		sys_prctl		(int option, unsigned long arg2, unsigned long arg3, unsigned long arg4, unsigned long arg5)
+__NR_setrlimit		75		sys_setrlimit		(int resource, struct krlimit *rlim)
+__NR_mount		21		sys_mount		(char *dev_name, char *dir_name, char *type, unsigned long flags, void *data)
+__NR_umount2		52		sys_umount2		(char *name, int flags)
+__NR_gettid		207		sys_gettid		(void)
+__NR_futex		221		sys_futex		(u32 *uaddr, int op, u32 val, struct timespec *utime, u32 *uaddr2, u32 val3)
+__NR_set_tid_address	232		sys_set_tid_address	(int *tid_addr)
+__NR_restart_syscall	0		sys_restart_syscall	(void)
+__NR_sys_timer_create	240		sys_timer_create	(clockid_t which_clock, struct sigevent *timer_event_spec, timer_t *created_timer_id)
+__NR_sys_timer_settime	241		sys_timer_settime	(timer_t timer_id, int flags, const struct itimerspec *new_setting, struct itimerspec *old_setting)
+__NR_sys_timer_gettime	242		sys_timer_gettime	(int timer_id, const struct itimerspec *setting)
+__NR_sys_timer_getoverrun	243		sys_timer_getoverrun	(int timer_id)
+__NR_sys_timer_delete	244		sys_timer_delete	(timer_t timer_id)
+__NR_clock_gettime	246		sys_clock_gettime	(const clockid_t which_clock, const struct timespec *tp)
+__NR_exit_group		234		sys_exit_group		(int error_code)
+__NR_set_robust_list	300		sys_set_robust_list	(struct robust_list_head *head, size_t len)
+__NR_get_robust_list	299		sys_get_robust_list	(int pid, struct robust_list_head **head_ptr, size_t *len_ptr)
+__NR_vmsplice		285		sys_vmsplice		(int fd, const struct iovec *iov, unsigned long nr_segs, unsigned int flags)
+__NR_timerfd_settime	311		sys_timerfd_settime	(int ufd, int flags, const struct itimerspec *utmr, struct itimerspec *otmr)
+__NR_signalfd4		313		sys_signalfd		(int fd, k_rtsigset_t *mask, size_t sizemask, int flags)
+__NR_rt_tgsigqueueinfo	322		sys_rt_tgsigqueueinfo	(pid_t tgid, pid_t pid, int sig, siginfo_t *info)
+__NR_fanotify_init	323		sys_fanotify_init	(unsigned int flags, unsigned int event_f_flags)
+__NR_fanotify_mark	324		sys_fanotify_mark	(int fanotify_fd, unsigned int flags, u64 mask, int dfd, const char *pathname)
+__NR_prlimit64		325		sys_prlimit64		(pid_t pid, unsigned int resource, const struct rlimit64 *new_rlim, struct rlimit64 *old_rlim)
+__NR_open_by_handle_at	346		sys_open_by_handle_at	(int mountdirfd, struct file_handle *handle, int flags)
+__NR_setns		350		sys_setns		(int fd, int nstype)
+__NR_kcmp		354		sys_kcmp		(pid_t pid1, pid_t pid2, int type, unsigned long idx1, unsigned long idx2)
+__NR_memfd_create	360		sys_memfd_create	(const char *name, unsigned int flags)
+__NR_io_setup		227		sys_io_setup		(unsigned nr_events, aio_context_t *ctx_idp)
+__NR_io_getevents	229		sys_io_getevents	(aio_context_t ctx_id, long min_nr, long nr, struct io_event *events, struct timespec *timeout)
\ No newline at end of file
diff --git a/arch/ppc64/syscalls-ppc64.sh b/arch/ppc64/syscalls-ppc64.sh
new file mode 100644
index 000000000000..22c81293dfff
--- /dev/null
+++ b/arch/ppc64/syscalls-ppc64.sh
@@ -0,0 +1,54 @@
+#!/bin/sh
+
+gen_asm() {
+	in=$1
+	codesout=$2
+	codesinc=`echo $2 | sed -e 's/.*include\///g'`
+	protosout=$3
+	asmout=$4
+	asmcommon=`echo $5 | sed -e 's/.*include\///g'`
+	prototypes=`echo $6 | sed -e 's/.*include\///g'`
+
+	codesdef=`echo $codesout | sed -e 's/.*include\///g' | tr "[[:space:]].-" _`
+	protosdef=`echo $protosout | sed -e 's/.*include\///g' | tr "[[:space:]].-" _`
+
+	echo "/* Autogenerated, don't edit */"	>  $codesout
+	echo "#ifndef $codesdef"		>> $codesout
+	echo "#define $codesdef"		>> $codesout
+
+	echo "/* Autogenerated, don't edit */"	>  $protosout
+	echo "#ifndef $protosdef"		>> $protosout
+	echo "#define $protosdef"		>> $protosout
+	echo "#include \"$prototypes\""		>> $protosout
+	echo "#include \"$codesinc\""		>> $protosout
+
+	echo "/* Autogenerated, don't edit */"	>  $asmout
+	echo "#include \"$codesinc\""		>> $asmout
+	echo "#include \"$asmcommon\""		>> $asmout
+
+	cat $in | egrep -v '^#' | sed -e 's/\t\{1,\}/|/g' | awk -F '|' '{print "#define", $1, $2}'		>> $codesout
+	cat $in | egrep -v '^#' | sed -e 's/\t\{1,\}/|/g' | awk -F '|' '{print "extern long ", $3, $4, ";"}'	>> $protosout
+	cat $in | egrep -v '^#' | sed -e 's/\t\{1,\}/|/g' | awk -F '|' '{print "SYSCALL(", $3, ",", $2, ")"}'	>> $asmout
+
+	echo "#endif /* $codesdef */"		>> $codesout
+	echo "#endif /* $protosdef */"		>> $protosout
+}
+
+gen_exec() {
+	in=$1
+	codecout=$2
+
+	echo "/* Autogenerated, don't edit */"	>  $codecout
+
+	cat $in | egrep -v '^#' | sed -e 's/\t\{1,\}/|/g' | awk -F '|' '{print "SYSCALL(", substr($3, 5), ",", $2, ")"}' >> $codecout
+}
+
+if [ "$1" = "--asm" ]; then
+	shift
+	gen_asm "$@"
+fi
+
+if [ "$1" = "--exec" ]; then
+	shift
+	gen_exec "$@"
+fi
diff --git a/arch/ppc64/vdso-pie.c b/arch/ppc64/vdso-pie.c
new file mode 100644
index 000000000000..8219e4af1be3
--- /dev/null
+++ b/arch/ppc64/vdso-pie.c
@@ -0,0 +1,594 @@
+#include <stdlib.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <elf.h>
+#include <fcntl.h>
+#include <errno.h>
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/mman.h>
+
+#include "asm/string.h"
+#include "asm/types.h"
+
+#include "syscall.h"
+#include "image.h"
+#include "vdso.h"
+#include "vma.h"
+#include "log.h"
+#include "bug.h"
+
+#ifdef LOG_PREFIX
+# undef LOG_PREFIX
+#endif
+#define LOG_PREFIX "vdso: "
+
+/* These symbols are defined in vdso-trampoline.S */
+extern char *vdso_trampoline, *vdso_trampoline_end;
+
+static inline void invalidate_caches(unsigned long at)
+{
+    asm volatile("isync		\n"	\
+		 "li 	3,0	\n" 	\
+		 "dcbf	3,%0	\n"	\
+		 "sync		\n"	\
+		 "icbi 	3,%0	\n" 	\
+		 "isync		\n" 	\
+		 : /* no output */	\
+		 : "r"(at)		\
+		 :"memory", "r3");
+}
+
+/* This is the size of the trampoline call :
+ * 	mlfr	r0
+ *	bl	trampoline
+ *	<64 bit address>
+ */
+#define TRAMP_CALL_SIZE	(2*sizeof(uint32_t) + sizeof(uint64_t))
+
+/*
+ * put_trampoline does 2 things :
+ *
+ *   1. it looks for a place in the checkpointed vDSO where to put the
+ *	trampoline code (see vdso-trampoline.S).
+ *
+ *   2. for each symbol from the checkpointed vDSO, it checks that there is
+ *	enough room to put the call to the vDSO trampoline (see
+ *	TRAMP_CALL_SIZE's comment above).
+ *	This is done by checking that there are no interesting symbols in the
+ *	range of the current one's offset -> (current one's offset + TRAMP_CALL_SIZE).
+ *	Unfortunately the symbols are not sorted by address so we have to scan
+ *	the complete table every time. Since the vDSO is small, this is
+ *	not a big issue.
+ */
+static unsigned long put_trampoline(unsigned long at, struct vdso_symtable *sym)
+{
+	int i,j;
+	unsigned long size;
+	unsigned long trampoline = 0;
+
+	/* First of all we have to find a place where to put the trampoline
+	 * code.
+	 */
+	size = (unsigned long)&vdso_trampoline_end
+		- (unsigned long)&vdso_trampoline;
+
+	for (i = 0; i < ARRAY_SIZE(sym->symbols); i++) {
+		if (vdso_symbol_empty(&sym->symbols[i]))
+			continue;
+
+		pr_debug("Checking '%s' at %lx\n", sym->symbols[i].name,
+			 sym->symbols[i].offset);
+
+		/* find the nearest following symbol we are interested in */
+		for (j=0; j < ARRAY_SIZE(sym->symbols); j++) {
+			if (i==j || vdso_symbol_empty(&sym->symbols[j]))
+				continue;
+
+			/* pr_debug("next:%s(%lx)\n", sym->symbols[j].name, */
+			/* 	 sym->symbols[j].offset); */
+
+			if (sym->symbols[j].offset <= sym->symbols[i].offset)
+				/* this symbol is above the current one */
+				continue;
+
+			if ((sym->symbols[i].offset+TRAMP_CALL_SIZE) >
+			    sym->symbols[j].offset) {
+				/* we have a major issue here since we cannot
+				 * even put the trampoline call for this symbol
+				 */
+				pr_err("Can't handle small vDSO symbol %s\n",
+				       sym->symbols[i].name);
+				return 0;
+			}
+
+			if (trampoline)
+				/* no need to put it twice */
+				continue;
+
+			if ((sym->symbols[j].offset -
+			     (sym->symbols[i].offset+TRAMP_CALL_SIZE)) <= size)
+				/* not enough place */
+				continue;
+
+			/* We can put the trampoline there */
+			trampoline = at + sym->symbols[i].offset;
+			trampoline += TRAMP_CALL_SIZE;
+
+			pr_debug("Putting vDSO trampoline in %s at %lx\n",
+				 sym->symbols[i].name, trampoline);
+			builtin_memcpy((void *)trampoline, &vdso_trampoline,
+				       size);
+			invalidate_caches(trampoline);
+		}
+	}
+
+	return trampoline;
+}
+
+static inline void put_trampoline_call(unsigned long at, unsigned long to,
+				       unsigned long tr)
+{
+    uint32_t *addr = (uint32_t *)at;	/* patch site inside the old vDSO */
+
+    *addr++ = 0x7C0802a6;					/* mflr	r0 */
+    *addr++ = 0x48000001 | ((long)(tr-at-4) & 0x3fffffc);	/* bl tr */
+    *(uint64_t *)addr = to;	/* the address to read by the trampoline */
+
+    invalidate_caches(at);
+}
+
+static int vdso_redirect_calls(unsigned long base_to,
+			       unsigned long base_from,
+			       struct vdso_symtable *to,
+			       struct vdso_symtable *from)
+{
+	unsigned int i;
+	unsigned long trampoline;
+
+	trampoline = (unsigned long)put_trampoline(base_from, from);
+	if (!trampoline)
+		return 1;
+
+	for (i = 0; i < ARRAY_SIZE(to->symbols); i++) {
+		if (vdso_symbol_empty(&from->symbols[i]))
+			continue;
+
+		pr_debug("br: %lx/%lx -> %lx/%lx (index %d) '%s'\n",
+			 base_from, from->symbols[i].offset,
+			 base_to, to->symbols[i].offset, i,
+			 from->symbols[i].name);
+
+		put_trampoline_call(base_from + from->symbols[i].offset,
+				    base_to + to->symbols[i].offset,
+				    trampoline);
+	}
+
+	return 0;
+}
+
+/* Check if pointer is out-of-bound */
+static bool __ptr_oob(void *ptr, void *start, size_t size)
+{
+	void *end = (void *)((unsigned long)start + size);
+	return ptr > end || ptr < start;
+}
+
+/*
+ * Elf hash, see format specification.
+ */
+static unsigned long elf_hash(const unsigned char *name)
+{
+	unsigned long h = 0, g;
+
+	while (*name) {
+		h = (h << 4) + *name++;
+		g = h & 0xf0000000ul;
+		if (g)
+			h ^= g >> 24;
+		h &= ~g;
+	}
+	return h;
+}
+
+/*
+ * TODO :
+ * 	PIE linking doesn't work for this kind of definition.
+ *	When build for the parasite code, the pointers to the string are
+ *	computed from the start of the object but the generated code is
+ *	assuming that the pointers are fixed by the loader.
+ *
+ *	In addition, GCC create a call to C library memcpy when the table is
+ *	containing more than 9 items. Since the parasite code is not linked
+ *	with the C library an undefined symbol error is raised at build time.
+ *	By initialising the table at run time, we are working around this
+ *	issue.
+ */
+#ifdef __pie__
+static const char *VDSO_SYMBOL(int i)
+{
+	static char *vdso_symbols[VDSO_SYMBOL_MAX];
+	static int init_done = 0;
+
+#define SET_VDSO_SYM(s) vdso_symbols[VDSO_SYMBOL_##s] = VDSO_SYMBOL_##s##_NAME
+	if (!init_done) {
+		SET_VDSO_SYM(CLOCK_GETRES);
+		SET_VDSO_SYM(CLOCK_GETTIME);
+		SET_VDSO_SYM(GET_SYSCALL_MAP);
+		SET_VDSO_SYM(GET_TBFREQ);
+		SET_VDSO_SYM(GETCPU);
+		SET_VDSO_SYM(GETTIMEOFDAY);
+		SET_VDSO_SYM(SIGTRAMP_RT64);
+		SET_VDSO_SYM(SYNC_DICACHE);
+		SET_VDSO_SYM(SYNC_DICACHE_P5);
+		SET_VDSO_SYM(TIME);
+		init_done = 1;
+	}
+	return vdso_symbols[i];
+}
+#else
+#define SET_VDSO_SYM(s) [VDSO_SYMBOL_##s] = VDSO_SYMBOL_##s##_NAME
+const char *vdso_symbols[VDSO_SYMBOL_MAX] = {
+	SET_VDSO_SYM(CLOCK_GETRES),
+	SET_VDSO_SYM(CLOCK_GETTIME),
+	SET_VDSO_SYM(GET_SYSCALL_MAP),
+	SET_VDSO_SYM(GET_TBFREQ),
+	SET_VDSO_SYM(GETCPU),
+	SET_VDSO_SYM(GETTIMEOFDAY),
+	SET_VDSO_SYM(SIGTRAMP_RT64),
+	SET_VDSO_SYM(SYNC_DICACHE),
+	SET_VDSO_SYM(SYNC_DICACHE_P5),
+	SET_VDSO_SYM(TIME)
+};
+#define VDSO_SYMBOL(i)	vdso_symbols[i]
+#endif
+
+int vdso_fill_symtable(char *mem, size_t size, struct vdso_symtable *t)
+{
+	Elf64_Phdr *dynamic = NULL, *load = NULL;
+	Elf64_Ehdr *ehdr = (void *)mem;
+	Elf64_Dyn *dyn_strtab = NULL;
+	Elf64_Dyn *dyn_symtab = NULL;
+	Elf64_Dyn *dyn_strsz = NULL;
+	Elf64_Dyn *dyn_syment = NULL;
+	Elf64_Dyn *dyn_hash = NULL;
+	Elf64_Word *hash = NULL;
+	Elf64_Phdr *phdr;
+	Elf64_Dyn *d;
+
+	Elf64_Word *bucket, *chain;
+	Elf64_Word nbucket, nchain;
+
+	/*
+	 * See the Elf specification for these magic values.
+	 */
+	static const char elf_ident[] = {
+		0x7f, 0x45, 0x4c, 0x46, 0x02, 0x01, 0x01, 0x00,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	};
+
+	char *dynsymbol_names;
+	unsigned int i, j, k;
+
+	BUILD_BUG_ON(sizeof(elf_ident) != sizeof(ehdr->e_ident));
+
+	pr_debug("Parsing at %lx %lx\n", (long)mem, (long)mem + (long)size);
+
+	/*
+	 * Make sure it's a file we support.
+	 */
+	if (builtin_memcmp(ehdr->e_ident, elf_ident, sizeof(elf_ident))) {
+		pr_err("Elf header magic mismatch\n");
+		return -EINVAL;
+	}
+
+	/*
+	 * We need PT_LOAD and PT_DYNAMIC here. Each once.
+	 */
+	phdr = (void *)&mem[ehdr->e_phoff];
+	for (i = 0; i < ehdr->e_phnum; i++, phdr++) {
+		if (__ptr_oob(phdr, mem, size))
+			goto err_oob;
+		switch (phdr->p_type) {
+		case PT_DYNAMIC:
+			if (dynamic) {
+				pr_err("Second PT_DYNAMIC header\n");
+				return -EINVAL;
+			}
+			dynamic = phdr;
+			break;
+		case PT_LOAD:
+			if (load) {
+				pr_err("Second PT_LOAD header\n");
+				return -EINVAL;
+			}
+			load = phdr;
+			break;
+		}
+	}
+
+	if (!load || !dynamic) {
+		pr_err("One of obligated program headers is missed\n");
+		return -EINVAL;
+	}
+
+	pr_debug("PT_LOAD p_vaddr: %lx\n", (unsigned long)load->p_vaddr);
+
+	/*
+	 * Dynamic section tags should provide us the rest of information
+	 * needed. Note that we're interested in a small set of tags.
+	 */
+	d = (void *)&mem[dynamic->p_offset];
+	for (i = 0; i < dynamic->p_filesz / sizeof(*d); i++, d++) {
+		if (__ptr_oob(d, mem, size))
+			goto err_oob;
+
+		if (d->d_tag == DT_NULL) {
+			break;
+		} else if (d->d_tag == DT_STRTAB) {
+			dyn_strtab = d;
+			pr_debug("DT_STRTAB: %lx\n", (unsigned long)d->d_un.d_ptr);
+		} else if (d->d_tag == DT_SYMTAB) {
+			dyn_symtab = d;
+			pr_debug("DT_SYMTAB: %lx\n", (unsigned long)d->d_un.d_ptr);
+		} else if (d->d_tag == DT_STRSZ) {
+			dyn_strsz = d;
+			pr_debug("DT_STRSZ: %lx\n", (unsigned long)d->d_un.d_val);
+		} else if (d->d_tag == DT_SYMENT) {
+			dyn_syment = d;
+			pr_debug("DT_SYMENT: %lx\n", (unsigned long)d->d_un.d_val);
+		} else if (d->d_tag == DT_HASH) {
+			dyn_hash = d;
+			pr_debug("DT_HASH: %lx\n", (unsigned long)d->d_un.d_ptr);
+		}
+	}
+
+	if (!dyn_strtab || !dyn_symtab || !dyn_strsz || !dyn_syment || !dyn_hash) {
+		pr_err("Not all dynamic entries are present\n");
+		return -EINVAL;
+	}
+
+	dynsymbol_names = &mem[dyn_strtab->d_un.d_val - load->p_vaddr];
+	if (__ptr_oob(dynsymbol_names, mem, size))
+		goto err_oob;
+
+	hash = (void *)&mem[(unsigned long)dyn_hash->d_un.d_ptr - (unsigned long)load->p_vaddr];
+	if (__ptr_oob(hash, mem, size))
+		goto err_oob;
+
+	nbucket = hash[0];
+	nchain = hash[1];
+	bucket = &hash[2];
+	chain = &hash[nbucket + 2];
+
+	pr_debug("nbucket %lx nchain %lx bucket %lx chain %lx\n",
+		 (long)nbucket, (long)nchain, (unsigned long)bucket, (unsigned long)chain);
+
+	for (i = 0; i < VDSO_SYMBOL_MAX; i++) {
+		const char * symbol = VDSO_SYMBOL(i);
+		k = elf_hash((const unsigned char *)symbol);
+
+		for (j = bucket[k % nbucket]; j < nchain && chain[j] != STN_UNDEF; j = chain[j]) {
+			Elf64_Sym *sym = (void *)&mem[dyn_symtab->d_un.d_ptr - load->p_vaddr];
+			char *name;
+
+			sym = &sym[j];
+			if (__ptr_oob(sym, mem, size))
+				continue;
+
+			if (ELF64_ST_TYPE(sym->st_info) != STT_FUNC &&
+			    ELF64_ST_BIND(sym->st_info) != STB_GLOBAL)
+				continue;
+
+			name = &dynsymbol_names[sym->st_name];
+			if (__ptr_oob(name, mem, size))
+				continue;
+
+			if (builtin_strcmp(name, symbol))
+				continue;
+
+			builtin_memcpy(t->symbols[i].name, name, sizeof(t->symbols[i].name));
+			t->symbols[i].offset = (unsigned long)sym->st_value - load->p_vaddr;
+			break;
+		}
+	}
+
+	return 0;
+
+err_oob:
+	pr_err("Corrupted Elf data\n");
+	return -EFAULT;
+}
+
+/*
+ * Move an existing live mapping (vdso or vvar zone) of @size bytes from
+ * @from to the fixed address @to.  @who is used for log messages only.
+ *
+ * Returns 0 on success, -1 if the kernel did not place the mapping at @to.
+ */
+static int vdso_remap(char *who, unsigned long from, unsigned long to, size_t size)
+{
+	unsigned long addr;
+
+	pr_debug("Remap %s %lx -> %lx\n", who, from, to);
+
+	/* MREMAP_FIXED: either land exactly at @to or fail -- never elsewhere */
+	addr = sys_mremap(from, size, size, MREMAP_MAYMOVE | MREMAP_FIXED, to);
+	if (addr != to) {
+		pr_err("Unable to remap %lx -> %lx %lx\n",
+		       from, to, addr);
+		return -1;
+	}
+
+	return 0;
+}
+
+/* Park runtime vDSO in some safe place where it can be accessible from restorer */
+int vdso_do_park(struct vdso_symtable *sym_rt, unsigned long park_at, unsigned long park_size)
+{
+	int ret;
+
+	/*
+	 * NOTE(review): this fires when vdso+vvar is *smaller* than the park
+	 * area; presumably @park_size is derived from these very sizes by the
+	 * caller, so the condition can only trip on a sizing mismatch --
+	 * confirm the intended direction of the check against the caller.
+	 */
+	BUG_ON((vdso_vma_size(sym_rt) + vvar_vma_size(sym_rt)) < park_size);
+
+	if (sym_rt->vvar_start != VDSO_BAD_ADDR) {
+		/*
+		 * Both zones exist: park them back to back, preserving the
+		 * relative order they have in our own address space.
+		 */
+		if (sym_rt->vma_start < sym_rt->vvar_start) {
+			ret  = vdso_remap("rt-vdso", sym_rt->vma_start,
+					  park_at, vdso_vma_size(sym_rt));
+			park_at += vdso_vma_size(sym_rt);
+			ret |= vdso_remap("rt-vvar", sym_rt->vvar_start,
+					  park_at, vvar_vma_size(sym_rt));
+		} else {
+			ret  = vdso_remap("rt-vvar", sym_rt->vvar_start,
+					  park_at, vvar_vma_size(sym_rt));
+			park_at += vvar_vma_size(sym_rt);
+			ret |= vdso_remap("rt-vdso", sym_rt->vma_start,
+					  park_at, vdso_vma_size(sym_rt));
+		}
+	} else
+		/* No vvar zone on this kernel: only the vdso itself is parked */
+		ret = vdso_remap("rt-vdso", sym_rt->vma_start,
+				 park_at, vdso_vma_size(sym_rt));
+	return ret;
+}
+
+/*
+ * Make the vDSO dumped in the image usable in the restored task.
+ *
+ * Two strategies:
+ *  - if the image vdso/vvar tuple matches the runtime one (sizes, relative
+ *    order and symbol offsets), simply mremap() the parked runtime copy
+ *    over the dumpee addresses ("remap_rt" path);
+ *  - otherwise keep the dumpee copy as a proxy whose entries are patched
+ *    to jump into the parked runtime vDSO, and put a mark into the
+ *    runtime copy so the next dump can recognise it as auto-generated.
+ *
+ * @index points into @vmas at the first zone of the vdso/vvar tuple;
+ * the partner zone, if any, is expected at @index + 1.
+ * Returns 0 on success, -1 on error.
+ */
+int vdso_proxify(char *who, struct vdso_symtable *sym_rt,
+		 unsigned long vdso_rt_parked_at, size_t index,
+		 VmaEntry *vmas, size_t nr_vmas)
+{
+	VmaEntry *vma_vdso = NULL, *vma_vvar = NULL;
+	struct vdso_symtable s = VDSO_SYMTABLE_INIT;
+	bool remap_rt = false;
+
+	/*
+	 * Figure out which kind of vdso tuple we get.
+	 */
+	if (vma_entry_is(&vmas[index], VMA_AREA_VDSO))
+		vma_vdso = &vmas[index];
+	else if (vma_entry_is(&vmas[index], VMA_AREA_VVAR))
+		vma_vvar = &vmas[index];
+
+	if (index < (nr_vmas - 1)) {
+		if (vma_entry_is(&vmas[index + 1], VMA_AREA_VDSO))
+			vma_vdso = &vmas[index + 1];
+		else if (vma_entry_is(&vmas[index + 1], VMA_AREA_VVAR))
+			vma_vvar = &vmas[index + 1];
+	}
+
+	/* A vvar zone alone is meaningless; the vdso zone is mandatory */
+	if (!vma_vdso) {
+		pr_err("Can't find vDSO area in image\n");
+		return -1;
+	}
+
+	/*
+	 * vDSO mark overwrites Elf program header of proxy vDSO thus
+	 * it must never ever be greater in size.
+	 */
+	BUILD_BUG_ON(sizeof(struct vdso_mark) > sizeof(Elf64_Phdr));
+
+	/*
+	 * Find symbols in vDSO zone read from image.
+	 */
+	if (vdso_fill_symtable((void *)vma_vdso->start, vma_entry_len(vma_vdso), &s))
+		return -1;
+
+	/*
+	 * Proxification strategy
+	 *
+	 *  - There might be two vDSO zones: vdso code and optionally vvar data
+	 *  - To be able to use in-place remapping we need
+	 *
+	 *    a) Size and order of vDSO zones are to match
+	 *    b) Symbols offsets must match
+	 *    c) Have same number of vDSO zones
+	 */
+	if (vma_entry_len(vma_vdso) == vdso_vma_size(sym_rt)) {
+		size_t i;
+
+		/* Every symbol offset must agree, else the easy remap lies to callers */
+		for (i = 0; i < ARRAY_SIZE(s.symbols); i++) {
+			if (s.symbols[i].offset != sym_rt->symbols[i].offset)
+				break;
+		}
+
+		if (i == ARRAY_SIZE(s.symbols)) {
+			/*
+			 * NOTE(review): vvar_start is compared against
+			 * VVAR_BAD_ADDR here but against VDSO_BAD_ADDR further
+			 * below -- presumably both constants are the same
+			 * sentinel; confirm and unify.
+			 */
+			if (vma_vvar && sym_rt->vvar_start != VVAR_BAD_ADDR) {
+				remap_rt = (vvar_vma_size(sym_rt) == vma_entry_len(vma_vvar));
+				if (remap_rt) {
+					long delta_rt = sym_rt->vvar_start - sym_rt->vma_start;
+					long delta_this = vma_vvar->start - vma_vdso->start;
+
+					/* Same sign <=> vdso/vvar appear in the same relative order */
+					remap_rt = (delta_rt ^ delta_this) < 0 ? false : true;
+				}
+			} else
+				remap_rt = true;
+		}
+	}
+
+	pr_debug("image [vdso] %lx-%lx [vvar] %lx-%lx\n",
+		 vma_vdso->start, vma_vdso->end,
+		 vma_vvar ? vma_vvar->start : VVAR_BAD_ADDR,
+		 vma_vvar ? vma_vvar->end : VVAR_BAD_ADDR);
+
+	/*
+	 * Easy case -- the vdso from image has same offsets, order and size
+	 * as runtime, so we simply remap runtime vdso to dumpee position
+	 * without generating any proxy.
+	 *
+	 * Note we may remap VVAR vdso as well which might not yet been mapped
+	 * by a caller code. So drop VMA_AREA_REGULAR from it and caller would
+	 * not touch it anymore.
+	 */
+	if (remap_rt) {
+		int ret = 0;
+
+		pr_info("Runtime vdso/vvar matches dumpee, remap inplace\n");
+
+		/* Free the dumpee addresses first so mremap() can land there */
+		if (sys_munmap((void *)vma_vdso->start, vma_entry_len(vma_vdso))) {
+			pr_err("Failed to unmap %s\n", who);
+			return -1;
+		}
+
+		if (vma_vvar) {
+			if (sys_munmap((void *)vma_vvar->start, vma_entry_len(vma_vvar))) {
+				pr_err("Failed to unmap %s\n", who);
+				return -1;
+			}
+
+			/* Parked zones are back to back: unpark in address order */
+			if (vma_vdso->start < vma_vvar->start) {
+				ret  = vdso_remap(who, vdso_rt_parked_at, vma_vdso->start, vdso_vma_size(sym_rt));
+				vdso_rt_parked_at += vdso_vma_size(sym_rt);
+				ret |= vdso_remap(who, vdso_rt_parked_at, vma_vvar->start, vvar_vma_size(sym_rt));
+			} else {
+				ret  = vdso_remap(who, vdso_rt_parked_at, vma_vvar->start, vvar_vma_size(sym_rt));
+				vdso_rt_parked_at += vvar_vma_size(sym_rt);
+				ret |= vdso_remap(who, vdso_rt_parked_at, vma_vdso->start, vdso_vma_size(sym_rt));
+			}
+		} else
+			ret = vdso_remap(who, vdso_rt_parked_at, vma_vdso->start, vdso_vma_size(sym_rt));
+
+		return ret;
+	}
+
+	/*
+	 * Now complex case -- we need to proxify calls. We redirect
+	 * calls from dumpee vdso to runtime vdso, making dumpee
+	 * to operate as proxy vdso.
+	 */
+	pr_info("Runtime vdso mismatches dumpee, generate proxy\n");
+
+	/*
+	 * Don't forget to shift if vvar is before vdso.
+	 */
+	if (sym_rt->vvar_start != VDSO_BAD_ADDR &&
+	    sym_rt->vvar_start < sym_rt->vma_start)
+		vdso_rt_parked_at += vvar_vma_size(sym_rt);
+
+	if (vdso_redirect_calls(vdso_rt_parked_at,
+				vma_vdso->start,
+				sym_rt, &s)) {
+		pr_err("Failed to proxify dumpee contents\n");
+		return -1;
+	}
+
+	/*
+	 * Put a special mark into runtime vdso, thus at next checkpoint
+	 * routine we could detect this vdso and do not dump it, since
+	 * it's auto-generated every new session if proxy required.
+	 */
+	sys_mprotect((void *)vdso_rt_parked_at,  vdso_vma_size(sym_rt), PROT_WRITE);
+	vdso_put_mark((void *)vdso_rt_parked_at, vma_vdso->start, vma_vvar ? vma_vvar->start : VVAR_BAD_ADDR);
+	sys_mprotect((void *)vdso_rt_parked_at,  vdso_vma_size(sym_rt), VDSO_PROT);
+	return 0;
+}
diff --git a/arch/ppc64/vdso-trampoline.S b/arch/ppc64/vdso-trampoline.S
new file mode 100644
index 000000000000..54a22453701a
--- /dev/null
+++ b/arch/ppc64/vdso-trampoline.S
@@ -0,0 +1,11 @@
+#include "asm/linkage.h"
+
+	.section	.text
+
+/*
+ * Proxy branch into the real (parked) vDSO implementation.
+ *
+ * Expected calling convention (set up by the patched proxy entry code):
+ * the caller's original lr has been stashed in r0, and lr points at a
+ * 64-bit slot ("vdso_ptr") holding the address of the real vDSO symbol.
+ * NOTE(review): presumably arranged by vdso_redirect_calls() -- verify
+ * against arch/ppc64/vdso-pie.c.
+ */
+GLOBAL(vdso_trampoline)
+	mflr	12			/* r12 vdso_ptr's address */
+	mtlr	0			/* restore lr */
+	ld	12,0(12)		/* read value stored in vdso_ptr */
+	mtctr	12			/* branch to it */
+	bctr
+GLOBAL(vdso_trampoline_end)
diff --git a/arch/ppc64/vdso.c b/arch/ppc64/vdso.c
new file mode 100644
index 000000000000..43d9637f00af
--- /dev/null
+++ b/arch/ppc64/vdso.c
@@ -0,0 +1,309 @@
+#include <stdlib.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <string.h>
+#include <elf.h>
+#include <fcntl.h>
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/mman.h>
+
+#include "asm/types.h"
+#include "asm/parasite-syscall.h"
+
+#include "parasite-syscall.h"
+#include "parasite.h"
+#include "compiler.h"
+#include "kerndat.h"
+#include "vdso.h"
+#include "util.h"
+#include "log.h"
+#include "mem.h"
+#include "vma.h"
+
+#ifdef LOG_PREFIX
+# undef LOG_PREFIX
+#endif
+#define LOG_PREFIX "vdso: "
+
+struct vdso_symtable vdso_sym_rt = VDSO_SYMTABLE_INIT;
+u64 vdso_pfn = VDSO_BAD_PFN;
+/*
+ * The VMAs list might have proxy vdso/vvar areas left
+ * from previous dump/restore cycle so we need to detect
+ * them and eliminate them from the VMAs list; they will be
+ * generated again on restore if needed.
+ */
+int parasite_fixup_vdso(struct parasite_ctl *ctl, pid_t pid,
+			struct vm_area_list *vma_area_list)
+{
+	unsigned long proxy_vdso_addr = VDSO_BAD_ADDR;
+	unsigned long proxy_vvar_addr = VVAR_BAD_ADDR;
+	struct vma_area *proxy_vdso_marked = NULL;
+	struct vma_area *proxy_vvar_marked = NULL;
+	struct parasite_vdso_vma_entry *args;
+	struct vma_area *vma;
+	int fd, ret = -1;
+	off_t off;
+	u64 pfn;
+
+	args = parasite_args(ctl, struct parasite_vdso_vma_entry);
+	/* pagemap lets us compare each vma's backing PFN with the known vdso PFN */
+	fd = open_proc(pid, "pagemap");
+	if (fd < 0)
+		return -1;
+
+	list_for_each_entry(vma, &vma_area_list->h, list) {
+		if (!vma_area_is(vma, VMA_AREA_REGULAR))
+			continue;
+
+		/* File-backed areas can't be a vdso/vvar zone */
+		if (vma_area_is(vma, VMA_FILE_SHARED) ||
+				vma_area_is(vma, VMA_FILE_PRIVATE))
+			continue;
+		/*
+		 * It might be possible VVAR area from marked
+		 * vDSO zone, we need to detect it earlier than
+		 * VDSO_PROT test because VVAR_PROT is a subset
+		 * of it but don't yield continue here,
+		 * sigh... what a mess.
+		 */
+		BUILD_BUG_ON(!(VDSO_PROT & VVAR_PROT));
+
+		/* proxy_vvar_addr is only valid once the marked vdso was found */
+		if ((vma->e->prot & VVAR_PROT) == VVAR_PROT) {
+			if (proxy_vvar_addr != VVAR_BAD_ADDR &&
+			    proxy_vvar_addr == vma->e->start) {
+				BUG_ON(proxy_vvar_marked);
+				proxy_vvar_marked = vma;
+				continue;
+			}
+		}
+
+		if ((vma->e->prot & VDSO_PROT) != VDSO_PROT)
+			continue;
+
+		/* Extra protection bits beyond VDSO_PROT rule the area out */
+		if (vma->e->prot != VDSO_PROT) {
+			pr_debug("Dropping %lx using extra protection test\n",
+				 vma->e->start);
+			continue;
+		}
+
+		/* vDSO never lives above the user address range... */
+		if (vma->e->start > TASK_SIZE)
+			continue;
+
+		/* ...nor in a grows-down (stack) vma */
+		if (vma->e->flags & MAP_GROWSDOWN)
+			continue;
+
+		/*
+		 * I need to poke every potentially marked vma,
+		 * otherwise if task never called for vdso functions
+		 * page frame number won't be reported.
+		 */
+		args->start = vma->e->start;
+		args->len = vma_area_len(vma);
+
+		if (parasite_execute_daemon(PARASITE_CMD_CHECK_VDSO_MARK, ctl)) {
+			pr_err("vdso: Parasite failed to poke for mark\n");
+			ret = -1;
+			goto err;
+		}
+
+		/*
+		 * Defer handling marked vdso until we walked over
+		 * all vmas and restore potentially remapped vDSO
+		 * area status.
+		 */
+		if (unlikely(args->is_marked)) {
+			if (proxy_vdso_marked) {
+				pr_err("Ow! Second vdso mark detected!\n");
+				ret = -1;
+				goto err;
+			}
+			proxy_vdso_marked = vma;
+			proxy_vdso_addr = args->proxy_vdso_addr;
+			proxy_vvar_addr = args->proxy_vvar_addr;
+			continue;
+		}
+
+		/* Each pagemap entry is a u64 per page, indexed by virtual page number */
+		off = (vma->e->start / PAGE_SIZE) * sizeof(u64);
+		ret = pread(fd, &pfn, sizeof(pfn), off);
+		if (ret < 0 || ret != sizeof(pfn)) {
+			pr_perror("Can't read pme for pid %d", pid);
+			ret = -1;
+			goto err;
+		}
+
+		pfn = PME_PFRAME(pfn);
+		/* NOTE(review): "fram" -> "frame" typo in the message below */
+		if (!pfn) {
+			pr_err("Unexpected page fram number 0 for pid %d\n", pid);
+			ret = -1;
+			goto err;
+		}
+
+		/*
+		 * Setup proper VMA status. Note starting with 3.16
+		 * the [vdso]/[vvar] marks are reported correctly
+		 * even when they are remapped into a new place,
+		 * but only since that particular version of the
+		 * kernel!
+		 */
+		if (pfn == vdso_pfn) {
+			if (!vma_area_is(vma, VMA_AREA_VDSO)) {
+				pr_debug("vdso: Restore vDSO status by pfn at %lx\n",
+					 (long)vma->e->start);
+				vma->e->status |= VMA_AREA_VDSO;
+			}
+		} else {
+			if (unlikely(vma_area_is(vma, VMA_AREA_VDSO))) {
+				pr_debug("vdso: Drop mishinted vDSO status at %lx\n",
+					 (long)vma->e->start);
+				vma->e->status &= ~VMA_AREA_VDSO;
+			}
+		}
+	}
+
+	/*
+	 * There is marked vdso, it means such vdso is autogenerated
+	 * and must be dropped from vma list.
+	 */
+	if (proxy_vdso_marked) {
+		pr_debug("vdso: Found marked at %lx (proxy vDSO at %lx VVAR at %lx)\n",
+			 (long)proxy_vdso_marked->e->start,
+			 (long)proxy_vdso_addr, (long)proxy_vvar_addr);
+
+		/*
+		 * Don't forget to restore the proxy vdso/vvar status, since
+		 * it's unknown to the kernel.
+		 */
+		list_for_each_entry(vma, &vma_area_list->h, list) {
+			if (vma->e->start == proxy_vdso_addr) {
+				vma->e->status |= VMA_AREA_REGULAR | VMA_AREA_VDSO;
+				pr_debug("vdso: Restore proxy vDSO status at %lx\n",
+					 (long)vma->e->start);
+			} else if (vma->e->start == proxy_vvar_addr) {
+				vma->e->status |= VMA_AREA_REGULAR | VMA_AREA_VVAR;
+				pr_debug("vdso: Restore proxy VVAR status at %lx\n",
+					 (long)vma->e->start);
+			}
+		}
+
+		/* NOTE(review): "Droppping" typo in the two messages below */
+		pr_debug("vdso: Droppping marked vdso at %lx\n",
+			 (long)proxy_vdso_marked->e->start);
+		list_del(&proxy_vdso_marked->list);
+		xfree(proxy_vdso_marked);
+		vma_area_list->nr--;
+
+		if (proxy_vvar_marked) {
+			pr_debug("vdso: Droppping marked vvar at %lx\n",
+				 (long)proxy_vvar_marked->e->start);
+			list_del(&proxy_vvar_marked->list);
+			xfree(proxy_vvar_marked);
+			vma_area_list->nr--;
+		}
+	}
+	ret = 0;
+err:
+	close(fd);
+	return ret;
+}
+
+/*
+ * Parse /proc/self/maps to locate our own [vdso] (and optional [vvar])
+ * zones, resolve the vDSO symbol table from the live mapping and verify
+ * the two zones are adjacent.  Fills @s; returns 0 on success, -1 on
+ * parse or consistency errors.
+ */
+static int vdso_fill_self_symtable(struct vdso_symtable *s)
+{
+	char buf[512];
+	int ret = -1;
+	FILE *maps;
+
+	*s = (struct vdso_symtable)VDSO_SYMTABLE_INIT;
+
+	maps = fopen_proc(PROC_SELF, "maps");
+	if (!maps) {
+		pr_perror("Can't open self-vma");
+		return -1;
+	}
+
+	while (fgets(buf, sizeof(buf), maps)) {
+		unsigned long start, end;
+		char *has_vdso, *has_vvar;
+
+		/* At most one of the two markers can appear on a maps line */
+		has_vdso = strstr(buf, "[vdso]");
+		if (!has_vdso)
+			has_vvar = strstr(buf, "[vvar]");
+		else
+			has_vvar = NULL;
+
+		if (!has_vdso && !has_vvar)
+			continue;
+
+		/* The address range is the leading "start-end" pair of the line */
+		ret = sscanf(buf, "%lx-%lx", &start, &end);
+		if (ret != 2) {
+			ret = -1;
+			pr_err("Can't find vDSO/VVAR bounds\n");
+			goto err;
+		}
+
+		if (has_vdso) {
+			if (s->vma_start != VDSO_BAD_ADDR) {
+				pr_err("Got second vDSO entry\n");
+				ret = -1;
+				goto err;
+			}
+			s->vma_start = start;
+			s->vma_end = end;
+
+			/* Symbols are resolved straight from our live mapping */
+			ret = vdso_fill_symtable((void *)start, end - start, s);
+			if (ret)
+				goto err;
+		} else {
+			if (s->vvar_start != VVAR_BAD_ADDR) {
+				pr_err("Got second VVAR entry\n");
+				ret = -1;
+				goto err;
+			}
+			s->vvar_start = start;
+			s->vvar_end = end;
+		}
+	}
+
+	/*
+	 * Validate its structure -- for new vDSO format the
+	 * structure must be like
+	 *
+	 * 7fff1f5fd000-7fff1f5fe000 r-xp 00000000 00:00 0 [vdso]
+	 * 7fff1f5fe000-7fff1f600000 r--p 00000000 00:00 0 [vvar]
+	 *
+	 * The areas may be in reverse order.
+	 *
+	 * 7fffc3502000-7fffc3504000 r--p 00000000 00:00 0 [vvar]
+	 * 7fffc3504000-7fffc3506000 r-xp 00000000 00:00 0 [vdso]
+	 *
+	 */
+	ret = 0;
+	if (s->vma_start != VDSO_BAD_ADDR) {
+		/* If vvar exists it must butt up against the vdso on one side */
+		if (s->vvar_start != VVAR_BAD_ADDR) {
+			if (s->vma_end != s->vvar_start &&
+			    s->vvar_end != s->vma_start) {
+				ret = -1;
+				pr_err("Unexpected rt vDSO area bounds\n");
+				goto err;
+			}
+		}
+	} else {
+		ret = -1;
+		pr_err("Can't find rt vDSO\n");
+		goto err;
+	}
+
+	pr_debug("rt [vdso] %lx-%lx [vvar] %lx-%lx\n",
+		 s->vma_start, s->vma_end,
+		 s->vvar_start, s->vvar_end);
+
+err:
+	fclose(maps);
+	return ret;
+}
+
+/*
+ * Capture the runtime vDSO layout and symbols into vdso_sym_rt, then
+ * remember the page frame number backing it (vdso_pfn), which lets the
+ * dumper recognise a remapped vDSO in a dumpee by physical page.
+ * Returns 0 on success, -1 on failure.
+ */
+int vdso_init(void)
+{
+	if (vdso_fill_self_symtable(&vdso_sym_rt))
+		return -1;
+	return vaddr_to_pfn(vdso_sym_rt.vma_start, &vdso_pfn);
+}
diff --git a/cr-restore.c b/cr-restore.c
index 9d28e69e268d..e100164d4fcb 100644
--- a/cr-restore.c
+++ b/cr-restore.c
@@ -2751,8 +2751,13 @@ static int sigreturn_restore(pid_t pid, CoreEntry *core)
 	 * might be completely unused so it's here just for convenience.
 	 */
 	restore_thread_exec_start	= restorer_sym(exec_mem_hint, __export_restore_thread);
+#ifdef CONFIG_PPC64
+	restore_task_exec_start		= restorer_sym(exec_mem_hint, __export_restore_task_trampoline);
+	rsti(current)->munmap_restorer	= restorer_sym(exec_mem_hint, __export_unmap_trampoline);
+#else
 	restore_task_exec_start		= restorer_sym(exec_mem_hint, __export_restore_task);
 	rsti(current)->munmap_restorer	= restorer_sym(exec_mem_hint, __export_unmap);
+#endif
 
 	exec_mem_hint += restorer_len;
 
diff --git a/include/image.h b/include/image.h
index 55e63dd9c7e2..c13ead0e51aa 100644
--- a/include/image.h
+++ b/include/image.h
@@ -11,7 +11,11 @@
 #include "bfd.h"
 #include "bug.h"
 
+#ifdef _ARCH_PPC64
+#define PAGE_IMAGE_SIZE	65536
+#else
 #define PAGE_IMAGE_SIZE	4096
+#endif /* _ARCH_PPC64 */
 #define PAGE_RSS	1
 #define PAGE_ANON	2
 
diff --git a/pie/Makefile b/pie/Makefile
index c0e8f62cee23..11620d7dcee9 100644
--- a/pie/Makefile
+++ b/pie/Makefile
@@ -10,6 +10,9 @@ obj-e			+= $(ARCH_DIR)/vdso-pie.o
 ifeq ($(SRCARCH),aarch64)
 asm-e			+= $(ARCH_DIR)/intraprocedure.o
 endif
+ifeq ($(SRCARCH), ppc64)
+asm-e			+= $(ARCH_DIR)/vdso-trampoline.o
+endif
 endif
 
 parasite-obj-y		+= parasite.o
@@ -18,6 +21,9 @@ parasite-libs-e		+= $(SYSCALL-LIB)
 
 restorer-obj-y		+= restorer.o
 restorer-obj-e		+= $(ARCH_DIR)/restorer.o
+ifeq ($(SRCARCH), ppc64)
+restorer-asm-e		+= $(ARCH_DIR)/restorer-trampoline.o
+endif
 restorer-libs-e		+= $(SYSCALL-LIB)
 
 #
diff --git a/pie/pie.lds.S.in b/pie/pie.lds.S.in
index f1dc526ef762..9e9c97f003c3 100644
--- a/pie/pie.lds.S.in
+++ b/pie/pie.lds.S.in
@@ -12,6 +12,8 @@ SECTIONS
 		. = ALIGN(32);
 		*(.got*)
 		. = ALIGN(32);
+		*(.toc*)
+		. = ALIGN(32);
 	} =0x00000000,
 
 	/DISCARD/ : {
diff --git a/protobuf/Makefile b/protobuf/Makefile
index d4e177462d11..0b1185203573 100644
--- a/protobuf/Makefile
+++ b/protobuf/Makefile
@@ -3,6 +3,7 @@ proto-obj-y	+= core.o
 proto-obj-y	+= core-x86.o
 proto-obj-y	+= core-arm.o
 proto-obj-y	+= core-aarch64.o
+proto-obj-y	+= core-ppc64.o
 proto-obj-y	+= cpuinfo.o
 proto-obj-y	+= inventory.o
 proto-obj-y	+= fdinfo.o
diff --git a/protobuf/core-ppc64.proto b/protobuf/core-ppc64.proto
new file mode 100644
index 000000000000..b874ccf88b9e
--- /dev/null
+++ b/protobuf/core-ppc64.proto
@@ -0,0 +1,23 @@
+message user_ppc64_regs_entry {
+	// Following is the list of registers starting at r0.
+	repeated uint64 gpr		= 1;
+	required uint64 nip		= 2;
+	required uint64 msr		= 3;
+	required uint64 orig_gpr3	= 4;
+	required uint64 ctr		= 5;
+	required uint64 link		= 6;
+	required uint64 xer		= 7;
+	required uint64 ccr		= 8;
+	required uint64 trap		= 9;
+}
+
+message user_ppc64_fpstate_entry {
+	// Following is the list of registers starting at fpr0
+	repeated uint64 fpregs 		= 1;
+}
+
+// Per-thread ppc64 state stored in a core_entry (ti_ppc64 field).
+message thread_info_ppc64 {
+	required uint64			 	clear_tid_addr	= 1;
+	required user_ppc64_regs_entry	 	gpregs		= 2;
+	// Optional: presumably omitted when no FP state was dumped for
+	// the thread -- verify against arch/ppc64/crtools.c.
+	optional user_ppc64_fpstate_entry 	fpstate		= 3;
+}
diff --git a/protobuf/core.proto b/protobuf/core.proto
index 1f44a470cb78..9f70da929aab 100644
--- a/protobuf/core.proto
+++ b/protobuf/core.proto
@@ -1,6 +1,7 @@
 import "core-x86.proto";
 import "core-arm.proto";
 import "core-aarch64.proto";
+import "core-ppc64.proto";
 
 import "rlimit.proto";
 import "timer.proto";
@@ -70,12 +71,14 @@ message core_entry {
 		X86_64		= 1;
 		ARM             = 2;
 		AARCH64		= 3;
+		PPC64		= 4;
 	}
 
 	required march			mtype		= 1;
 	optional thread_info_x86	thread_info	= 2;
 	optional thread_info_arm	ti_arm		= 6;
 	optional thread_info_aarch64	ti_aarch64	= 8;
+	optional thread_info_ppc64	ti_ppc64	= 9;
 
 	optional task_core_entry	tc		= 3;
 	optional task_kobj_ids_entry	ids		= 4;
-- 
1.9.1



More information about the CRIU mailing list