[CRIU] [PATCH 4/5] [RFC] cr: implemented the support for the AArch64 architecture

Christopher Covington cov at codeaurora.org
Wed Feb 5 07:14:48 PST 2014


Hi Alexander,

Thanks for posting this! I've made a few comments and asked a few questions
that I hope might help improve the code further.

On 02/05/2014 01:14 AM, Alexander Kartashov wrote:

> diff --git a/arch/aarch64/Makefile b/arch/aarch64/Makefile
> new file mode 100644
> index 0000000..1eed399
> --- /dev/null
> +++ b/arch/aarch64/Makefile
> @@ -0,0 +1,59 @@
> +targets		+= syscalls
> +targets		+= crtools
> +
> +SYS-ASM		:= syscalls.S
> +
> +syscalls-asm-y	+= $(SYS-ASM:.S=).o
> +crtools-obj-y	+= crtools.o
> +crtools-obj-y	+= cpu.o
> +
> +SYS-DEF		:= ../arm/syscall.def
> +SYS-ASM-COMMON	:= syscall-common.S
> +SYS-TYPES	:= include/syscall-types.h
> +
> +SYS-CODES	:= include/syscall-codes.h
> +SYS-PROTO	:= include/syscall.h
> +
> +SYS-GEN		:= ../arm/gen-syscalls.pl
> +SYS-GEN-TBL	:= ../arm/gen-sys-exec-tbl.pl

I wonder if these scripts should be moved up one directory if they are going
to be shared across arm/arm64?

> +SYS-EXEC-TBL	:= sys-exec-tbl.c
> +
> +syscalls-asm-y-asmflags		+= -fpie -Wstrict-prototypes -Wa,--noexecstack
> +syscalls-asm-y-asmflags		+= -nostdlib -fomit-frame-pointer -I$(obj)
> +ASMFLAGS	+= -D__ASSEMBLY__
> +
> +ARCH_BITS	:= 64
> +
> +$(obj)/$(SYS-ASM): $(obj)/$(SYS-GEN) $(obj)/$(SYS-DEF) $(obj)/$(SYS-ASM-COMMON) $(SYS-TYPES)
> +	$(E) "  GEN     " $@
> +	$(Q) perl			\
> +		$(obj)/$(SYS-GEN)	\
> +		$(obj)/$(SYS-DEF)	\
> +		$(SYS-CODES)		\
> +		$(SYS-PROTO)		\
> +		$(obj)/$(SYS-ASM)	\
> +		$(SYS-ASM-COMMON)	\
> +		$(SYS-TYPES)		\
> +		$(ARCH_BITS)
> +
> +$(obj)/syscalls.o: $(obj)/$(SYS-ASM)
> +
> +$(obj)/$(SYS-EXEC-TBL): $(obj)/$(SYS-GEN-TBL) $(obj)/$(SYS-DEF)
> +	$(E) "  GEN     " $@
> +	$(Q) perl			\
> +		$(obj)/$(SYS-GEN-TBL)	\
> +		$(obj)/$(SYS-DEF)	\
> +		$(obj)/$(SYS-EXEC-TBL)	\
> +		$(ARCH_BITS)
> +
> +_all += $(obj)/$(SYS-EXEC-TBL)
> +
> +cleanup-y += $(obj)/$(SYS-EXEC-TBL) $(obj)/$(SYS-ASM)
> +cleanup-y += $(SYS-CODES)
> +cleanup-y += $(SYS-PROTO)
> +
> +ifneq ($(MAKECMDGOALS),clean)
> +deps-after := $(obj)/$(SYS-ASM)
> +incdeps := y
> +endif

> diff --git a/arch/aarch64/crtools.c b/arch/aarch64/crtools.c
> new file mode 100644
> index 0000000..899b40d
> --- /dev/null
> +++ b/arch/aarch64/crtools.c
> @@ -0,0 +1,245 @@
> +#include <string.h>
> +#include <unistd.h>
> +
> +#include "asm/types.h"
> +#include "asm/restorer.h"
> +#include "compiler.h"
> +#include "ptrace.h"
> +#include "asm/processor-flags.h"
> +#include "protobuf.h"
> +#include "protobuf/core.pb-c.h"
> +#include "protobuf/creds.pb-c.h"
> +#include "parasite-syscall.h"
> +#include "syscall.h"
> +#include "log.h"
> +#include "util.h"
> +#include "cpu.h"
> +#include "elf.h"
> +#include "parasite-syscall.h"
> +#include "restorer.h"
> +
> +
> +/*
> + * Injected syscall instruction
> + */
> +const char code_syscall[] = {
> +	0x01, 0x00, 0x00, 0xd4,		/* SVC #0 */
> +	0x00, 0x00, 0x20, 0xd4		/* BRK #0 */
> +};
> +
> +const int code_syscall_size = round_up(sizeof(code_syscall), sizeof(long));
> +
> +static inline void __check_code_syscall(void)
> +{
> +	BUILD_BUG_ON(sizeof(code_syscall) != BUILTIN_SYSCALL_SIZE);
> +	BUILD_BUG_ON(!is_log2(sizeof(code_syscall)));
> +}
> +
> +
> +void parasite_setup_regs(unsigned long new_ip, void *stack, user_regs_struct_t *regs)
> +{
> +	regs->pc = new_ip;
> +	if (stack)
> +		regs->sp = (unsigned long)stack;
> +}
> +
> +bool arch_can_dump_task(pid_t pid)
> +{
> +	/*
> +	 * TODO: Add proper check here
> +	 */
> +	return true;
> +}
> +
> +int syscall_seized(struct parasite_ctl *ctl, int nr, unsigned long *ret,
> +					unsigned long arg1,
> +					unsigned long arg2,
> +					unsigned long arg3,
> +					unsigned long arg4,
> +					unsigned long arg5,
> +					unsigned long arg6)
> +{
> +	user_regs_struct_t regs = ctl->orig.regs;
> +	int err;
> +
> +	regs.regs[8] = (unsigned long)nr;
> +	regs.regs[0] = arg1;
> +	regs.regs[1] = arg2;
> +	regs.regs[2] = arg3;
> +	regs.regs[3] = arg4;
> +	regs.regs[4] = arg5;
> +	regs.regs[5] = arg6;
> +	regs.regs[6] = 0;
> +	regs.regs[7] = 0;
> +
> +	err = __parasite_execute_syscall(ctl, &regs);
> +
> +	*ret = regs.regs[0];
> +	return err;
> +}
> +
> +#define assign_reg(dst, src, e)		dst->e = (__typeof__(dst->e))(src).e
> +
> +#ifndef NT_PRFPREG
> +// Copied from the Linux kernel source include/uapi/linux/elf.h
> +#define NT_PRFPREG 2
> +#endif

Why not just include <linux/elf.h>?

> +int get_task_regs(pid_t pid, user_regs_struct_t regs, CoreEntry *core)
> +{
> +        struct iovec iov;

Nit: this line is indented with spaces instead of a tab.

> +	struct user_fpsimd_state fpsimd;
> +	int i, ret;
> +
> +	pr_info("Dumping GP/FPU registers for %d\n", pid);
> +
> +	iov.iov_base = &regs;
> +	iov.iov_len = sizeof(user_regs_struct_t);
> +	if ((ret = ptrace(PTRACE_GETREGSET, pid, NT_PRSTATUS, &iov))) {
> +		pr_err("Failed to obtain CPU registers for %d!", pid);
> +		goto err;
> +	}
> +
> +	iov.iov_base = &fpsimd;
> +	iov.iov_len = sizeof(fpsimd);
> +	if ((ret = ptrace(PTRACE_GETREGSET, pid, NT_PRFPREG, &iov))) {
> +		pr_err("Failed to obtain FPU registers for %d!", pid);
> +		goto err;
> +	}
> +
> +
> +	// Save the Aarch64 CPU state
> +	for (i = 0; i < 31; ++i)
> +		assign_reg(core->ti_aarch64->gpregs, regs, regs[i]);
> +	assign_reg(core->ti_aarch64->gpregs, regs, sp);
> +	assign_reg(core->ti_aarch64->gpregs, regs, pc);
> +	assign_reg(core->ti_aarch64->gpregs, regs, pstate);
> +
> +
> +	// Save the FP/SIMD state
> +	for (i = 0; i < 32; ++i)
> +	{
> +		core->ti_aarch64->fpsimd->vregs[2*i]     = fpsimd.vregs[i];
> +		core->ti_aarch64->fpsimd->vregs[2*i + 1] = fpsimd.vregs[i] >> 64;
> +	}
> +	assign_reg(core->ti_aarch64->fpsimd, fpsimd, fpsr);
> +	assign_reg(core->ti_aarch64->fpsimd, fpsimd, fpcr);
> +
> +	ret = 0;
> +
> +err:
> +	return ret;
> +}
> +
> +int arch_alloc_thread_info(CoreEntry *core)
> +{
> +	ThreadInfoAarch64 *ti_aarch64;
> +	UserAarch64RegsEntry *gpregs;
> +	UserAarch64FpsimdContextEntry *fpsimd;
> +
> +	ti_aarch64 = xmalloc(sizeof(*ti_aarch64));
> +	if (!ti_aarch64)
> +		goto err;
> +	thread_info_aarch64__init(ti_aarch64);
> +	core->ti_aarch64 = ti_aarch64;
> +
> +	gpregs = xmalloc(sizeof(*gpregs));
> +	if (!gpregs)
> +		goto err;
> +	user_aarch64_regs_entry__init(gpregs);
> +
> +	gpregs->regs = xmalloc(31*sizeof(uint64_t));
> +	if (!gpregs->regs)
> +		goto err;
> +	gpregs->n_regs = 31;
> +
> +	ti_aarch64->gpregs = gpregs;
> +
> +	fpsimd = xmalloc(sizeof(*fpsimd));
> +	if (!fpsimd)
> +		goto err;
> +	user_aarch64_fpsimd_context_entry__init(fpsimd);
> +	ti_aarch64->fpsimd = fpsimd;
> +	fpsimd->vregs = xmalloc(64*sizeof(fpsimd->vregs[0]));
> +	fpsimd->n_vregs = 64;
> +	if (!fpsimd->vregs)
> +		goto err;
> +
> +	return 0;
> +err:
> +	return -1;
> +}
> +
> +void arch_free_thread_info(CoreEntry *core)
> +{
> +	if (CORE_THREAD_ARCH_INFO(core)) {
> +		if (CORE_THREAD_ARCH_INFO(core)->fpsimd) {
> +			xfree(CORE_THREAD_ARCH_INFO(core)->fpsimd->vregs);
> +			xfree(CORE_THREAD_ARCH_INFO(core)->fpsimd);
> +		}
> +		xfree(CORE_THREAD_ARCH_INFO(core)->gpregs->regs);
> +		xfree(CORE_THREAD_ARCH_INFO(core)->gpregs);
> +		xfree(CORE_THREAD_ARCH_INFO(core));
> +		CORE_THREAD_ARCH_INFO(core) = NULL;
> +	}
> +}
> +
> +#define FPSIMD_MAGIC 0x46508001
> +
> +int restore_fpu(struct rt_sigframe *sigframe, CoreEntry *core)
> +{
> +	int i;
> +	struct fpsimd_context *fpsimd = &RT_SIGFRAME_FPU(sigframe);
> +
> +	if (core->ti_aarch64->fpsimd->n_vregs != 64)
> +		return 1;
> +
> +	for (i = 0; i < 32; ++i)
> +		fpsimd->vregs[i] =	(__uint128_t)core->ti_aarch64->fpsimd->vregs[2*i] |
> +					((__uint128_t)core->ti_aarch64->fpsimd->vregs[2*i + 1] << 64);
> +	assign_reg(fpsimd, *core->ti_aarch64->fpsimd, fpsr);
> +	assign_reg(fpsimd, *core->ti_aarch64->fpsimd, fpcr);
> +
> +	fpsimd->head.magic = FPSIMD_MAGIC;
> +	fpsimd->head.size = sizeof(*fpsimd);
> +
> +	return 0;
> +}
> +
> +void *mmap_seized(
> +		struct parasite_ctl *ctl,
> +		void *addr, size_t length, int prot,
> +		int flags, int fd, off_t offset)
> +{
> +	unsigned long map;
> +	int err;
> +
> +	if (offset & ~PAGE_MASK)
> +		return 0;
> +
> +	err = syscall_seized(ctl, __NR_mmap, &map,
> +			(unsigned long)addr, length, prot, flags, fd, offset);
> +	if (err < 0 || map > TASK_SIZE)
> +		map = 0;
> +
> +	return (void *)map;
> +}
> +
> +int restore_gpregs(struct rt_sigframe *f, UserRegsEntry *r)
> +{
> +#define CPREG1(d)       f->uc.uc_mcontext.d = r->d
> +#define CPREG2(d, s)    f->uc.uc_mcontext.d = r->s
> +
> +	int i;
> +
> +	for (i = 0; i < 31; ++i)
> +		CPREG1(regs[i]);
> +	CPREG1(sp);
> +	CPREG1(pc);
> +	CPREG1(pstate);
> +
> +#undef CPREG1
> +#undef CPREG2
> +
> +	return 0;
> +}
> diff --git a/arch/aarch64/include/asm/atomic.h b/arch/aarch64/include/asm/atomic.h
> new file mode 100644
> index 0000000..00b73ba
> --- /dev/null
> +++ b/arch/aarch64/include/asm/atomic.h
> @@ -0,0 +1,68 @@
> +#ifndef __CR_ATOMIC_H__
> +#define __CR_ATOMIC_H__
> +
> +typedef struct {
> +	int counter;
> +} atomic_t;
> +
> +
> +/* Copied from the Linux kernel header arch/arm64/include/asm/atomic.h */
> +
> +static inline int atomic_read(const atomic_t *v)
> +{
> +	return (*(volatile int *)&(v)->counter);
> +}
> +
> +static inline void atomic_set(atomic_t *v, int i)
> +{
> +	v->counter = i;
> +}
> +
> +#define atomic_get atomic_read
> +
> +static inline int atomic_add_return(int i, atomic_t *v)
> +{
> +	unsigned long tmp;
> +	int result;
> +
> +	asm volatile("// atomic_add_return\n"
> +"1:	ldaxr	%w0, %2\n"
> +"	add	%w0, %w0, %w3\n"
> +"	stlxr	%w1, %w0, %2\n"
> +"	cbnz	%w1, 1b"
> +	: "=&r" (result), "=&r" (tmp), "+Q" (v->counter)
> +	: "Ir" (i)
> +	: "cc", "memory");
> +
> +	return result;
> +}

There are some changes in-flight:

http://lists.infradead.org/pipermail/linux-arm-kernel/2014-February/229588.html
http://lists.infradead.org/pipermail/linux-arm-kernel/2014-February/229587.html

> +static inline int atomic_sub_return(int i, atomic_t *v)
> +{
> +	unsigned long tmp;
> +	int result;
> +
> +	asm volatile("// atomic_sub_return\n"
> +"1:	ldaxr	%w0, %2\n"
> +"	sub	%w0, %w0, %w3\n"
> +"	stlxr	%w1, %w0, %2\n"
> +"	cbnz	%w1, 1b"
> +	: "=&r" (result), "=&r" (tmp), "+Q" (v->counter)
> +	: "Ir" (i)
> +	: "cc", "memory");
> +
> +	return result;
> +}
> +
> +static inline int atomic_inc(atomic_t *v) { return atomic_add_return(1, v) - 1; }
> +
> +static inline int atomic_add(int val, atomic_t *v) { return atomic_add_return(val, v) - val; }
> +
> +static inline int atomic_dec(atomic_t *v) { return atomic_sub_return(1, v) + 1; }
> +
> +/* true if the result is 0, or false for all other cases. */
> +#define atomic_dec_and_test(v) (atomic_sub_return(1, v) == 0)
> +
> +#define atomic_inc_return(v)	(atomic_add_return(1, v))
> +
> +#endif /* __CR_ATOMIC_H__ */
> diff --git a/arch/aarch64/include/asm/bitops.h b/arch/aarch64/include/asm/bitops.h
> new file mode 100644
> index 0000000..cf42235
> --- /dev/null
> +++ b/arch/aarch64/include/asm/bitops.h
> @@ -0,0 +1,116 @@
> +#ifndef __CR_BITOPS_H__
> +#define __CR_BITOPS_H__
> +
> +#include "asm/bitsperlong.h"
> +
> +#define DIV_ROUND_UP(n,d)	(((n) + (d) - 1) / (d))
> +#define BITS_TO_LONGS(nr)	DIV_ROUND_UP(nr, BITS_PER_LONG)
> +
> +#define DECLARE_BITMAP(name, bits)		\
> +	unsigned long name[BITS_TO_LONGS(bits)]
> +
> +#if __GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 1)
> +/* Technically wrong, but this avoids compilation errors on some gcc
> +   versions. */
> +#define BITOP_ADDR(x) "=m" (*(volatile long *) (x))
> +#else
> +#define BITOP_ADDR(x) "+m" (*(volatile long *) (x))
> +#endif
> +
> +#define ADDR				BITOP_ADDR(addr)
> +
> +static inline void set_bit(int nr, volatile unsigned long *addr) {
> +	addr += nr / BITS_PER_LONG;
> +	*addr |= (1 << (nr % BITS_PER_LONG));
> +}
> +
> +static inline void change_bit(int nr, volatile unsigned long *addr)
> +{
> +	addr += nr / BITS_PER_LONG;
> +	*addr ^= (1 << (nr % BITS_PER_LONG));
> +}
> +
> +static inline int test_bit(int nr, volatile const unsigned long *addr)
> +{
> +	addr += nr / BITS_PER_LONG;
> +	return (*addr & (1 << (nr % BITS_PER_LONG))) ? -1 : 0;
> +}
> +
> +static inline void clear_bit(int nr, volatile unsigned long *addr)
> +{
> +	addr += nr / BITS_PER_LONG;
> +	*addr &= ~(1 << (nr % BITS_PER_LONG));
> +}
> +
> +/**
> + * __ffs - find first set bit in word
> + * @word: The word to search
> + *
> + * Undefined if no bit exists, so code should check against 0 first.
> + */
> +static inline unsigned long __ffs(unsigned long word)
> +{
> +	int p = 0;
> +
> +	for (; p < 8*sizeof(word); ++p) {

At first glance it seems as if using BITS_PER_LONG above would make the file
no longer architecture-specific.

> +		if (word & 1) {
> +			break;
> +		}
> +
> +		word >>= 1;
> +	}
> +
> +	return p;
> +}
> +
> +#define BITOP_WORD(nr)		((nr) / BITS_PER_LONG)
> +
> +/*
> + * Find the next set bit in a memory region.
> + */
> +static inline
> +unsigned long find_next_bit(const unsigned long *addr, unsigned long size,
> +			    unsigned long offset)
> +{
> +	const unsigned long *p = addr + BITOP_WORD(offset);
> +	unsigned long result = offset & ~(BITS_PER_LONG-1);
> +	unsigned long tmp;
> +
> +	if (offset >= size)
> +		return size;
> +	size -= result;
> +	offset %= BITS_PER_LONG;
> +	if (offset) {
> +		tmp = *(p++);
> +		tmp &= (~0UL << offset);
> +		if (size < BITS_PER_LONG)
> +			goto found_first;
> +		if (tmp)
> +			goto found_middle;
> +		size -= BITS_PER_LONG;
> +		result += BITS_PER_LONG;
> +	}
> +	while (size & ~(BITS_PER_LONG-1)) {
> +		if ((tmp = *(p++)))
> +			goto found_middle;
> +		result += BITS_PER_LONG;
> +		size -= BITS_PER_LONG;
> +	}
> +	if (!size)
> +		return result;
> +	tmp = *p;
> +
> +found_first:
> +	tmp &= (~0UL >> (BITS_PER_LONG - size));
> +	if (tmp == 0UL)		/* Are any bits set? */
> +		return result + size;	/* Nope. */
> +found_middle:
> +	return result + __ffs(tmp);
> +}
> +
> +#define for_each_bit(i, bitmask)				\
> +	for (i = find_next_bit(bitmask, sizeof(bitmask), 0);	\
> +	     i < sizeof(bitmask);				\
> +	     i = find_next_bit(bitmask, sizeof(bitmask), i + 1))
> +
> +#endif /* __CR_BITOPS_H__ */


> diff --git a/arch/aarch64/include/asm/dump.h b/arch/aarch64/include/asm/dump.h
> new file mode 100644
> index 0000000..396c97e
> --- /dev/null
> +++ b/arch/aarch64/include/asm/dump.h
> @@ -0,0 +1,11 @@
> +#ifndef __CR_ASM_DUMP_H__
> +#define __CR_ASM_DUMP_H__
> +
> +extern int get_task_regs(pid_t pid, user_regs_struct_t regs, CoreEntry *core);
> +extern int arch_alloc_thread_info(CoreEntry *core);
> +extern void arch_free_thread_info(CoreEntry *core);
> +
> +
> +static inline void core_put_tls(CoreEntry *core, u32 tls) { }

u32?

> +#endif

> diff --git a/arch/aarch64/include/asm/parasite.h b/arch/aarch64/include/asm/parasite.h
> new file mode 100644
> index 0000000..19959b7
> --- /dev/null
> +++ b/arch/aarch64/include/asm/parasite.h
> @@ -0,0 +1,6 @@
> +#ifndef __ASM_PARASITE_H__
> +#define __ASM_PARASITE_H__
> +
> +static inline u32 arch_get_tls(void) { return 0; }

u32?

> +#endif

> diff --git a/arch/aarch64/include/asm/restore.h b/arch/aarch64/include/asm/restore.h
> new file mode 100644
> index 0000000..8aae682
> --- /dev/null
> +++ b/arch/aarch64/include/asm/restore.h
> @@ -0,0 +1,28 @@
> +#ifndef __CR_ASM_RESTORE_H__
> +#define __CR_ASM_RESTORE_H__
> +
> +#include "asm/restorer.h"
> +
> +#include "protobuf/core.pb-c.h"
> +
> +#define JUMP_TO_RESTORER_BLOB(new_sp, restore_task_exec_start,	\
> +			      task_args)			\
> +	asm volatile(						\
> +			"movn x0, #15			\n"	\
> +			"and  x0, x0, %0		\n"	\
> +			"mov  sp, x0			\n"	\

and sp, %0, #~15

> +			"mov  x1, %1			\n"	\
> +			"mov  x0, %2			\n"	\
> +			"br   x1			\n"	\
> +			:					\
> +			: "r"(new_sp),				\
> +			  "r"(restore_task_exec_start),		\
> +			  "r"(task_args)			\
> +			: "sp", "x0", "x1", "memory")

I didn't notice any memory accesses -- should "memory" be in the clobber list?

> +static inline void core_get_tls(CoreEntry *pcore, uint32_t *ptls) { }

uint32_t?

> +int restore_fpu(struct rt_sigframe *sigframe, CoreEntry *core);
> +
> +#endif
> diff --git a/arch/aarch64/include/asm/restorer.h b/arch/aarch64/include/asm/restorer.h
> new file mode 100644
> index 0000000..5fa929e
> --- /dev/null
> +++ b/arch/aarch64/include/asm/restorer.h
> @@ -0,0 +1,140 @@
> +#ifndef __CR_ASM_RESTORER_H__
> +#define __CR_ASM_RESTORER_H__
> +
> +#include "asm/types.h"
> +#include "protobuf/core.pb-c.h"
> +
> +/* Copied from the kernel header arch/arm64/include/uapi/asm/sigcontext.h */
> +
> +#define FPSIMD_MAGIC    0x46508001
> + 
> +typedef struct fpsimd_context fpu_state_t;
> +
> +
> +struct aux_context {
> +	struct fpsimd_context fpsimd;
> +	/* additional context to be added before "end" */
> +	struct _aarch64_ctx end;
> +};
> +
> +/* Copied from the Linux kernel header arch/arm64/include/asm/sigcontext.h */
> +
> +struct rt_sigcontext {
> +	u64 fault_address;
> +	/* AArch64 registers */
> +	u64 regs[31];
> +	u64 sp;
> +	u64 pc;
> +	u64 pstate;
> +	/* 4K reserved for FP/SIMD state and future expansion */
> +	u8 __reserved[4096] __attribute__((__aligned__(16)));
> +};
> +
> +
> +#include "sigframe.h"
> +
> +/* Copied from the Linux kernel source arch/arm64/include/asm/ucontext.h */
> +
> +/*
> + * There're at least two problems with the AArch64 sigframe structure:
> + *
> + * - the layout of the struct ucontext isn't compatible with x86 and ARM
> + *   so it's impossible to use the definition from sigframe.h;
> + *
> + * - the struct ucontext is available in the userspace, however the type
> + *   of the field uc_sigmask is sigset_t --- it's impossible to identify
> + *   it with the type k_sigset_t for some reason.
> + *
> + */
> +
> +struct arch_rt_ucontext {
> +	unsigned long	 	uc_flags;
> +	struct ucontext        *uc_link;
> +	stack_t			uc_stack;
> +	k_rtsigset_t		uc_sigmask;
> +	/* glibc uses a 1024-bit sigset_t */
> +	u8			__unused[1024 / 8 - sizeof(k_rtsigset_t)];
> +	/* last for future expansion */
> +	struct rt_sigcontext	uc_mcontext;
> +};
> +
> +/* Copied from the kernel source arch/arm64/kernel/signal.c */
> +
> +struct rt_sigframe {
> +	siginfo_t info;
> +	struct arch_rt_ucontext uc;
> +	u64 fp;
> +	u64 lr;
> +};
> +
> +
> +#define ARCH_RT_SIGRETURN(new_sp)						\
> +	asm volatile(								\
> +			"mov sp, %0					\n" 	\
> +			"mov x8, #"__stringify(__NR_rt_sigreturn)"	\n"	\
> +			"svc #0						\n"	\
> +			:							\
> +			: "r"(new_sp)						\
> +			: "sp", "x8", "memory")

Is the memory clobber necessary?

> +#define RUN_CLONE_RESTORE_FN(ret, clone_flags, new_sp, parent_tid, 		\
> +			     thread_args, clone_restore_fn)			\
> +	asm volatile(								\
> +			"clone_emul:					\n"	\
> +			"movn x2, #15					\n"	\
> +			"and x2, x2, %2					\n"	\

and x2, %2, #~15

> +			"sub x2, x2, #16				\n"	\
> +			"stp %5, %6, [x2]				\n"	\
> +			"mov x0, %1					\n"	\
> +			"mov x2, %3					\n"	\
> +			"mov x3, %4					\n"	\
> +			"mov x8, #"__stringify(__NR_clone)"		\n"	\
> +			"svc #0				        	\n"	\
> +										\
> +			"cmp x0, #0			        	\n"	\
> +			"beq thread_run					\n"	\

cbz x0, thread_run

> +			"mov %0, x0					\n"	\
> +			"b   clone_end					\n"	\
> +										\
> +			"thread_run:					\n"	\
> +			"ldp x1, x0, [sp]		        	\n"	\
> +			"br  x1						\n"	\
> +										\
> +			"clone_end:					\n"	\
> +			: "=r"(ret)						\
> +			: "r"(clone_flags),					\
> +			"r"(new_sp),						\
> +			"r"(&parent_tid),					\
> +			"r"(&thread_args[i].pid),				\
> +			"r"(clone_restore_fn),					\
> +			"r"(&thread_args[i])					\
> +			: "x0", "x1", "x2", "x3", "x7", "memory")

It doesn't look like you modify memory, but you do modify x8 instead of x7.

> +#define ARCH_FAIL_CORE_RESTORE					\
> +	asm volatile(						\
> +			"mov sp, %0			    \n"	\
> +			"mov x0, #0			    \n"	\
> +			"b   x0			            \n"	\
> +			:					\
> +			: "r"(ret)				\
> +			: "memory")

Should x0 and sp go into the clobber list and memory be taken out?

> +#define RT_SIGFRAME_UC(rt_sigframe) rt_sigframe->uc
> +#define RT_SIGFRAME_REGIP(rt_sigframe) ((long unsigned int)(rt_sigframe)->uc.uc_mcontext.pc)
> +#define RT_SIGFRAME_HAS_FPU(rt_sigframe) (1)
> +#define RT_SIGFRAME_FPU(rt_sigframe) ((struct aux_context*)&(rt_sigframe)->uc.uc_mcontext.__reserved)->fpsimd
> +
> +#define SIGFRAME_OFFSET 0
> +
> +
> +int restore_gpregs(struct rt_sigframe *f, UserAarch64RegsEntry *r);
> +int restore_nonsigframe_gpregs(UserAarch64RegsEntry *r);
> +
> +static inline int sigreturn_prep_fpu_frame(struct rt_sigframe *sigframe, fpu_state_t *fpu_state) { return 0; }
> +
> +static inline void restore_tls(u32 tls) { }

u32?

> +#endif

> diff --git a/arch/aarch64/include/asm/syscall-aux.S b/arch/aarch64/include/asm/syscall-aux.S
> new file mode 100644
> index 0000000..18df836
> --- /dev/null
> +++ b/arch/aarch64/include/asm/syscall-aux.S
> @@ -0,0 +1,32 @@
> +ENTRY(sys_open)
> +	mov x3, x2
> +	mov x2, x1
> +	mov x1, x0
> +	mov x0, #-100
> +	b   sys_openat
> +END(sys_open)
> +
> +
> +ENTRY(sys_mkdir)
> +	mov x3, x2
> +	mov x2, x1
> +	mov x1, x0
> +	mov x0, #-100
> +	b   sys_mkdirat
> +END(sys_mkdir)
> +
> +
> +ENTRY(sys_rmdir)
> +	mov x1, x0
> +	mov x0, #-100
> +	mov x2, #0x200		// flags = AT_REMOVEDIR
> +	b   sys_unlinkat
> +END(sys_rmdir)
> +
> +
> +ENTRY(sys_unlink)
> +	mov x1, x0
> +	mov x0, #-100
> +	mov x2, #0		// flags = 0
> +	b   sys_unlinkat
> +END(sys_unlink)

This might be slightly faster to read if you always fill the registers in the
same order, probably high to low because of the rearranging required in the
first two.

> diff --git a/arch/aarch64/include/asm/types.h b/arch/aarch64/include/asm/types.h
> new file mode 100644
> index 0000000..777af66
> --- /dev/null
> +++ b/arch/aarch64/include/asm/types.h
> @@ -0,0 +1,185 @@
> +#ifndef __CR_ASM_TYPES_H__
> +#define __CR_ASM_TYPES_H__
> +
> +#include <stdint.h>
> +#include <stdbool.h>
> +#include <signal.h>
> +#include "protobuf/core.pb-c.h"
> +
> +#include "asm/bitops.h"
> +#include "asm/int.h"
> +
> +/* prctl.h */
> +#define PR_SET_NAME		15
> +#define PR_GET_NAME		16
> +
> +#define PR_CAPBSET_DROP		24
> +#define PR_GET_SECUREBITS	27
> +#define PR_SET_SECUREBITS	28
> +
> +#define SECURE_NO_SETUID_FIXUP	2
> +
> +#define PR_SET_MM		35
> +# define PR_SET_MM_START_CODE		1
> +# define PR_SET_MM_END_CODE		2
> +# define PR_SET_MM_START_DATA		3
> +# define PR_SET_MM_END_DATA		4
> +# define PR_SET_MM_START_STACK		5
> +# define PR_SET_MM_START_BRK		6
> +# define PR_SET_MM_BRK			7
> +# define PR_SET_MM_ARG_START		8
> +# define PR_SET_MM_ARG_END		9
> +# define PR_SET_MM_ENV_START		10
> +# define PR_SET_MM_ENV_END		11
> +# define PR_SET_MM_AUXV			12
> +# define PR_SET_MM_EXE_FILE		13
> +
> +#define PR_GET_TID_ADDRESS     40
> +
> +/* fcntl */
> +#ifndef F_LINUX_SPECIFIC_BASE
> +#define F_LINUX_SPECIFIC_BASE	1024
> +#endif
> +#ifndef F_SETPIPE_SZ
> +# define F_SETPIPE_SZ	(F_LINUX_SPECIFIC_BASE + 7)
> +#endif
> +#ifndef F_GETPIPE_SZ
> +# define F_GETPIPE_SZ	(F_LINUX_SPECIFIC_BASE + 8)
> +#endif
> +
> +#ifndef F_GETOWNER_UIDS
> +#define F_GETOWNER_UIDS	17
> +#endif
> +
> +#define CLONE_CHILD_USEPID      0x02000000
> +#define CLONE_VFORK		0x00004000
> +
> +#define SIGMAX			64
> +#define SIGMAX_OLD		31
> +
> +#define ERESTARTSYS		512
> +#define ERESTARTNOINTR		513
> +#define ERESTARTNOHAND		514
> +#define ERESTART_RESTARTBLOCK	516
> +
> +#define MAJOR(dev)		((dev)>>8)
> +#define MINOR(dev)		((dev) & 0xff)
> +
> +#define _LINUX_CAPABILITY_VERSION_3	0x20080522
> +#define _LINUX_CAPABILITY_U32S_3	2
> +
> +
> +typedef void rt_signalfn_t(int, siginfo_t *, void *);
> +typedef rt_signalfn_t *rt_sighandler_t;
> +
> +typedef void rt_restorefn_t(void);
> +typedef rt_restorefn_t *rt_sigrestore_t;
> +
> +#define _KNSIG		64
> +#define _NSIG_BPW	64
> +
> +#define _KNSIG_WORDS	(_KNSIG / _NSIG_BPW)
> +
> +typedef struct {
> +	unsigned long sig[_KNSIG_WORDS];
> +} k_rtsigset_t;
> +
> +static inline void ksigfillset(k_rtsigset_t *set)
> +{
> +	int i;
> +	for (i = 0; i < _KNSIG_WORDS; i++)
> +		set->sig[i] = (unsigned long)-1;
> +}
> +
> +#define SA_RESTORER	0x04000000

This is not defined in the AArch64 kernel. Since the only use is OR'd with
other bits, can you just define it to 0 and maybe include the "New
architectures should not define the obsolete SA_RESTORER" comment?

> +typedef struct {
> +	rt_sighandler_t	rt_sa_handler;
> +	unsigned long	rt_sa_flags;
> +	rt_sigrestore_t	rt_sa_restorer;
> +	k_rtsigset_t	rt_sa_mask;
> +} rt_sigaction_t;
> +
> +/*
> + * Copied from the Linux kernel header arch/arm64/include/uapi/asm/ptrace.h
> + *
> + * A thread ARM CPU context
> + */

Why can't you just include <asm/ptrace.h>?

> +typedef struct {
> +	u64	regs[31];
> +	u64	sp;
> +	u64	pc;
> +	u64	pstate;
> +} user_regs_struct_t;
> +
> +
> +#define ASSIGN_TYPED(a, b) do { a = (typeof(a))b; } while (0)
> +#define ASSIGN_MEMBER(a,b,m) do { ASSIGN_TYPED((a)->m, (b)->m); } while (0)
> +
> +#ifndef PAGE_SIZE
> +# define PAGE_SIZE	4096
> +#endif

Having this as a build-time constant means there can't be a single binary that
works on all page sizes, but the kernel would probably have to provide a new
run-time API for checking the page size (unless there's one already?) to do
things any differently.

> +#ifndef PAGE_MASK
> +# define PAGE_MASK	(~(PAGE_SIZE - 1))
> +#endif
> +
> +/* For UNIX sockets data */
> +#ifndef SCM_MAX_FD
> +# define SCM_MAX_FD	253
> +#endif
> +
> +#include <fcntl.h>
> +
> +#ifndef F_SETOWN_EX
> +#define F_SETOWN_EX	15
> +#define F_GETOWN_EX	16
> +
> +struct f_owner_ex {
> +	int	type;
> +	pid_t	pid;
> +};
> +#endif
> +
> +#ifndef MAP_HUGETLB
> +# define MAP_HUGETLB 0x40000
> +#endif
> +
> +#ifndef MADV_HUGEPAGE
> +# define MADV_HUGEPAGE 14
> +#endif
> +
> +#ifndef MADV_NOHUGEPAGE
> +# define MADV_NOHUGEPAGE 15
> +#endif
> +
> +#ifndef MADV_DONTDUMP
> +# define MADV_DONTDUMP 16
> +#endif
> +
> +#define REG_RES(regs)		((regs).regs[0])
> +#define REG_IP(regs)		((regs).pc)
> +#define REG_SYSCALL_NR(regs)	((regs).regs[8])
> +
> +// Copied from the Linux kernel arch/arm64/include/asm/memory.h
> +// FIXME: what about a 32bit task?

Last time I checked I was able to restore an A32 task with an A32 CRIU on an
A64 kernel out of the box, but dump didn't work (maybe it was the
GETREGS/GETREGSET issue). One possibility might be to just fix A32 CRIU dump
on an A64 kernel and then say if you want to dump an A32 task, use an A32 CRIU.

> +#define TASK_SIZE (1ULL << 39)
> +
> +#define AT_VECTOR_SIZE 40
> +
> +typedef UserAarch64RegsEntry UserRegsEntry;
> +
> +#define CORE_ENTRY__MARCH CORE_ENTRY__MARCH__AARCH64
> +
> +#define CORE_THREAD_ARCH_INFO(core) core->ti_aarch64
> +
> +#define TI_SP(core) ((core)->ti_aarch64->gpregs->sp)
> +
> +typedef uint64_t auxv_t;
> +
> +static inline void *decode_pointer(uint64_t v) { return (void*)v; }
> +static inline uint64_t encode_pointer(void *p) { return (uint64_t)p; }
> +
> +#endif /* __CR_ASM_TYPES_H__ */

Regards,
Christopher

-- 
Employee of Qualcomm Innovation Center, Inc.
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum,
hosted by the Linux Foundation.


More information about the CRIU mailing list