[CRIU] [PATCH 01/20] x86: moved x86-specific files into the directory arch/x86.

Pavel Emelyanov xemul at parallels.com
Wed Dec 12 10:43:11 EST 2012


On 12/12/2012 06:48 PM, alekskartashov at parallels.com wrote:
> From: Alexander Kartashov <alekskartashov at parallels.com>
> 
> The following files goes into the directory arch/x86 unmodified:
> 
> * include/atomic.h,
> * include/linkage.h,
> * include/memcpy_64.h,
> * pie/parasite-head-x86-64.S,
> * include/processor-flags.h,
> * include/syscall-x86-64.def.
> 
> Removed target architecture checks from the files:
> 
> * include/image.h,
> * include/syscall-types.h.
> 
> The following files are split into machine-independent (that goes into the directory arch/x86)
> and machine-dependent parts:
> 
> * include/types.h --- machine dependent part is in the file arch/x86/arch-types.h,
> * include/bitops.h --- machine dependent part is in the file arch/x86/arch_bitops.h.
> 
> The following files contain machine-dependent parts extracted from other sources:
> 
> * arch/x86/arch_parasite.h --- contains machine-dependent part of the parasite;
>   it's empty now and intended only to support multiarch builds.
> * arch/x86/arch_cr_dump.h --- contains machine-dependent parts of cr-dump.c.
> * arch/x86/restorer_private.h --- contains machine-dependent parts of the restorer.
> * arch/x86/arch_restorer.h --- contains machine-dependent parts of cr-restore.c.

Much much better. Please, find comments inline. Some of questions probably have
implicit answers in the subsequent patches, but since this set is HUGE, I ask
them explicitly.

> Signed-off-by: Alexander Kartashov <alekskartashov at parallels.com>

> diff --git a/arch/x86/Makefile.inc b/arch/x86/Makefile.inc
> new file mode 100644
> index 0000000..b63bd25
> --- /dev/null
> +++ b/arch/x86/Makefile.inc
> @@ -0,0 +1,7 @@
> +DEFINES   += -DCONFIG_X86_64
> +ARCH_BITS := 64
> +
> +CC 	 := gcc
> +LD 	 := ld
> +OBJCOPY	 := objcopy
> +LDARCH	 := i386:x86-64

Where (int what patch) do you start using this file?

> diff --git a/arch/x86/arch-types.h b/arch/x86/arch-types.h
> new file mode 100644
> index 0000000..ac99ee4
> --- /dev/null
> +++ b/arch/x86/arch-types.h

An overall note on the file names -- it's ugly, when a file name is arch/xxx/arch-yyy.h.
Please, rework the arch-dependent headers model the way it looks in the Linux kernel.

I.e.
- files look like arch/<arch>/include/asm/<file>.h
- everyone who need arch-specific header does #include <asm/<file>.h>
- the -I arch/<arch/include/ option is added by builder

> @@ -0,0 +1,92 @@
> +#ifndef PROCESSOR_X86_64_H_
> +#define PROCESSOR_X86_64_H_
> +
> +#define PRIs PRIu64

What is it?

> +#define REG_IP(regs)  regs.ip
> +#define REG_RES(regs) regs.ax

This wasn't "moved", but "added". It shouldn't be in this patch.

> +#define _NSIG_BPW 64
> +#define TASK_SIZE (1UL << 47) - PAGE_SIZE
> +
> +#define CORE_ENTRY__MARCH CORE_ENTRY__MARCH__X86_64
> +
> +#define CORE_THREAD_INFO(core) core->thread_info
> +#define CORE_GPREGS(core) (core->thread_info->gpregs)

Neither should this.

> +
> +#ifdef CONFIG_X86_64
> +# define AT_VECTOR_SIZE 44
> +#else
> +# define AT_VECTOR_SIZE 22		/* Not needed at moment */
> +#endif
> +
> +typedef uint64_t auxv_t;

This is uint64_t for arm as well, why in arch?

> +
> +#define SIGFRAME_OFFSET 8

This should be in another patch, which does use this constant.

> +
> +#define UserRegsEntry UserX86RegsEntry
> +
> +typedef struct {
> +	int __unused;
> +} UserFPState;

And these two too.

> +
> +#endif
> diff --git a/arch/x86/arch_bitops.h b/arch/x86/arch_bitops.h
> new file mode 100644
> index 0000000..552b135
> --- /dev/null
> +++ b/arch/x86/arch_bitops.h
> @@ -0,0 +1,45 @@
> +#ifndef X86_64_BITOPS_H_
> +#define X86_64_BITOPS_H_
> +
> +static inline void set_bit(int nr, volatile unsigned long *addr)
> +{
> +	asm volatile("bts %1,%0" : ADDR : "Ir" (nr) : "memory");
> +}
> +
> +static inline void change_bit(int nr, volatile unsigned long *addr)
> +{
> +	asm volatile("btc %1,%0" : ADDR : "Ir" (nr));
> +}
> +
> +static inline int test_bit(int nr, volatile const unsigned long *addr)
> +{
> +	int oldbit;
> +
> +	asm volatile("bt %2,%1\n\t"
> +		     "sbb %0,%0"
> +		     : "=r" (oldbit)
> +		     : "m" (*(unsigned long *)addr), "Ir" (nr));
> +
> +	return oldbit;
> +}
> +
> +static inline void clear_bit(int nr, volatile unsigned long *addr)
> +{
> +	asm volatile("btr %1,%0" : ADDR : "Ir" (nr));
> +}
> +
> +/**
> + * __ffs - find first set bit in word
> + * @word: The word to search
> + *
> + * Undefined if no bit exists, so code should check against 0 first.
> + */
> +static inline unsigned long __ffs(unsigned long word)
> +{
> +	asm("bsf %1,%0"
> +		: "=r" (word)
> +		: "rm" (word));
> +	return word;
> +}
> +
> +#endif

Since this patch is intended to move the code, there should be respective
lines-removing portions in this patch.

> diff --git a/arch/x86/arch_cr_dump.h b/arch/x86/arch_cr_dump.h
> new file mode 100644
> index 0000000..a469f99
> --- /dev/null
> +++ b/arch/x86/arch_cr_dump.h
> @@ -0,0 +1,163 @@
> +#ifndef CR_DUMP_ARCH_H_X86_64_
> +#define CR_DUMP_ARCH_H_X86_64_
> +
> +#define TI_SP(core) ((core)->thread_info->gpregs->sp)
> +
> +#define assign_reg(dst, src, e)		dst->e = (__typeof__(dst->e))src.e
> +#define assign_array(dst, src, e)	memcpy(dst->e, &src.e, sizeof(src.e))
> +
> +static int get_task_regs(pid_t pid, CoreEntry *core, const struct parasite_ctl *ctl)
> +{
> +	user_fpregs_struct_t fpregs	= {-1};
> +	user_regs_struct_t regs		= {-1};
> +
> +	int ret = -1;
> +
> +	pr_info("Dumping GP/FPU registers ... ");
> +
> +	if (ctl)
> +		regs = ctl->regs_orig;
> +	else {
> +		if (ptrace(PTRACE_GETREGS, pid, NULL, &regs)) {
> +			pr_err("Can't obtain GP registers for %d\n", pid);
> +			goto err;
> +		}
> +	}
> +
> +	if (ptrace(PTRACE_GETFPREGS, pid, NULL, &fpregs)) {
> +		pr_err("Can't obtain FPU registers for %d\n", pid);
> +		goto err;
> +	}
> +
> +	/* Did we come from a system call? */
> +	if ((int)regs.orig_ax >= 0) {
> +		/* Restart the system call */
> +		switch ((long)(int)regs.ax) {
> +		case -ERESTARTNOHAND:
> +		case -ERESTARTSYS:
> +		case -ERESTARTNOINTR:
> +			regs.ax = regs.orig_ax;
> +			regs.ip -= 2;
> +			break;
> +		case -ERESTART_RESTARTBLOCK:
> +			regs.ax = __NR_restart_syscall;
> +			regs.ip -= 2;
> +			break;
> +		}
> +	}
> +
> +	assign_reg(core->thread_info->gpregs, regs, r15);
> +	assign_reg(core->thread_info->gpregs, regs, r14);
> +	assign_reg(core->thread_info->gpregs, regs, r13);
> +	assign_reg(core->thread_info->gpregs, regs, r12);
> +	assign_reg(core->thread_info->gpregs, regs, bp);
> +	assign_reg(core->thread_info->gpregs, regs, bx);
> +	assign_reg(core->thread_info->gpregs, regs, r11);
> +	assign_reg(core->thread_info->gpregs, regs, r10);
> +	assign_reg(core->thread_info->gpregs, regs, r9);
> +	assign_reg(core->thread_info->gpregs, regs, r8);
> +	assign_reg(core->thread_info->gpregs, regs, ax);
> +	assign_reg(core->thread_info->gpregs, regs, cx);
> +	assign_reg(core->thread_info->gpregs, regs, dx);
> +	assign_reg(core->thread_info->gpregs, regs, si);
> +	assign_reg(core->thread_info->gpregs, regs, di);
> +	assign_reg(core->thread_info->gpregs, regs, orig_ax);
> +	assign_reg(core->thread_info->gpregs, regs, ip);
> +	assign_reg(core->thread_info->gpregs, regs, cs);
> +	assign_reg(core->thread_info->gpregs, regs, flags);
> +	assign_reg(core->thread_info->gpregs, regs, sp);
> +	assign_reg(core->thread_info->gpregs, regs, ss);
> +	assign_reg(core->thread_info->gpregs, regs, fs_base);
> +	assign_reg(core->thread_info->gpregs, regs, gs_base);
> +	assign_reg(core->thread_info->gpregs, regs, ds);
> +	assign_reg(core->thread_info->gpregs, regs, es);
> +	assign_reg(core->thread_info->gpregs, regs, fs);
> +	assign_reg(core->thread_info->gpregs, regs, gs);
> +
> +	assign_reg(core->thread_info->fpregs, fpregs, cwd);
> +	assign_reg(core->thread_info->fpregs, fpregs, swd);
> +	assign_reg(core->thread_info->fpregs, fpregs, twd);
> +	assign_reg(core->thread_info->fpregs, fpregs, fop);
> +	assign_reg(core->thread_info->fpregs, fpregs, rip);
> +	assign_reg(core->thread_info->fpregs, fpregs, rdp);
> +	assign_reg(core->thread_info->fpregs, fpregs, mxcsr);
> +	assign_reg(core->thread_info->fpregs, fpregs, mxcsr_mask);
> +
> +	/* Make sure we have enough space */
> +	BUG_ON(core->thread_info->fpregs->n_st_space != ARRAY_SIZE(fpregs.st_space));
> +	BUG_ON(core->thread_info->fpregs->n_xmm_space != ARRAY_SIZE(fpregs.xmm_space));
> +	BUG_ON(core->thread_info->fpregs->n_padding != ARRAY_SIZE(fpregs.padding));
> +
> +	assign_array(core->thread_info->fpregs, fpregs,	st_space);
> +	assign_array(core->thread_info->fpregs, fpregs,	xmm_space);
> +	assign_array(core->thread_info->fpregs, fpregs,	padding);
> +
> +	ret = 0;
> +
> +err:
> +	return ret;
> +}
> +
> +
> +static int arch_alloc_thread_info(CoreEntry *core) {
> +	ThreadInfoX86 *thread_info;
> +	UserX86RegsEntry *gpregs;
> +	UserX86FpregsEntry *fpregs;
> +
> +	thread_info = xmalloc(sizeof(*thread_info));
> +	if (!thread_info)
> +		goto err;
> +	thread_info_x86__init(thread_info);
> +	core->thread_info = thread_info;
> +
> +	gpregs = xmalloc(sizeof(*gpregs));
> +	if (!gpregs)
> +		goto err;
> +	user_x86_regs_entry__init(gpregs);
> +	thread_info->gpregs = gpregs;
> +
> +	fpregs = xmalloc(sizeof(*fpregs));
> +	if (!fpregs)
> +		goto err;
> +	user_x86_fpregs_entry__init(fpregs);
> +	thread_info->fpregs = fpregs;
> +
> +	/* These are numbers from kernel */
> +	fpregs->n_st_space	= 32;
> +	fpregs->n_xmm_space	= 64;
> +	fpregs->n_padding	= 24;
> +
> +	fpregs->st_space	= xzalloc(pb_repeated_size(fpregs, st_space));
> +	fpregs->xmm_space	= xzalloc(pb_repeated_size(fpregs, xmm_space));
> +	fpregs->padding		= xzalloc(pb_repeated_size(fpregs, padding));
> +
> +	if (!fpregs->st_space || !fpregs->xmm_space || !fpregs->padding)
> +		goto err;
> +
> +	return 0;
> +
> + err:
> +	return 1;
> +}
> +
> +
> +static void core_entry_free(CoreEntry *core)
> +{
> +	if (core) {
> +		if (CORE_THREAD_INFO(core)) {
> +			if (CORE_THREAD_INFO(core)->fpregs) {
> +				xfree(core->thread_info->fpregs->st_space);
> +				xfree(core->thread_info->fpregs->xmm_space);
> +				xfree(core->thread_info->fpregs->padding);
> +			}
> +			xfree(CORE_THREAD_INFO(core)->gpregs);
> +			xfree(CORE_THREAD_INFO(core)->fpregs);
> +		}
> +		xfree(CORE_THREAD_INFO(core));
> +		xfree(core->thread_core);
> +		xfree(core->tc);
> +		xfree(core->ids);
> +	}
> +}

First, these functions should be in some arch-specific .c file, not in header.
Second, this also should be the code move, i.e. with respective live-removers.

> +#endif
> diff --git a/arch/x86/arch_parasite.h b/arch/x86/arch_parasite.h
> new file mode 100644
> index 0000000..c45b185
> --- /dev/null
> +++ b/arch/x86/arch_parasite.h
> @@ -0,0 +1,6 @@
> +#ifndef X86_64_PARASITE_H__
> +#define X86_64_PARASITE_H__
> +
> +/* No x86_64 specific parasite code yet */

Is it a placeholder for future patching?

> +
> +#endif
> diff --git a/arch/x86/arch_parasite_syscall.h b/arch/x86/arch_parasite_syscall.h
> new file mode 100644
> index 0000000..a272cea
> --- /dev/null
> +++ b/arch/x86/arch_parasite_syscall.h
> @@ -0,0 +1,79 @@
> +#ifndef X86_64_PARASITE_SYSCALL_H_
> +#define X86_64_PARASITE_SYSCALL_H_
> +
> +#define ARCH_SI_TRAP SI_KERNEL
> +
> +static int __parasite_execute(struct parasite_ctl *ctl, pid_t pid, user_regs_struct_t *regs);
> +
> +/*
> + * Injected syscall instruction
> + */
> +
> +static const char code_syscall[] = {
> +        0x0f, 0x05,                          /* syscall    */
> +        0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc   /* int 3, ... */
> +};
> +
> +
> +/*
> + * The x86-64-specific parasite setup
> + */
> +
> +static void parasite_setup_regs(unsigned long new_ip, user_regs_struct_t *regs)
> +{
> +	regs->ip = new_ip;
> +
> +	/* Avoid end of syscall processing */
> +	regs->orig_ax = -1;
> +
> +	/* Make sure flags are in known state */
> +	regs->flags &= ~(X86_EFLAGS_TF | X86_EFLAGS_DF | X86_EFLAGS_IF);
> +}
> +
> +static void *mmap_seized(struct parasite_ctl *ctl,
> +			 void *addr, size_t length, int prot,
> +			 int flags, int fd, off_t offset)
> +{
> +	user_regs_struct_t regs = ctl->regs_orig;
> +	void *map = NULL;
> +	int ret;
> +
> +	regs.ax = (unsigned long)__NR_mmap;	/* mmap		*/
> +	regs.di = (unsigned long)addr;		/* @addr	*/
> +	regs.si = (unsigned long)length;	/* @length	*/
> +	regs.dx = (unsigned long)prot;		/* @prot	*/
> +	regs.r10= (unsigned long)flags;		/* @flags	*/
> +	regs.r8 = (unsigned long)fd;		/* @fd		*/
> +	regs.r9 = (unsigned long)offset;	/* @offset	*/
> +
> +	parasite_setup_regs(ctl->syscall_ip, &regs);
> +
> +	ret = __parasite_execute(ctl, ctl->pid, &regs);
> +	if (ret)
> +		goto err;
> +
> +	if ((long)regs.ax > 0)
> +		map = (void *)regs.ax;
> +err:
> +	return map;
> +}
> +
> +static int munmap_seized(struct parasite_ctl *ctl, void *addr, size_t length)
> +{
> +	user_regs_struct_t regs = ctl->regs_orig;
> +	int ret;
> +
> +	regs.ax = (unsigned long)__NR_munmap;	/* mmap		*/
> +	regs.di = (unsigned long)addr;		/* @addr	*/
> +	regs.si = (unsigned long)length;	/* @length	*/
> +
> +	parasite_setup_regs(ctl->syscall_ip, &regs);
> +
> +	ret = __parasite_execute(ctl, ctl->pid, &regs);
> +	if (!ret)
> +		ret = (int)regs.ax;
> +
> +	return ret;
> +}

1. This should be in arch .c file, not in header
2. This should include respective line-removers
3. Code added later in arm/ coincide with this a lot. This particular place should
   look like

 static int munmap_seized(struct parasite_ctl *ctl, void *addr, size_t length)
 {
 	user_regs_struct_t regs = ctl->regs_orig;
 	int ret;
 
-	regs.ax = (unsigned long)__NR_munmap;	/* mmap		*/
-	regs.di = (unsigned long)addr;		/* @addr	*/
-	regs.si = (unsigned long)length;	/* @length	*/
+	arch_setup_parasite_regs(...);
 
 	parasite_setup_regs(ctl->syscall_ip, &regs);
 
 	ret = __parasite_execute(ctl, ctl->pid, &regs);
 	if (!ret)
-		ret = (int)regs.ax;
+		ret = arch_parasite_retcode(...);
 
 	return ret;
 }

with proper arch_setup_parasite_regs() and arch_parasite_retcode +-ed.
Similar with the mmap_seized().

> +
> +#endif
> diff --git a/arch/x86/arch_restorer.h b/arch/x86/arch_restorer.h
> new file mode 100644
> index 0000000..7b87d9b
> --- /dev/null
> +++ b/arch/x86/arch_restorer.h
> @@ -0,0 +1,18 @@
> +#ifndef X86_64_RESTORER_H_
> +#define X86_64_RESTORER_H_
> +
> +#define jump_to_restorer_blob(new_sp, restore_task_exec_start,          \
> +			      task_args)				\
> +	asm volatile(							\
> +		     "movq %0, %%rbx				    \n" \
> +		     "movq %1, %%rax				    \n" \
> +		     "movq %2, %%rdi				    \n" \
> +		     "movq %%rbx, %%rsp			       	    \n"	\
> +		     "callq *%%rax				    \n" \
> +		     :							\
> +		     : "g"(new_sp),					\
> +		       "g"(restore_task_exec_start),			\
> +		       "g"(task_args)					\
> +		     : "rsp", "rdi", "rsi", "rbx", "rax", "memory")

Same thing here -- not add, but move.

> +#endif
> diff --git a/include/atomic.h b/arch/x86/atomic.h
> similarity index 100%
> rename from include/atomic.h
> rename to arch/x86/atomic.h
> diff --git a/include/linkage.h b/arch/x86/linkage.h
> similarity index 100%
> rename from include/linkage.h
> rename to arch/x86/linkage.h
> diff --git a/include/memcpy_64.h b/arch/x86/memcpy_64.h
> similarity index 100%
> rename from include/memcpy_64.h
> rename to arch/x86/memcpy_64.h
> diff --git a/pie/parasite-head-x86-64.S b/arch/x86/parasite-head.S
> similarity index 100%
> rename from pie/parasite-head-x86-64.S
> rename to arch/x86/parasite-head.S
> diff --git a/include/processor-flags.h b/arch/x86/processor-flags.h
> similarity index 100%
> rename from include/processor-flags.h
> rename to arch/x86/processor-flags.h
> diff --git a/arch/x86/restorer_private.h b/arch/x86/restorer_private.h
> new file mode 100644
> index 0000000..09d452e
> --- /dev/null
> +++ b/arch/x86/restorer_private.h
> @@ -0,0 +1,179 @@

The code below is all about the same two things:

1. no huge code in headers, move to .c
2. no code add, but code move


>  #endif /* CR_BITOPS_H_ */
> diff --git a/include/image.h b/include/image.h
> index 916b9a8..39f68e6 100644
> --- a/include/image.h
> +++ b/include/image.h
> @@ -105,25 +105,19 @@ struct page_entry {
>  
>  #define CR_CAP_SIZE	2
>  
> -#ifdef CONFIG_X86_64
> -
>  #define GDT_ENTRY_TLS_ENTRIES 3
> -#define TASK_COMM_LEN 16
>  
>  #define TASK_PF_USED_MATH		0x00002000
>  
> -#ifdef CONFIG_X86_64
> -# define AT_VECTOR_SIZE 44
> -#else
> -# define AT_VECTOR_SIZE 22		/* Not needed at moment */
> -#endif
> +
> +  //#endif /* CONFIG_X86_64 */

Why is this comment here?

>  
>  #define TASK_ALIVE		0x1
>  #define TASK_DEAD		0x2
>  #define TASK_STOPPED		0x3 /* FIXME - implement */
>  #define TASK_HELPER		0x4
>  
> -#endif /* CONFIG_X86_64 */
> +#define TASK_COMM_LEN 16

If this thing is left in this file, don't move it. It's hard to review patch which
does code move and code reshuffle in one go.

>  /*
>   * There are always 4 magic bytes at the
> diff --git a/include/syscall-types.h b/include/syscall-types.h
> index 261e288..5f90f2d 100644
> --- a/include/syscall-types.h
> +++ b/include/syscall-types.h
> @@ -14,10 +14,6 @@
>  
>  #include "types.h"
>  
> -#ifndef CONFIG_X86_64
> -# error x86-32 bit mode not yet implemented
> -#endif
> -
>  struct cap_header {
>  	u32 version;
>  	int pid;
> diff --git a/include/types.h b/include/types.h
> index 9b7884f..40a5f78 100644
> --- a/include/types.h
> +++ b/include/types.h
> @@ -84,10 +84,10 @@ typedef signed char		s8;
>  #define _LINUX_CAPABILITY_VERSION_3	0x20080522
>  #define _LINUX_CAPABILITY_U32S_3	2
>  
> -#ifdef CONFIG_X86_64
> +#include <arch-types.h>
>  
>  typedef struct {
> -	unsigned long sig[1];
> +	u64 sig[1];

Please, explain this change.

>  } rt_sigset_t;
>  
>  typedef void rt_signalfn_t(int, siginfo_t *, void *);

Thanks,
Pavel



More information about the CRIU mailing list