[CRIU] [PATCH 01/20] x86: moved x86-specific files into the directory arch/x86.
Pavel Emelyanov
xemul at parallels.com
Wed Dec 12 10:43:11 EST 2012
On 12/12/2012 06:48 PM, alekskartashov at parallels.com wrote:
> From: Alexander Kartashov <alekskartashov at parallels.com>
>
> The following files goes into the directory arch/x86 unmodified:
>
> * include/atomic.h,
> * include/linkage.h,
> * include/memcpy_64.h,
> * pie/parasite-head-x86-64.S,
> * include/processor-flags.h,
> * include/syscall-x86-64.def.
>
> Removed target architecture checks from the files:
>
> * include/image.h,
> * include/syscall-types.h.
>
> The following files are split into machine-independent (that goes into the directory arch/x86)
> and machine-dependent parts:
>
> * include/types.h --- machine dependent part is in the file arch/x86/arch-types.h,
> * include/bitops.h --- machine dependent part is in the file arch/x86/arch_bitops.h.
>
> The following files contain machine-dependent parts extracted from other sources:
>
> * arch/x86/arch_parasite.h --- contains machine-dependent part of the parasite;
> it's empty now and intended only to support multiarch builds.
> * arch/x86/arch_cr_dump.h --- contains machine-dependent parts of cr-dump.c.
> * arch/x86/restorer_private.h --- contains machine-dependent parts of the restorer.
> * arch/x86/arch_restorer.h --- contains machine-dependent parts of cr-restore.c.
Much much better. Please, find comments inline. Some of questions probably have
implicit answers in the subsequent patches, but since this set is HUGE, I ask
them explicitly.
> Signed-off-by: Alexander Kartashov <alekskartashov at parallels.com>
> diff --git a/arch/x86/Makefile.inc b/arch/x86/Makefile.inc
> new file mode 100644
> index 0000000..b63bd25
> --- /dev/null
> +++ b/arch/x86/Makefile.inc
> @@ -0,0 +1,7 @@
> +DEFINES += -DCONFIG_X86_64
> +ARCH_BITS := 64
> +
> +CC := gcc
> +LD := ld
> +OBJCOPY := objcopy
> +LDARCH := i386:x86-64
Where (int what patch) do you start using this file?
> diff --git a/arch/x86/arch-types.h b/arch/x86/arch-types.h
> new file mode 100644
> index 0000000..ac99ee4
> --- /dev/null
> +++ b/arch/x86/arch-types.h
An overall note on the file names -- it's ugly, when a file name is arch/xxx/arch-yyy.h.
Please, rework the arch-dependent headers model the way it looks in the Linux kernel.
I.e.
- files look like arch/<arch>/include/asm/<file>.h
- everyone who need arch-specific header does #include <asm/<file>.h>
- the -I arch/<arch/include/ option is added by builder
> @@ -0,0 +1,92 @@
> +#ifndef PROCESSOR_X86_64_H_
> +#define PROCESSOR_X86_64_H_
> +
> +#define PRIs PRIu64
What is it?
> +#define REG_IP(regs) regs.ip
> +#define REG_RES(regs) regs.ax
This wasn't "moved", but "added". It shouldn't be in this patch.
> +#define _NSIG_BPW 64
> +#define TASK_SIZE (1UL << 47) - PAGE_SIZE
> +
> +#define CORE_ENTRY__MARCH CORE_ENTRY__MARCH__X86_64
> +
> +#define CORE_THREAD_INFO(core) core->thread_info
> +#define CORE_GPREGS(core) (core->thread_info->gpregs)
Neither should this.
> +
> +#ifdef CONFIG_X86_64
> +# define AT_VECTOR_SIZE 44
> +#else
> +# define AT_VECTOR_SIZE 22 /* Not needed at moment */
> +#endif
> +
> +typedef uint64_t auxv_t;
This is uint64_t for arm as well, why in arch?
> +
> +#define SIGFRAME_OFFSET 8
This should be in another patch, which does use this constant.
> +
> +#define UserRegsEntry UserX86RegsEntry
> +
> +typedef struct {
> + int __unused;
> +} UserFPState;
And these two too.
> +
> +#endif
> diff --git a/arch/x86/arch_bitops.h b/arch/x86/arch_bitops.h
> new file mode 100644
> index 0000000..552b135
> --- /dev/null
> +++ b/arch/x86/arch_bitops.h
> @@ -0,0 +1,45 @@
> +#ifndef X86_64_BITOPS_H_
> +#define X86_64_BITOPS_H_
> +
> +static inline void set_bit(int nr, volatile unsigned long *addr)
> +{
> + asm volatile("bts %1,%0" : ADDR : "Ir" (nr) : "memory");
> +}
> +
> +static inline void change_bit(int nr, volatile unsigned long *addr)
> +{
> + asm volatile("btc %1,%0" : ADDR : "Ir" (nr));
> +}
> +
> +static inline int test_bit(int nr, volatile const unsigned long *addr)
> +{
> + int oldbit;
> +
> + asm volatile("bt %2,%1\n\t"
> + "sbb %0,%0"
> + : "=r" (oldbit)
> + : "m" (*(unsigned long *)addr), "Ir" (nr));
> +
> + return oldbit;
> +}
> +
> +static inline void clear_bit(int nr, volatile unsigned long *addr)
> +{
> + asm volatile("btr %1,%0" : ADDR : "Ir" (nr));
> +}
> +
> +/**
> + * __ffs - find first set bit in word
> + * @word: The word to search
> + *
> + * Undefined if no bit exists, so code should check against 0 first.
> + */
> +static inline unsigned long __ffs(unsigned long word)
> +{
> + asm("bsf %1,%0"
> + : "=r" (word)
> + : "rm" (word));
> + return word;
> +}
> +
> +#endif
Since this patch is intended to move the code, there should be respective
lines-removing portions in this patch.
> diff --git a/arch/x86/arch_cr_dump.h b/arch/x86/arch_cr_dump.h
> new file mode 100644
> index 0000000..a469f99
> --- /dev/null
> +++ b/arch/x86/arch_cr_dump.h
> @@ -0,0 +1,163 @@
> +#ifndef CR_DUMP_ARCH_H_X86_64_
> +#define CR_DUMP_ARCH_H_X86_64_
> +
> +#define TI_SP(core) ((core)->thread_info->gpregs->sp)
> +
> +#define assign_reg(dst, src, e) dst->e = (__typeof__(dst->e))src.e
> +#define assign_array(dst, src, e) memcpy(dst->e, &src.e, sizeof(src.e))
> +
> +static int get_task_regs(pid_t pid, CoreEntry *core, const struct parasite_ctl *ctl)
> +{
> + user_fpregs_struct_t fpregs = {-1};
> + user_regs_struct_t regs = {-1};
> +
> + int ret = -1;
> +
> + pr_info("Dumping GP/FPU registers ... ");
> +
> + if (ctl)
> + regs = ctl->regs_orig;
> + else {
> + if (ptrace(PTRACE_GETREGS, pid, NULL, ®s)) {
> + pr_err("Can't obtain GP registers for %d\n", pid);
> + goto err;
> + }
> + }
> +
> + if (ptrace(PTRACE_GETFPREGS, pid, NULL, &fpregs)) {
> + pr_err("Can't obtain FPU registers for %d\n", pid);
> + goto err;
> + }
> +
> + /* Did we come from a system call? */
> + if ((int)regs.orig_ax >= 0) {
> + /* Restart the system call */
> + switch ((long)(int)regs.ax) {
> + case -ERESTARTNOHAND:
> + case -ERESTARTSYS:
> + case -ERESTARTNOINTR:
> + regs.ax = regs.orig_ax;
> + regs.ip -= 2;
> + break;
> + case -ERESTART_RESTARTBLOCK:
> + regs.ax = __NR_restart_syscall;
> + regs.ip -= 2;
> + break;
> + }
> + }
> +
> + assign_reg(core->thread_info->gpregs, regs, r15);
> + assign_reg(core->thread_info->gpregs, regs, r14);
> + assign_reg(core->thread_info->gpregs, regs, r13);
> + assign_reg(core->thread_info->gpregs, regs, r12);
> + assign_reg(core->thread_info->gpregs, regs, bp);
> + assign_reg(core->thread_info->gpregs, regs, bx);
> + assign_reg(core->thread_info->gpregs, regs, r11);
> + assign_reg(core->thread_info->gpregs, regs, r10);
> + assign_reg(core->thread_info->gpregs, regs, r9);
> + assign_reg(core->thread_info->gpregs, regs, r8);
> + assign_reg(core->thread_info->gpregs, regs, ax);
> + assign_reg(core->thread_info->gpregs, regs, cx);
> + assign_reg(core->thread_info->gpregs, regs, dx);
> + assign_reg(core->thread_info->gpregs, regs, si);
> + assign_reg(core->thread_info->gpregs, regs, di);
> + assign_reg(core->thread_info->gpregs, regs, orig_ax);
> + assign_reg(core->thread_info->gpregs, regs, ip);
> + assign_reg(core->thread_info->gpregs, regs, cs);
> + assign_reg(core->thread_info->gpregs, regs, flags);
> + assign_reg(core->thread_info->gpregs, regs, sp);
> + assign_reg(core->thread_info->gpregs, regs, ss);
> + assign_reg(core->thread_info->gpregs, regs, fs_base);
> + assign_reg(core->thread_info->gpregs, regs, gs_base);
> + assign_reg(core->thread_info->gpregs, regs, ds);
> + assign_reg(core->thread_info->gpregs, regs, es);
> + assign_reg(core->thread_info->gpregs, regs, fs);
> + assign_reg(core->thread_info->gpregs, regs, gs);
> +
> + assign_reg(core->thread_info->fpregs, fpregs, cwd);
> + assign_reg(core->thread_info->fpregs, fpregs, swd);
> + assign_reg(core->thread_info->fpregs, fpregs, twd);
> + assign_reg(core->thread_info->fpregs, fpregs, fop);
> + assign_reg(core->thread_info->fpregs, fpregs, rip);
> + assign_reg(core->thread_info->fpregs, fpregs, rdp);
> + assign_reg(core->thread_info->fpregs, fpregs, mxcsr);
> + assign_reg(core->thread_info->fpregs, fpregs, mxcsr_mask);
> +
> + /* Make sure we have enough space */
> + BUG_ON(core->thread_info->fpregs->n_st_space != ARRAY_SIZE(fpregs.st_space));
> + BUG_ON(core->thread_info->fpregs->n_xmm_space != ARRAY_SIZE(fpregs.xmm_space));
> + BUG_ON(core->thread_info->fpregs->n_padding != ARRAY_SIZE(fpregs.padding));
> +
> + assign_array(core->thread_info->fpregs, fpregs, st_space);
> + assign_array(core->thread_info->fpregs, fpregs, xmm_space);
> + assign_array(core->thread_info->fpregs, fpregs, padding);
> +
> + ret = 0;
> +
> +err:
> + return ret;
> +}
> +
> +
> +static int arch_alloc_thread_info(CoreEntry *core) {
> + ThreadInfoX86 *thread_info;
> + UserX86RegsEntry *gpregs;
> + UserX86FpregsEntry *fpregs;
> +
> + thread_info = xmalloc(sizeof(*thread_info));
> + if (!thread_info)
> + goto err;
> + thread_info_x86__init(thread_info);
> + core->thread_info = thread_info;
> +
> + gpregs = xmalloc(sizeof(*gpregs));
> + if (!gpregs)
> + goto err;
> + user_x86_regs_entry__init(gpregs);
> + thread_info->gpregs = gpregs;
> +
> + fpregs = xmalloc(sizeof(*fpregs));
> + if (!fpregs)
> + goto err;
> + user_x86_fpregs_entry__init(fpregs);
> + thread_info->fpregs = fpregs;
> +
> + /* These are numbers from kernel */
> + fpregs->n_st_space = 32;
> + fpregs->n_xmm_space = 64;
> + fpregs->n_padding = 24;
> +
> + fpregs->st_space = xzalloc(pb_repeated_size(fpregs, st_space));
> + fpregs->xmm_space = xzalloc(pb_repeated_size(fpregs, xmm_space));
> + fpregs->padding = xzalloc(pb_repeated_size(fpregs, padding));
> +
> + if (!fpregs->st_space || !fpregs->xmm_space || !fpregs->padding)
> + goto err;
> +
> + return 0;
> +
> + err:
> + return 1;
> +}
> +
> +
> +static void core_entry_free(CoreEntry *core)
> +{
> + if (core) {
> + if (CORE_THREAD_INFO(core)) {
> + if (CORE_THREAD_INFO(core)->fpregs) {
> + xfree(core->thread_info->fpregs->st_space);
> + xfree(core->thread_info->fpregs->xmm_space);
> + xfree(core->thread_info->fpregs->padding);
> + }
> + xfree(CORE_THREAD_INFO(core)->gpregs);
> + xfree(CORE_THREAD_INFO(core)->fpregs);
> + }
> + xfree(CORE_THREAD_INFO(core));
> + xfree(core->thread_core);
> + xfree(core->tc);
> + xfree(core->ids);
> + }
> +}
First, these functions should be in some arch-specific .c file, not in header.
Second, this also should be the code move, i.e. with respective live-removers.
> +#endif
> diff --git a/arch/x86/arch_parasite.h b/arch/x86/arch_parasite.h
> new file mode 100644
> index 0000000..c45b185
> --- /dev/null
> +++ b/arch/x86/arch_parasite.h
> @@ -0,0 +1,6 @@
> +#ifndef X86_64_PARASITE_H__
> +#define X86_64_PARASITE_H__
> +
> +/* No x86_64 specific parasite code yet */
Is it a placeholder for future patching?
> +
> +#endif
> diff --git a/arch/x86/arch_parasite_syscall.h b/arch/x86/arch_parasite_syscall.h
> new file mode 100644
> index 0000000..a272cea
> --- /dev/null
> +++ b/arch/x86/arch_parasite_syscall.h
> @@ -0,0 +1,79 @@
> +#ifndef X86_64_PARASITE_SYSCALL_H_
> +#define X86_64_PARASITE_SYSCALL_H_
> +
> +#define ARCH_SI_TRAP SI_KERNEL
> +
> +static int __parasite_execute(struct parasite_ctl *ctl, pid_t pid, user_regs_struct_t *regs);
> +
> +/*
> + * Injected syscall instruction
> + */
> +
> +static const char code_syscall[] = {
> + 0x0f, 0x05, /* syscall */
> + 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc /* int 3, ... */
> +};
> +
> +
> +/*
> + * The x86-64-specific parasite setup
> + */
> +
> +static void parasite_setup_regs(unsigned long new_ip, user_regs_struct_t *regs)
> +{
> + regs->ip = new_ip;
> +
> + /* Avoid end of syscall processing */
> + regs->orig_ax = -1;
> +
> + /* Make sure flags are in known state */
> + regs->flags &= ~(X86_EFLAGS_TF | X86_EFLAGS_DF | X86_EFLAGS_IF);
> +}
> +
> +static void *mmap_seized(struct parasite_ctl *ctl,
> + void *addr, size_t length, int prot,
> + int flags, int fd, off_t offset)
> +{
> + user_regs_struct_t regs = ctl->regs_orig;
> + void *map = NULL;
> + int ret;
> +
> + regs.ax = (unsigned long)__NR_mmap; /* mmap */
> + regs.di = (unsigned long)addr; /* @addr */
> + regs.si = (unsigned long)length; /* @length */
> + regs.dx = (unsigned long)prot; /* @prot */
> + regs.r10= (unsigned long)flags; /* @flags */
> + regs.r8 = (unsigned long)fd; /* @fd */
> + regs.r9 = (unsigned long)offset; /* @offset */
> +
> + parasite_setup_regs(ctl->syscall_ip, ®s);
> +
> + ret = __parasite_execute(ctl, ctl->pid, ®s);
> + if (ret)
> + goto err;
> +
> + if ((long)regs.ax > 0)
> + map = (void *)regs.ax;
> +err:
> + return map;
> +}
> +
> +static int munmap_seized(struct parasite_ctl *ctl, void *addr, size_t length)
> +{
> + user_regs_struct_t regs = ctl->regs_orig;
> + int ret;
> +
> + regs.ax = (unsigned long)__NR_munmap; /* mmap */
> + regs.di = (unsigned long)addr; /* @addr */
> + regs.si = (unsigned long)length; /* @length */
> +
> + parasite_setup_regs(ctl->syscall_ip, ®s);
> +
> + ret = __parasite_execute(ctl, ctl->pid, ®s);
> + if (!ret)
> + ret = (int)regs.ax;
> +
> + return ret;
> +}
1. This should be in arch .c file, not in header
2. This should include respective line-removers
3. Code added later in arm/ coincide with this a lot. This particular place should
look like
static int munmap_seized(struct parasite_ctl *ctl, void *addr, size_t length)
{
user_regs_struct_t regs = ctl->regs_orig;
int ret;
- regs.ax = (unsigned long)__NR_munmap; /* mmap */
- regs.di = (unsigned long)addr; /* @addr */
- regs.si = (unsigned long)length; /* @length */
+ arch_setup_parasite_regs(...);
parasite_setup_regs(ctl->syscall_ip, ®s);
ret = __parasite_execute(ctl, ctl->pid, ®s);
if (!ret)
- ret = (int)regs.ax;
+ ret = arch_parasite_retcode(...);
return ret;
}
with proper arch_setup_parasite_regs() and arch_parasite_retcode +-ed.
Similar with the mmap_seized().
> +
> +#endif
> diff --git a/arch/x86/arch_restorer.h b/arch/x86/arch_restorer.h
> new file mode 100644
> index 0000000..7b87d9b
> --- /dev/null
> +++ b/arch/x86/arch_restorer.h
> @@ -0,0 +1,18 @@
> +#ifndef X86_64_RESTORER_H_
> +#define X86_64_RESTORER_H_
> +
> +#define jump_to_restorer_blob(new_sp, restore_task_exec_start, \
> + task_args) \
> + asm volatile( \
> + "movq %0, %%rbx \n" \
> + "movq %1, %%rax \n" \
> + "movq %2, %%rdi \n" \
> + "movq %%rbx, %%rsp \n" \
> + "callq *%%rax \n" \
> + : \
> + : "g"(new_sp), \
> + "g"(restore_task_exec_start), \
> + "g"(task_args) \
> + : "rsp", "rdi", "rsi", "rbx", "rax", "memory")
Same thing here -- not add, but move.
> +#endif
> diff --git a/include/atomic.h b/arch/x86/atomic.h
> similarity index 100%
> rename from include/atomic.h
> rename to arch/x86/atomic.h
> diff --git a/include/linkage.h b/arch/x86/linkage.h
> similarity index 100%
> rename from include/linkage.h
> rename to arch/x86/linkage.h
> diff --git a/include/memcpy_64.h b/arch/x86/memcpy_64.h
> similarity index 100%
> rename from include/memcpy_64.h
> rename to arch/x86/memcpy_64.h
> diff --git a/pie/parasite-head-x86-64.S b/arch/x86/parasite-head.S
> similarity index 100%
> rename from pie/parasite-head-x86-64.S
> rename to arch/x86/parasite-head.S
> diff --git a/include/processor-flags.h b/arch/x86/processor-flags.h
> similarity index 100%
> rename from include/processor-flags.h
> rename to arch/x86/processor-flags.h
> diff --git a/arch/x86/restorer_private.h b/arch/x86/restorer_private.h
> new file mode 100644
> index 0000000..09d452e
> --- /dev/null
> +++ b/arch/x86/restorer_private.h
> @@ -0,0 +1,179 @@
The code below is all about the same two things:
1. no huge code in headers, move to .c
2. no code add, but code move
> #endif /* CR_BITOPS_H_ */
> diff --git a/include/image.h b/include/image.h
> index 916b9a8..39f68e6 100644
> --- a/include/image.h
> +++ b/include/image.h
> @@ -105,25 +105,19 @@ struct page_entry {
>
> #define CR_CAP_SIZE 2
>
> -#ifdef CONFIG_X86_64
> -
> #define GDT_ENTRY_TLS_ENTRIES 3
> -#define TASK_COMM_LEN 16
>
> #define TASK_PF_USED_MATH 0x00002000
>
> -#ifdef CONFIG_X86_64
> -# define AT_VECTOR_SIZE 44
> -#else
> -# define AT_VECTOR_SIZE 22 /* Not needed at moment */
> -#endif
> +
> + //#endif /* CONFIG_X86_64 */
Why is this comment here?
>
> #define TASK_ALIVE 0x1
> #define TASK_DEAD 0x2
> #define TASK_STOPPED 0x3 /* FIXME - implement */
> #define TASK_HELPER 0x4
>
> -#endif /* CONFIG_X86_64 */
> +#define TASK_COMM_LEN 16
If this thing is left in this file, don't move it. It's hard to review patch which
does code move and code reshuffle in one go.
> /*
> * There are always 4 magic bytes at the
> diff --git a/include/syscall-types.h b/include/syscall-types.h
> index 261e288..5f90f2d 100644
> --- a/include/syscall-types.h
> +++ b/include/syscall-types.h
> @@ -14,10 +14,6 @@
>
> #include "types.h"
>
> -#ifndef CONFIG_X86_64
> -# error x86-32 bit mode not yet implemented
> -#endif
> -
> struct cap_header {
> u32 version;
> int pid;
> diff --git a/include/types.h b/include/types.h
> index 9b7884f..40a5f78 100644
> --- a/include/types.h
> +++ b/include/types.h
> @@ -84,10 +84,10 @@ typedef signed char s8;
> #define _LINUX_CAPABILITY_VERSION_3 0x20080522
> #define _LINUX_CAPABILITY_U32S_3 2
>
> -#ifdef CONFIG_X86_64
> +#include <arch-types.h>
>
> typedef struct {
> - unsigned long sig[1];
> + u64 sig[1];
Please, explain this change.
> } rt_sigset_t;
>
> typedef void rt_signalfn_t(int, siginfo_t *, void *);
Thanks,
Pavel
More information about the CRIU
mailing list