[CRIU] [PATCH 10/20] cr-restore.c: introduced the multiarch support.
Pavel Emelyanov
xemul at parallels.com
Wed Dec 12 11:03:34 EST 2012
On 12/12/2012 05:34 PM, alekskartashov at parallels.com wrote:
> From: Alexander Kartashov <alekskartashov at parallels.com>
>
> * Introduced the macro CONFIG_DEBUG_RESTORE to stop restoration after a fork.
> * Introduced the macro CORE_GPREGS to access machine general-purpose registers.
> * Introduced the macro jump_to_restorer_blob to jump to the restorer blob
> in a machine-dependent way.
>
> Signed-off-by: Alexander Kartashov <alekskartashov at parallels.com>
> ---
> cr-restore.c | 74 ++++++++++++++++++++++++++++++++++++----------------------
> 1 file changed, 46 insertions(+), 28 deletions(-)
>
> diff --git a/cr-restore.c b/cr-restore.c
> index 8dd1886..995656e 100644
> --- a/cr-restore.c
> +++ b/cr-restore.c
> @@ -59,6 +59,8 @@
> #include "protobuf/itimer.pb-c.h"
> #include "protobuf/vma.pb-c.h"
>
> +#include <arch_restorer.h>
> +
> static struct pstree_item *current;
>
> static int restore_task_with_children(void *);
> @@ -210,7 +212,7 @@ static int map_private_vma(pid_t pid, struct vma_area *vma, void *tgt_addr,
>
> if (p->vma.end == vma->vma.end &&
> p->vma.start == vma->vma.start) {
> - pr_info("COW 0x%016lx-0x%016lx 0x%016lx vma\n",
> + pr_info("COW 0x%016"PRIx64"-0x%016"PRIx64" 0x%016"PRIx64" vma\n",
> vma->vma.start, vma->vma.end, vma->vma.pgoff);
WTF is that? It's just unsigned long printing, why this ugly format hacks?!
> paddr = (void *) vma_premmaped_start(&p->vma);
> break;
> @@ -221,7 +223,7 @@ static int map_private_vma(pid_t pid, struct vma_area *vma, void *tgt_addr,
> *pvma = p;
>
> if (paddr == NULL) {
> - pr_info("Map 0x%016lx-0x%016lx 0x%016lx vma\n",
> + pr_info("Map 0x%016"PRIx64"-0x%016"PRIx64" 0x%016"PRIx64" vma\n",
> vma->vma.start, vma->vma.end, vma->vma.pgoff);
>
> addr = mmap(tgt_addr, vma_entry_len(&vma->vma),
> @@ -272,7 +274,7 @@ static int restore_priv_vma_content(pid_t pid)
> * Read page contents.
> */
> while (1) {
> - u64 va, page_offset;
> + size_t va, page_offset;
Shouldn't it be unsigned long instead? Memory addresses are "encoded" in
unsigned long-s, not in size_t-s.
> char buf[PAGE_SIZE];
> void *p;
>
> @@ -467,7 +469,7 @@ static int open_vmas(int pid)
> if (!(vma_entry_is(&vma->vma, VMA_AREA_REGULAR)))
> continue;
>
> - pr_info("Opening 0x%016lx-0x%016lx 0x%016lx (%x) vma\n",
> + pr_info("Opening 0x%016"PRIx64"-0x%016"PRIx64" 0x%016"PRIx64" vma (%x)\n",
> vma->vma.start, vma->vma.end,
> vma->vma.pgoff, vma->vma.status);
>
> @@ -701,7 +703,7 @@ static int check_core(CoreEntry *core)
> {
> int ret = -1;
>
> - if (core->mtype != CORE_ENTRY__MARCH__X86_64) {
> + if (core->mtype != CORE_ENTRY__MARCH) {
> pr_err("Core march mismatch %d\n", (int)core->mtype);
> goto out;
> }
> @@ -717,7 +719,7 @@ static int check_core(CoreEntry *core)
> goto out;
> }
>
> - if (!core->thread_info) {
> + if (!CORE_THREAD_INFO(core)) {
> pr_err("Core info data missed for non-zombie\n");
> goto out;
> }
> @@ -998,6 +1000,13 @@ static int restore_task_with_children(void *_arg)
> int ret;
> sigset_t blockmask;
>
> +#ifdef CONFIG_DEBUG_RESTORE
> + if (kill(getpid(), SIGSTOP) < 0) {
> + pr_err("Failed to stop oneself: %d\n", errno);
> + return -1;
> + }
> +#endif
:\ Lost debugging stuff?
> +
> close_safe(&ca->fd);
>
> current = ca->item;
> @@ -1120,6 +1129,8 @@ static int restore_switch_stage(int next_stage)
> static int restore_root_task(struct pstree_item *init, struct cr_options *opts)
> {
> int ret;
> +
> +#ifndef CONFIG_DEBUG_RESTORE
> struct sigaction act, old_act;
>
> ret = sigaction(SIGCHLD, NULL, &act);
> @@ -1138,6 +1149,7 @@ static int restore_root_task(struct pstree_item *init, struct cr_options *opts)
> pr_perror("sigaction() failed\n");
> return -1;
> }
> +#endif
>
> /*
> * FIXME -- currently we assume that all the tasks live
> @@ -1183,12 +1195,16 @@ static int restore_root_task(struct pstree_item *init, struct cr_options *opts)
>
> futex_wait_until(&task_entries->nr_in_progress, 0);
>
> +
> +#ifndef CONFIG_DEBUG_RESTORE
> /* Restore SIGCHLD here to skip SIGCHLD from a network sctip */
> +
> ret = sigaction(SIGCHLD, &old_act, NULL);
> if (ret < 0) {
> pr_perror("sigaction() failed\n");
> goto out;
> }
> +#endif
>
> network_unlock();
> out:
> @@ -1248,7 +1264,6 @@ int cr_restore_tasks(pid_t pid, struct cr_options *opts)
> return restore_root_task(root_item, opts);
> }
>
> -#define TASK_SIZE_MAX ((1UL << 47) - PAGE_SIZE)
> static long restorer_get_vma_hint(pid_t pid, struct list_head *tgt_vma_list,
> struct list_head *self_vma_list, long vma_len)
> {
> @@ -1256,7 +1271,7 @@ static long restorer_get_vma_hint(pid_t pid, struct list_head *tgt_vma_list,
> long prev_vma_end = 0;
> struct vma_area end_vma;
>
> - end_vma.vma.start = end_vma.vma.end = TASK_SIZE_MAX;
> + end_vma.vma.start = end_vma.vma.end = TASK_SIZE;
> prev_vma_end = PAGE_SIZE;
>
> s_vma = list_first_entry(self_vma_list, struct vma_area, list);
> @@ -1456,7 +1471,7 @@ static VmaEntry *vma_list_remap(void *addr, unsigned long len, struct list_head
>
> static int prepare_mm(pid_t pid, struct task_restore_core_args *args)
> {
> - int fd, exe_fd, ret = -1;
> + int fd, exe_fd, i, ret = -1;
> MmEntry *mm;
>
> fd = open_image_ro(CR_FD_MM, pid);
> @@ -1478,8 +1493,10 @@ static int prepare_mm(pid_t pid, struct task_restore_core_args *args)
> }
>
> args->mm_saved_auxv_size = pb_repeated_size(mm, mm_saved_auxv);
> - memcpy(args->mm_saved_auxv, mm->mm_saved_auxv,
> - args->mm_saved_auxv_size);
> +
> + for (i = 0; i < mm->n_mm_saved_auxv; ++i) {
> + args->mm_saved_auxv[i] = (auxv_t)mm->mm_saved_auxv[i];
Why is this loop better than memecpy?
> + }
>
> exe_fd = open_reg_by_id(args->mm.exe_file_id);
> if (exe_fd < 0)
> @@ -1649,8 +1666,11 @@ static int sigreturn_restore(pid_t pid, CoreEntry *core)
> KBYTES(restore_bootstrap_len));
>
> ret = remap_restorer_blob((void *)exec_mem_hint);
> - if (ret < 0)
> +
> + if (ret < 0) {
> + pr_err("Remapping the restorer blob failed\n");
If you find logging not sufficient, send log extensions in separate patches.
> goto err;
> + }
>
> /*
> * Prepare a memory map for restorer. Note a thread space
> @@ -1727,7 +1747,7 @@ static int sigreturn_restore(pid_t pid, CoreEntry *core)
> * Arguments for task restoration.
> */
>
> - BUG_ON(core->mtype != CORE_ENTRY__MARCH__X86_64);
> + BUG_ON(core->mtype != CORE_ENTRY__MARCH);
>
> task_args->t.pid = pid;
> task_args->logfd = log_get_fd();
> @@ -1736,12 +1756,16 @@ static int sigreturn_restore(pid_t pid, CoreEntry *core)
>
> strncpy(task_args->comm, core->tc->comm, sizeof(task_args->comm));
>
> - task_args->t.clear_tid_addr = core->thread_info->clear_tid_addr;
> + task_args->t.clear_tid_addr = CORE_THREAD_INFO(core)->clear_tid_addr;
> task_args->ids = *core->ids;
> - task_args->t.gpregs = *core->thread_info->gpregs;
> + task_args->t.gpregs = *CORE_GPREGS(core);
> task_args->t.blk_sigset = core->tc->blk_sigset;
> task_args->t.has_blk_sigset = true;
>
> +#ifdef CONFIG_HAS_TLS
> + get_core_tls(core, task_args);
> +#endif
> +
> if (core->thread_core) {
> task_args->t.has_futex = true;
> task_args->t.futex_rla = core->thread_core->futex_rla;
> @@ -1810,8 +1834,11 @@ static int sigreturn_restore(pid_t pid, CoreEntry *core)
> }
>
> thread_args[i].ta = task_args;
> - thread_args[i].gpregs = *core->thread_info->gpregs;
> - thread_args[i].clear_tid_addr = core->thread_info->clear_tid_addr;
> + thread_args[i].gpregs = *CORE_GPREGS(core);
> + thread_args[i].clear_tid_addr = CORE_THREAD_INFO(core)->clear_tid_addr;
> +#ifdef CONFIG_HAS_TLS
> + thread_args[i].tls = CORE_THREAD_INFO(core)->tls;
> +#endif
>
> if (core->thread_core) {
> thread_args[i].has_futex = true;
> @@ -1850,17 +1877,8 @@ static int sigreturn_restore(pid_t pid, CoreEntry *core)
> * An indirect call to task_restore, note it never resturns
> * and restoreing core is extremely destructive.
> */
> - asm volatile(
> - "movq %0, %%rbx \n"
> - "movq %1, %%rax \n"
> - "movq %2, %%rdi \n"
> - "movq %%rbx, %%rsp \n"
> - "callq *%%rax \n"
> - :
> - : "g"(new_sp),
> - "g"(restore_task_exec_start),
> - "g"(task_args)
> - : "rsp", "rdi", "rsi", "rbx", "rax", "memory");
> +
> + jump_to_restorer_blob(new_sp, restore_task_exec_start, task_args);
Need some order here. Part of PIE asm stuff is done as macros and is written in capitals,
part is done as functions and is written in small letters. Do all in one style.
>
> err:
> free_mappings(&self_vma_list);
>
More information about the CRIU
mailing list