[CRIU] [PATCH 10/20] cr-restore.c: introduced the multiarch support.
Alexander Kartashov
alekskartashov at parallels.com
Wed Dec 12 11:28:48 EST 2012
> WTF is that? It's just unsigned long printing, why this ugly format hacks?!
Unfortunately, almost all format strings have to be fixed in this way.
In this case vma->vma.start is 64-bit long but %lx on ARM expects
32-bit interger.
> Why is this loop better than memecpy?
The size of auxv_t is different on X86 and ARM.
> If you find logging not sufficient, send log extensions in separate patches.
> Need some order here. Part of PIE asm stuff is done as macros and is written in capitals,
> part is done as functions and is written in small letters. Do all in one style.
To be fixed in the next version.
On 12/12/2012 08:03 PM, Pavel Emelyanov wrote:
> On 12/12/2012 05:34 PM, alekskartashov at parallels.com wrote:
>> From: Alexander Kartashov <alekskartashov at parallels.com>
>>
>> * Introduced the macro CONFIG_DEBUG_RESTORE to stop restoration after a fork.
>> * Introduced the macro CORE_GPREGS to access machine general-purpose registers.
>> * Introduced the macro jump_to_restorer_blob to jump to the restorer blob
>> in a machine-dependent way.
>>
>> Signed-off-by: Alexander Kartashov <alekskartashov at parallels.com>
>> ---
>> cr-restore.c | 74 ++++++++++++++++++++++++++++++++++++----------------------
>> 1 file changed, 46 insertions(+), 28 deletions(-)
>>
>> diff --git a/cr-restore.c b/cr-restore.c
>> index 8dd1886..995656e 100644
>> --- a/cr-restore.c
>> +++ b/cr-restore.c
>> @@ -59,6 +59,8 @@
>> #include "protobuf/itimer.pb-c.h"
>> #include "protobuf/vma.pb-c.h"
>>
>> +#include <arch_restorer.h>
>> +
>> static struct pstree_item *current;
>>
>> static int restore_task_with_children(void *);
>> @@ -210,7 +212,7 @@ static int map_private_vma(pid_t pid, struct vma_area *vma, void *tgt_addr,
>>
>> if (p->vma.end == vma->vma.end &&
>> p->vma.start == vma->vma.start) {
>> - pr_info("COW 0x%016lx-0x%016lx 0x%016lx vma\n",
>> + pr_info("COW 0x%016"PRIx64"-0x%016"PRIx64" 0x%016"PRIx64" vma\n",
>> vma->vma.start, vma->vma.end, vma->vma.pgoff);
> WTF is that? It's just unsigned long printing, why this ugly format hacks?!
>
>> paddr = (void *) vma_premmaped_start(&p->vma);
>> break;
>> @@ -221,7 +223,7 @@ static int map_private_vma(pid_t pid, struct vma_area *vma, void *tgt_addr,
>> *pvma = p;
>>
>> if (paddr == NULL) {
>> - pr_info("Map 0x%016lx-0x%016lx 0x%016lx vma\n",
>> + pr_info("Map 0x%016"PRIx64"-0x%016"PRIx64" 0x%016"PRIx64" vma\n",
>> vma->vma.start, vma->vma.end, vma->vma.pgoff);
>>
>> addr = mmap(tgt_addr, vma_entry_len(&vma->vma),
>> @@ -272,7 +274,7 @@ static int restore_priv_vma_content(pid_t pid)
>> * Read page contents.
>> */
>> while (1) {
>> - u64 va, page_offset;
>> + size_t va, page_offset;
> Shouldn't it be unsigned long instead? Memory addresses are "encoded" in
> unsigned long-s, not in size_t-s.
>
>> char buf[PAGE_SIZE];
>> void *p;
>>
>> @@ -467,7 +469,7 @@ static int open_vmas(int pid)
>> if (!(vma_entry_is(&vma->vma, VMA_AREA_REGULAR)))
>> continue;
>>
>> - pr_info("Opening 0x%016lx-0x%016lx 0x%016lx (%x) vma\n",
>> + pr_info("Opening 0x%016"PRIx64"-0x%016"PRIx64" 0x%016"PRIx64" vma (%x)\n",
>> vma->vma.start, vma->vma.end,
>> vma->vma.pgoff, vma->vma.status);
>>
>> @@ -701,7 +703,7 @@ static int check_core(CoreEntry *core)
>> {
>> int ret = -1;
>>
>> - if (core->mtype != CORE_ENTRY__MARCH__X86_64) {
>> + if (core->mtype != CORE_ENTRY__MARCH) {
>> pr_err("Core march mismatch %d\n", (int)core->mtype);
>> goto out;
>> }
>> @@ -717,7 +719,7 @@ static int check_core(CoreEntry *core)
>> goto out;
>> }
>>
>> - if (!core->thread_info) {
>> + if (!CORE_THREAD_INFO(core)) {
>> pr_err("Core info data missed for non-zombie\n");
>> goto out;
>> }
>> @@ -998,6 +1000,13 @@ static int restore_task_with_children(void *_arg)
>> int ret;
>> sigset_t blockmask;
>>
>> +#ifdef CONFIG_DEBUG_RESTORE
>> + if (kill(getpid(), SIGSTOP) < 0) {
>> + pr_err("Failed to stop oneself: %d\n", errno);
>> + return -1;
>> + }
>> +#endif
> :\ Lost debugging stuff?
>
>> +
>> close_safe(&ca->fd);
>>
>> current = ca->item;
>> @@ -1120,6 +1129,8 @@ static int restore_switch_stage(int next_stage)
>> static int restore_root_task(struct pstree_item *init, struct cr_options *opts)
>> {
>> int ret;
>> +
>> +#ifndef CONFIG_DEBUG_RESTORE
>> struct sigaction act, old_act;
>>
>> ret = sigaction(SIGCHLD, NULL, &act);
>> @@ -1138,6 +1149,7 @@ static int restore_root_task(struct pstree_item *init, struct cr_options *opts)
>> pr_perror("sigaction() failed\n");
>> return -1;
>> }
>> +#endif
>>
>> /*
>> * FIXME -- currently we assume that all the tasks live
>> @@ -1183,12 +1195,16 @@ static int restore_root_task(struct pstree_item *init, struct cr_options *opts)
>>
>> futex_wait_until(&task_entries->nr_in_progress, 0);
>>
>> +
>> +#ifndef CONFIG_DEBUG_RESTORE
>> /* Restore SIGCHLD here to skip SIGCHLD from a network sctip */
>> +
>> ret = sigaction(SIGCHLD, &old_act, NULL);
>> if (ret < 0) {
>> pr_perror("sigaction() failed\n");
>> goto out;
>> }
>> +#endif
>>
>> network_unlock();
>> out:
>> @@ -1248,7 +1264,6 @@ int cr_restore_tasks(pid_t pid, struct cr_options *opts)
>> return restore_root_task(root_item, opts);
>> }
>>
>> -#define TASK_SIZE_MAX ((1UL << 47) - PAGE_SIZE)
>> static long restorer_get_vma_hint(pid_t pid, struct list_head *tgt_vma_list,
>> struct list_head *self_vma_list, long vma_len)
>> {
>> @@ -1256,7 +1271,7 @@ static long restorer_get_vma_hint(pid_t pid, struct list_head *tgt_vma_list,
>> long prev_vma_end = 0;
>> struct vma_area end_vma;
>>
>> - end_vma.vma.start = end_vma.vma.end = TASK_SIZE_MAX;
>> + end_vma.vma.start = end_vma.vma.end = TASK_SIZE;
>> prev_vma_end = PAGE_SIZE;
>>
>> s_vma = list_first_entry(self_vma_list, struct vma_area, list);
>> @@ -1456,7 +1471,7 @@ static VmaEntry *vma_list_remap(void *addr, unsigned long len, struct list_head
>>
>> static int prepare_mm(pid_t pid, struct task_restore_core_args *args)
>> {
>> - int fd, exe_fd, ret = -1;
>> + int fd, exe_fd, i, ret = -1;
>> MmEntry *mm;
>>
>> fd = open_image_ro(CR_FD_MM, pid);
>> @@ -1478,8 +1493,10 @@ static int prepare_mm(pid_t pid, struct task_restore_core_args *args)
>> }
>>
>> args->mm_saved_auxv_size = pb_repeated_size(mm, mm_saved_auxv);
>> - memcpy(args->mm_saved_auxv, mm->mm_saved_auxv,
>> - args->mm_saved_auxv_size);
>> +
>> + for (i = 0; i < mm->n_mm_saved_auxv; ++i) {
>> + args->mm_saved_auxv[i] = (auxv_t)mm->mm_saved_auxv[i];
> Why is this loop better than memecpy?
>
>> + }
>>
>> exe_fd = open_reg_by_id(args->mm.exe_file_id);
>> if (exe_fd < 0)
>> @@ -1649,8 +1666,11 @@ static int sigreturn_restore(pid_t pid, CoreEntry *core)
>> KBYTES(restore_bootstrap_len));
>>
>> ret = remap_restorer_blob((void *)exec_mem_hint);
>> - if (ret < 0)
>> +
>> + if (ret < 0) {
>> + pr_err("Remapping the restorer blob failed\n");
> If you find logging not sufficient, send log extensions in separate patches.
>
>> goto err;
>> + }
>>
>> /*
>> * Prepare a memory map for restorer. Note a thread space
>> @@ -1727,7 +1747,7 @@ static int sigreturn_restore(pid_t pid, CoreEntry *core)
>> * Arguments for task restoration.
>> */
>>
>> - BUG_ON(core->mtype != CORE_ENTRY__MARCH__X86_64);
>> + BUG_ON(core->mtype != CORE_ENTRY__MARCH);
>>
>> task_args->t.pid = pid;
>> task_args->logfd = log_get_fd();
>> @@ -1736,12 +1756,16 @@ static int sigreturn_restore(pid_t pid, CoreEntry *core)
>>
>> strncpy(task_args->comm, core->tc->comm, sizeof(task_args->comm));
>>
>> - task_args->t.clear_tid_addr = core->thread_info->clear_tid_addr;
>> + task_args->t.clear_tid_addr = CORE_THREAD_INFO(core)->clear_tid_addr;
>> task_args->ids = *core->ids;
>> - task_args->t.gpregs = *core->thread_info->gpregs;
>> + task_args->t.gpregs = *CORE_GPREGS(core);
>> task_args->t.blk_sigset = core->tc->blk_sigset;
>> task_args->t.has_blk_sigset = true;
>>
>> +#ifdef CONFIG_HAS_TLS
>> + get_core_tls(core, task_args);
>> +#endif
>> +
>> if (core->thread_core) {
>> task_args->t.has_futex = true;
>> task_args->t.futex_rla = core->thread_core->futex_rla;
>> @@ -1810,8 +1834,11 @@ static int sigreturn_restore(pid_t pid, CoreEntry *core)
>> }
>>
>> thread_args[i].ta = task_args;
>> - thread_args[i].gpregs = *core->thread_info->gpregs;
>> - thread_args[i].clear_tid_addr = core->thread_info->clear_tid_addr;
>> + thread_args[i].gpregs = *CORE_GPREGS(core);
>> + thread_args[i].clear_tid_addr = CORE_THREAD_INFO(core)->clear_tid_addr;
>> +#ifdef CONFIG_HAS_TLS
>> + thread_args[i].tls = CORE_THREAD_INFO(core)->tls;
>> +#endif
>>
>> if (core->thread_core) {
>> thread_args[i].has_futex = true;
>> @@ -1850,17 +1877,8 @@ static int sigreturn_restore(pid_t pid, CoreEntry *core)
>> * An indirect call to task_restore, note it never resturns
>> * and restoreing core is extremely destructive.
>> */
>> - asm volatile(
>> - "movq %0, %%rbx \n"
>> - "movq %1, %%rax \n"
>> - "movq %2, %%rdi \n"
>> - "movq %%rbx, %%rsp \n"
>> - "callq *%%rax \n"
>> - :
>> - : "g"(new_sp),
>> - "g"(restore_task_exec_start),
>> - "g"(task_args)
>> - : "rsp", "rdi", "rsi", "rbx", "rax", "memory");
>> +
>> + jump_to_restorer_blob(new_sp, restore_task_exec_start, task_args);
> Need some order here. Part of PIE asm stuff is done as macros and is written in capitals,
> part is done as functions and is written in small letters. Do all in one style.
>
>>
>> err:
>> free_mappings(&self_vma_list);
>>
>
--
Sincerely yours,
Alexander Kartashov
Intern
Core team
www.parallels.com
Skype: aleksandr.kartashov
Email: alekskartashov at parallels.com
More information about the CRIU
mailing list