[CRIU] [PATCH 10/20] cr-restore.c: introduced the multiarch support.

Alexander Kartashov alekskartashov at parallels.com
Wed Dec 12 11:28:48 EST 2012


> WTF is that? It's just unsigned long printing, why this ugly format hacks?!

Unfortunately, almost all format strings have to be fixed in this way.
In this case vma->vma.start is 64-bit long but %lx on ARM expects
32-bit interger.

> Why is this loop better than memecpy?

The size of auxv_t is different on X86 and ARM.

> If you find logging not sufficient, send log extensions in separate patches.


> Need some order here. Part of PIE asm stuff is done as macros and is written in capitals,
> part is done as functions and is written in small letters. Do all in one style.

To be fixed in the next version.



On 12/12/2012 08:03 PM, Pavel Emelyanov wrote:
> On 12/12/2012 05:34 PM, alekskartashov at parallels.com wrote:
>> From: Alexander Kartashov <alekskartashov at parallels.com>
>>
>> * Introduced the macro CONFIG_DEBUG_RESTORE to stop restoration after a fork.
>> * Introduced the macro CORE_GPREGS to access machine general-purpose registers.
>> * Introduced the macro jump_to_restorer_blob to jump to the restorer blob
>>    in a machine-dependent way.
>>
>> Signed-off-by: Alexander Kartashov <alekskartashov at parallels.com>
>> ---
>>   cr-restore.c |   74 ++++++++++++++++++++++++++++++++++++----------------------
>>   1 file changed, 46 insertions(+), 28 deletions(-)
>>
>> diff --git a/cr-restore.c b/cr-restore.c
>> index 8dd1886..995656e 100644
>> --- a/cr-restore.c
>> +++ b/cr-restore.c
>> @@ -59,6 +59,8 @@
>>   #include "protobuf/itimer.pb-c.h"
>>   #include "protobuf/vma.pb-c.h"
>>   
>> +#include <arch_restorer.h>
>> +
>>   static struct pstree_item *current;
>>   
>>   static int restore_task_with_children(void *);
>> @@ -210,7 +212,7 @@ static int map_private_vma(pid_t pid, struct vma_area *vma, void *tgt_addr,
>>   
>>   		if (p->vma.end == vma->vma.end &&
>>   		    p->vma.start == vma->vma.start) {
>> -			pr_info("COW 0x%016lx-0x%016lx 0x%016lx vma\n",
>> +			pr_info("COW 0x%016"PRIx64"-0x%016"PRIx64" 0x%016"PRIx64" vma\n",
>>   				vma->vma.start, vma->vma.end, vma->vma.pgoff);
> WTF is that? It's just unsigned long printing, why this ugly format hacks?!
>
>>   			paddr = (void *) vma_premmaped_start(&p->vma);
>>   			break;
>> @@ -221,7 +223,7 @@ static int map_private_vma(pid_t pid, struct vma_area *vma, void *tgt_addr,
>>   	*pvma = p;
>>   
>>   	if (paddr == NULL) {
>> -		pr_info("Map 0x%016lx-0x%016lx 0x%016lx vma\n",
>> +		pr_info("Map 0x%016"PRIx64"-0x%016"PRIx64" 0x%016"PRIx64" vma\n",
>>   			vma->vma.start, vma->vma.end, vma->vma.pgoff);
>>   
>>   		addr = mmap(tgt_addr, vma_entry_len(&vma->vma),
>> @@ -272,7 +274,7 @@ static int restore_priv_vma_content(pid_t pid)
>>   	 * Read page contents.
>>   	 */
>>   	while (1) {
>> -		u64 va, page_offset;
>> +		size_t va, page_offset;
> Shouldn't it be unsigned long instead? Memory addresses are "encoded" in
> unsigned long-s, not in size_t-s.
>
>>   		char buf[PAGE_SIZE];
>>   		void *p;
>>   
>> @@ -467,7 +469,7 @@ static int open_vmas(int pid)
>>   		if (!(vma_entry_is(&vma->vma, VMA_AREA_REGULAR)))
>>   			continue;
>>   
>> -		pr_info("Opening 0x%016lx-0x%016lx 0x%016lx (%x) vma\n",
>> +		pr_info("Opening 0x%016"PRIx64"-0x%016"PRIx64" 0x%016"PRIx64" vma (%x)\n",
>>   				vma->vma.start, vma->vma.end,
>>   				vma->vma.pgoff, vma->vma.status);
>>   
>> @@ -701,7 +703,7 @@ static int check_core(CoreEntry *core)
>>   {
>>   	int ret = -1;
>>   
>> -	if (core->mtype != CORE_ENTRY__MARCH__X86_64) {
>> +	if (core->mtype != CORE_ENTRY__MARCH) {
>>   		pr_err("Core march mismatch %d\n", (int)core->mtype);
>>   		goto out;
>>   	}
>> @@ -717,7 +719,7 @@ static int check_core(CoreEntry *core)
>>   			goto out;
>>   		}
>>   
>> -		if (!core->thread_info) {
>> +		if (!CORE_THREAD_INFO(core)) {
>>   			pr_err("Core info data missed for non-zombie\n");
>>   			goto out;
>>   		}
>> @@ -998,6 +1000,13 @@ static int restore_task_with_children(void *_arg)
>>   	int ret;
>>   	sigset_t blockmask;
>>   
>> +#ifdef CONFIG_DEBUG_RESTORE
>> +	if (kill(getpid(), SIGSTOP) < 0) {
>> +		pr_err("Failed to stop oneself: %d\n", errno);
>> +		return -1;
>> +	}
>> +#endif
> :\ Lost debugging stuff?
>
>> +
>>   	close_safe(&ca->fd);
>>   
>>   	current = ca->item;
>> @@ -1120,6 +1129,8 @@ static int restore_switch_stage(int next_stage)
>>   static int restore_root_task(struct pstree_item *init, struct cr_options *opts)
>>   {
>>   	int ret;
>> +
>> +#ifndef CONFIG_DEBUG_RESTORE
>>   	struct sigaction act, old_act;
>>   
>>   	ret = sigaction(SIGCHLD, NULL, &act);
>> @@ -1138,6 +1149,7 @@ static int restore_root_task(struct pstree_item *init, struct cr_options *opts)
>>   		pr_perror("sigaction() failed\n");
>>   		return -1;
>>   	}
>> +#endif
>>   
>>   	/*
>>   	 * FIXME -- currently we assume that all the tasks live
>> @@ -1183,12 +1195,16 @@ static int restore_root_task(struct pstree_item *init, struct cr_options *opts)
>>   
>>   	futex_wait_until(&task_entries->nr_in_progress, 0);
>>   
>> +
>> +#ifndef CONFIG_DEBUG_RESTORE
>>   	/* Restore SIGCHLD here to skip SIGCHLD from a network sctip */
>> +
>>   	ret = sigaction(SIGCHLD, &old_act, NULL);
>>   	if (ret < 0) {
>>   		pr_perror("sigaction() failed\n");
>>   		goto out;
>>   	}
>> +#endif
>>   
>>   	network_unlock();
>>   out:
>> @@ -1248,7 +1264,6 @@ int cr_restore_tasks(pid_t pid, struct cr_options *opts)
>>   	return restore_root_task(root_item, opts);
>>   }
>>   
>> -#define TASK_SIZE_MAX   ((1UL << 47) - PAGE_SIZE)
>>   static long restorer_get_vma_hint(pid_t pid, struct list_head *tgt_vma_list,
>>   		struct list_head *self_vma_list, long vma_len)
>>   {
>> @@ -1256,7 +1271,7 @@ static long restorer_get_vma_hint(pid_t pid, struct list_head *tgt_vma_list,
>>   	long prev_vma_end = 0;
>>   	struct vma_area end_vma;
>>   
>> -	end_vma.vma.start = end_vma.vma.end = TASK_SIZE_MAX;
>> +	end_vma.vma.start = end_vma.vma.end = TASK_SIZE;
>>   	prev_vma_end = PAGE_SIZE;
>>   
>>   	s_vma = list_first_entry(self_vma_list, struct vma_area, list);
>> @@ -1456,7 +1471,7 @@ static VmaEntry *vma_list_remap(void *addr, unsigned long len, struct list_head
>>   
>>   static int prepare_mm(pid_t pid, struct task_restore_core_args *args)
>>   {
>> -	int fd, exe_fd, ret = -1;
>> +	int fd, exe_fd, i, ret = -1;
>>   	MmEntry *mm;
>>   
>>   	fd = open_image_ro(CR_FD_MM, pid);
>> @@ -1478,8 +1493,10 @@ static int prepare_mm(pid_t pid, struct task_restore_core_args *args)
>>   	}
>>   
>>   	args->mm_saved_auxv_size = pb_repeated_size(mm, mm_saved_auxv);
>> -	memcpy(args->mm_saved_auxv, mm->mm_saved_auxv,
>> -	       args->mm_saved_auxv_size);
>> +
>> +	for (i = 0; i < mm->n_mm_saved_auxv; ++i) {
>> +		args->mm_saved_auxv[i] = (auxv_t)mm->mm_saved_auxv[i];
> Why is this loop better than memecpy?
>
>> +	}
>>   
>>   	exe_fd = open_reg_by_id(args->mm.exe_file_id);
>>   	if (exe_fd < 0)
>> @@ -1649,8 +1666,11 @@ static int sigreturn_restore(pid_t pid, CoreEntry *core)
>>   			KBYTES(restore_bootstrap_len));
>>   
>>   	ret = remap_restorer_blob((void *)exec_mem_hint);
>> -	if (ret < 0)
>> +
>> +	if (ret < 0) {
>> +		pr_err("Remapping the restorer blob failed\n");
> If you find logging not sufficient, send log extensions in separate patches.
>
>>   		goto err;
>> +	}
>>   
>>   	/*
>>   	 * Prepare a memory map for restorer. Note a thread space
>> @@ -1727,7 +1747,7 @@ static int sigreturn_restore(pid_t pid, CoreEntry *core)
>>   	 * Arguments for task restoration.
>>   	 */
>>   
>> -	BUG_ON(core->mtype != CORE_ENTRY__MARCH__X86_64);
>> +	BUG_ON(core->mtype != CORE_ENTRY__MARCH);
>>   
>>   	task_args->t.pid	= pid;
>>   	task_args->logfd	= log_get_fd();
>> @@ -1736,12 +1756,16 @@ static int sigreturn_restore(pid_t pid, CoreEntry *core)
>>   
>>   	strncpy(task_args->comm, core->tc->comm, sizeof(task_args->comm));
>>   
>> -	task_args->t.clear_tid_addr	= core->thread_info->clear_tid_addr;
>> +	task_args->t.clear_tid_addr	= CORE_THREAD_INFO(core)->clear_tid_addr;
>>   	task_args->ids			= *core->ids;
>> -	task_args->t.gpregs		= *core->thread_info->gpregs;
>> +	task_args->t.gpregs		= *CORE_GPREGS(core);
>>   	task_args->t.blk_sigset		= core->tc->blk_sigset;
>>   	task_args->t.has_blk_sigset	= true;
>>   
>> +#ifdef CONFIG_HAS_TLS
>> +	get_core_tls(core, task_args);
>> +#endif
>> +
>>   	if (core->thread_core) {
>>   		task_args->t.has_futex		= true;
>>   		task_args->t.futex_rla		= core->thread_core->futex_rla;
>> @@ -1810,8 +1834,11 @@ static int sigreturn_restore(pid_t pid, CoreEntry *core)
>>   		}
>>   
>>   		thread_args[i].ta		= task_args;
>> -		thread_args[i].gpregs		= *core->thread_info->gpregs;
>> -		thread_args[i].clear_tid_addr	= core->thread_info->clear_tid_addr;
>> +		thread_args[i].gpregs		= *CORE_GPREGS(core);
>> +		thread_args[i].clear_tid_addr	= CORE_THREAD_INFO(core)->clear_tid_addr;
>> +#ifdef CONFIG_HAS_TLS
>> +		thread_args[i].tls		= CORE_THREAD_INFO(core)->tls;
>> +#endif
>>   
>>   		if (core->thread_core) {
>>   			thread_args[i].has_futex	= true;
>> @@ -1850,17 +1877,8 @@ static int sigreturn_restore(pid_t pid, CoreEntry *core)
>>   	 * An indirect call to task_restore, note it never resturns
>>   	 * and restoreing core is extremely destructive.
>>   	 */
>> -	asm volatile(
>> -		"movq %0, %%rbx						\n"
>> -		"movq %1, %%rax						\n"
>> -		"movq %2, %%rdi						\n"
>> -		"movq %%rbx, %%rsp					\n"
>> -		"callq *%%rax						\n"
>> -		:
>> -		: "g"(new_sp),
>> -		  "g"(restore_task_exec_start),
>> -		  "g"(task_args)
>> -		: "rsp", "rdi", "rsi", "rbx", "rax", "memory");
>> +
>> +	jump_to_restorer_blob(new_sp, restore_task_exec_start, task_args);
> Need some order here. Part of PIE asm stuff is done as macros and is written in capitals,
> part is done as functions and is written in small letters. Do all in one style.
>
>>   
>>   err:
>>   	free_mappings(&self_vma_list);
>>
>


-- 
Sincerely yours,
Alexander Kartashov

Intern
Core team

www.parallels.com

Skype: aleksandr.kartashov
Email: alekskartashov at parallels.com



More information about the CRIU mailing list