[CRIU] [PATCH 09/15] restorer: remap private vmas to correct places (v2)

Pavel Emelyanov xemul at parallels.com
Wed Nov 14 05:21:14 EST 2012


On 11/14/2012 02:10 PM, Andrew Vagin wrote:
> On Mon, Nov 12, 2012 at 03:40:08PM +0400, Pavel Emelyanov wrote:
>> On 11/02/2012 05:32 PM, Andrey Vagin wrote:
>>> All private vmas are placed in a premmapped region and
>>> they are sorted by start addresses, so they should be shifted apart.
>>>
>>> There is one more problem: when the temporary and target regions overlap,
>>> mremap() cannot remap directly, so in such cases the vma is first
>>> remapped away and then remapped to the target place.
>>>
>>> v2: fix according to Pavel's comments
>>>
>>> Signed-off-by: Andrey Vagin <avagin at openvz.org>
>>> ---
>>>  cr-restore.c       |  1 +
>>>  include/restorer.h |  1 +
>>>  restorer.c         | 89 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
>>>  3 files changed, 91 insertions(+)
>>>
>>> diff --git a/cr-restore.c b/cr-restore.c
>>> index 7d05c31..2204b4c 100644
>>> --- a/cr-restore.c
>>> +++ b/cr-restore.c
>>> @@ -1603,6 +1603,7 @@ static int sigreturn_restore(pid_t pid, CoreEntry *core)
>>>  
>>>  	mem += self_vmas_len;
>>>  	task_args->tgt_vmas = vma_list_remap(mem, vmas_len, &rst_vma_list);
>>> +	task_args->nr_vmas = rst_nr_vmas;
>>>  	task_args->premmapped_addr = (unsigned long) premmapped_addr;
>>>  	task_args->premmapped_len = premmapped_len;
>>>  	if (!task_args->tgt_vmas)
>>> diff --git a/include/restorer.h b/include/restorer.h
>>> index 377afff..e64c114 100644
>>> --- a/include/restorer.h
>>> +++ b/include/restorer.h
>>> @@ -99,6 +99,7 @@ struct task_restore_core_args {
>>>  	struct task_entries		*task_entries;
>>>  	VmaEntry			*self_vmas;
>>>  	VmaEntry			*tgt_vmas;
>>> +	unsigned int			nr_vmas;
>>>  	unsigned long			premmapped_addr;
>>>  	unsigned long			premmapped_len;
>>>  	rt_sigaction_t			sigchld_act;
>>> diff --git a/restorer.c b/restorer.c
>>> index 6c0d888..c477c65 100644
>>> --- a/restorer.c
>>> +++ b/restorer.c
>>> @@ -315,6 +315,59 @@ static void rst_tcp_socks_all(int *arr, int size)
>>>  	sys_munmap(arr, size);
>>>  }
>>>  
>>> +static int vma_remap(unsigned long src, unsigned long dst, unsigned long len)
>>> +{
>>> +	unsigned long guard = 0, tmp;
>>> +
>>> +	pr_info("Remap %lx->%lx len %lx\n", src, dst, len);
>>> +
>>> +	if (src - dst < len)
>>> +		guard = dst;
>>> +	else if (dst - src < len)
>>> +		guard = dst + len - PAGE_SIZE;
>>> +
>>> +	if (src == dst)
>>> +		return 0;
>>> +
>>
>> Put HUGE comment here describing what's going on and why guard page is required.
>> With images would be just great.
> 
> Like this?
> 
> /* mremap() returns an error if the target and source vmas
>  * overlap. In this case the source vma is first remapped to
>  * a temporary place and then remapped to the target address.
>  * Here is a hack to find a non-overlapping temporary place.
>  * |___tgt___|=======|---src---|

1. initial placement. We need to move src -> tgt.

|       |+++++src+++++|
|-----tgt-----|       |

2. map guard page at tgt.start

|       |+++++src+++++|
|G|----tgt----|       |

3. remap src to any other place.
   G prevents src from being remapped onto tgt again

|       |-------------| -> |+++++src+++++|
|G|---tgt-----|                          |


4. remap src to tgt, no overlapping any longer

|+++++src+++++|   <----    |-------------|
|G|---tgt-----|                          |


>  * 1. Map a guard page at the non-overlapping border of the target vma.
>  * |G|__tgt__|=======|---src---|
>  *
>  * 2. Map a new vma with the size of the source; it will not
>  *    overlap the target due to the guard page
>  * |G|__tgt___________|            |------tmp------|
>  *
>  * 3. Remap the vma to the target address
>  * |____tgt___________|
>  */
> 
>>
>>> +	if (guard != 0) {
>>> +		/* Regions are overlapped */
>>> +		unsigned long addr;
>>> +
>>> +		/* Prevent overlapping with a temporary place */
>>> +		tmp = sys_mmap((void *) guard, PAGE_SIZE, PROT_NONE,
>>> +					MAP_PRIVATE | MAP_ANONYMOUS, 0, 0);
>>> +		if (tmp != guard) {
>>> +			pr_err("Unable to map a guard page %lx (%lx)\n", guard, tmp);
>>> +			return -1;
>>> +		}
>>> +
>>> +		/* Choose a temporary place */
>>> +		addr = sys_mmap(NULL, len, PROT_NONE,
>>> +					MAP_PRIVATE | MAP_ANONYMOUS, 0, 0);
>>> +		if (addr == (unsigned long) MAP_FAILED) {
>>> +			pr_err("Unable to reserve memory (%lx)\n", addr);
>>> +			return -1;
>>> +		}
>>> +
>>> +		tmp = sys_mremap(src, len, len,
>>> +					MREMAP_MAYMOVE | MREMAP_FIXED, addr);
>>> +		if (tmp != addr) {
>>> +			pr_err("Unable to remap %lx -> %lx (%lx)\n", src, addr, tmp);
>>> +			return -1;
>>> +		}
>>> +
>>> +		src = addr;
>>> +	}
>>> +
>>> +	tmp = sys_mremap(src, len, len, MREMAP_MAYMOVE | MREMAP_FIXED, dst);
>>> +	if (tmp != dst) {
>>> +		pr_err("Unable to remap %lx -> %lx\n", src, dst);
>>> +		return -1;
>>> +	}
>>> +
>>> +	return 0;
>>> +}
>>> +
>>>  /*
>>>   * The main routine to restore task via sigreturn.
>>>   * This one is very special, we never return there
>>> @@ -378,6 +431,39 @@ long __export_restore_task(struct task_restore_core_args *args)
>>>  	sys_munmap(args->self_vmas,
>>>  			((void *)(vma_entry + 1) - ((void *)args->self_vmas)));
>>>  
>>> +	/* Shift private vma-s to the left */
>>> +	for (vma_entry = args->tgt_vmas; vma_entry->start != 0; vma_entry++) {
>>> +		if (!vma_entry_is(vma_entry, VMA_AREA_REGULAR))
>>> +			continue;
>>> +
>>> +		if (!vma_priv(vma_entry))
>>> +			continue;
>>> +
>>> +		if (vma_entry->start > vma_entry->shmid)
>>> +			break;
>>> +
>>> +		if (vma_remap(vma_premmaped_start(vma_entry),
>>> +				vma_entry->start, vma_entry_len(vma_entry)))
>>> +			goto core_restore_end;
>>> +	}
>>> +
>>> +	/* Shift private vma-s to the right */
>>> +	for (vma_entry = args->tgt_vmas + args->nr_vmas -1;
>>> +				vma_entry >= args->tgt_vmas; vma_entry--) {
>>> +		if (!vma_entry_is(vma_entry, VMA_AREA_REGULAR))
>>> +			continue;
>>> +
>>> +		if (!vma_priv(vma_entry))
>>> +			continue;
>>> +
>>> +		if (vma_entry->start < vma_entry->shmid)
>>> +			break;
>>> +
>>> +		if (vma_remap(vma_premmaped_start(vma_entry),
>>> +				vma_entry->start, vma_entry_len(vma_entry)))
>>> +			goto core_restore_end;
>>> +	}
>>> +
>>>  	/*
>>>  	 * OK, lets try to map new one.
>>>  	 */
>>> @@ -385,6 +471,9 @@ long __export_restore_task(struct task_restore_core_args *args)
>>>  		if (!vma_entry_is(vma_entry, VMA_AREA_REGULAR))
>>>  			continue;
>>>  
>>> +		if (vma_priv(vma_entry))
>>> +			continue;
>>> +
>>>  		va = restore_mapping(vma_entry);
>>>  
>>>  		if (va != vma_entry->start) {
>>>
>>
>>
> .
> 




More information about the CRIU mailing list