[CRIU] [PATCH 07/11] vma: Move cow decision earlier

Pavel Emelyanov xemul at virtuozzo.com
Wed May 10 01:49:40 PDT 2017


On 05/07/2017 01:49 PM, Mike Rapoport wrote:
> On Fri, May 05, 2017 at 07:03:16PM +0300, Pavel Emelyanov wrote:
>> Collect VMAs into COW-groups. This is done by checking each pstree_item's
>> VMA list in parallel with the parent one and finding VMAs that have
>> chances to get COW pages. The vma->parent pointer is used to tie such
>> areas together.
>>
>> As a nice side effect -- tasks with different exe files are not even
>> considered for COW after this patch.
>>
>> Signed-off-by: Pavel Emelyanov <xemul at virtuozzo.com>
>> ---
>>  criu/cr-restore.c  |   2 +
>>  criu/include/mem.h |   1 +
>>  criu/include/vma.h |  11 ++++
>>  criu/mem.c         | 162 ++++++++++++++++++++++++++++++++++-------------------
>>  4 files changed, 117 insertions(+), 59 deletions(-)
>>
>> diff --git a/criu/cr-restore.c b/criu/cr-restore.c
>> index f3f5f67..bbe0dad 100644
>> --- a/criu/cr-restore.c
>> +++ b/criu/cr-restore.c
>> @@ -367,6 +367,8 @@ static int root_prepare_shared(void)
>>  	if (ret < 0)
>>  		goto err;
>>
>> +	prepare_cow_vmas();
>> +
>>  	ret = prepare_restorer_blob();
>>  	if (ret)
>>  		goto err;
>> diff --git a/criu/include/mem.h b/criu/include/mem.h
>> index 220a301..6791bfd 100644
>> --- a/criu/include/mem.h
>> +++ b/criu/include/mem.h
>> @@ -18,6 +18,7 @@ struct mem_dump_ctl {
>>  extern bool page_is_zero(u64 pme);
>>  extern bool page_in_parent(bool dirty);
>>  extern int prepare_mm_pid(struct pstree_item *i);
>> +extern void prepare_cow_vmas(void);
>>  extern int do_task_reset_dirty_track(int pid);
>>  extern unsigned long dump_pages_args_size(struct vm_area_list *vmas);
>>  extern int parasite_dump_pages_seized(struct pstree_item *item,
>> diff --git a/criu/include/vma.h b/criu/include/vma.h
>> index 12f03fb..dcce080 100644
>> --- a/criu/include/vma.h
>> +++ b/criu/include/vma.h
>> @@ -57,6 +57,17 @@ struct vma_area {
>>  			struct vma_area	*pvma;		/* parent for inherited VMAs */
>>  			unsigned long	*page_bitmap;	/* existent pages */
>>  			unsigned long	premmaped_addr;	/* restore only */
>> +
>> +			/*
>> +			 * Some notes about pvma, page_bitmap and premmaped_addr bits
>> +			 * above.
>> +			 *
>> +			 * The pvma is set on prepare_cow_vmas() when resolving which
>> +			 * VMAs _may_ inherit pages from which. The other two are set
>> +			 * in prepare_mappings() when the respective VMAs get mmap-ed
>> +			 * or mremap-ed, and are then inherited on fork_with_pid()-s
>> +			 * called from create_children_and_session().
>> +			 */
> 
> * The pvma is set in prepare_cow_vmas() when we resolve which
> * VMAs _may_ inherit pages from each other.
> * The page_bitmap and premmaped_addr are set in prepare_mappings()
> * when the respective VMAs get mmap-ed or mremap-ed.
> * These VMAs are then inherited during fork_with_pid()-s
> * called from create_children_and_session().

Thanks!

>>  		};
>>  	};
>>  };
>> diff --git a/criu/mem.c b/criu/mem.c
>> index 1b805cd..a9e6e08 100644
>> --- a/criu/mem.c
>> +++ b/criu/mem.c
>> @@ -552,15 +552,99 @@ int prepare_mm_pid(struct pstree_item *i)
>>  	return ret;
>>  }
>>
>> +static inline bool check_cow_vmas(struct vma_area *vma, struct vma_area *pvma)
>> +{
>> +	/*
>> +	 * VMAs that _may_[1] have COW-ed pages should ...
>> +	 *
>> +	 * [1] I say "may" because whether or not particular pages are
>> +	 * COW-ed is determined later in restore_priv_vma_content() by
>> +	 * memcpy-ing the contents.
> 
> memcmp-ing (i.e. comparing), rather than memcpy-ing?

Indeed :\

>> +	 */
>> +
>> +	/* ... coincide by start/stop pair (start is checked by caller) */
>> +	if (vma->e->end != pvma->e->end)
>> +		return false;
>> +	/* ... both be private (and thus have space in premmaped area) */
>> +	if (!vma_area_is_private(vma, kdat.task_size))
>> +		return false;
>> +	if (!vma_area_is_private(vma, kdat.task_size))
>> +		return false;
> 
> The check for the same VMA is repeated here; one of the two checks above
> should be on pvma, IMHO.

Yup, nice catch.

>> +	/* ... have growsdown and anon flags coincide */
>> +	if ((vma->e->flags ^ pvma->e->flags) & (MAP_GROWSDOWN | MAP_ANONYMOUS))
>> +		return false;
>> +	/* ... belong to the same file if being filemap */
>> +	if (!(vma->e->flags & MAP_ANONYMOUS) && vma->e->shmid != pvma->e->shmid)
>> +		return false;
>> +
>> +	pr_debug("Found two COW VMAs @%#lx-%#lx\n", vma->e->start, pvma->e->end);
>> +	return true;
>> +}
>> +
>> +static void prepare_cow_vmas_for(struct vm_area_list *vmas, struct vm_area_list *pvmas)
>> +{
>> +	struct vma_area *vma, *pvma;
>> +
>> +	vma = list_first_entry(&vmas->h, struct vma_area, list);
>> +	pvma = list_first_entry(&pvmas->h, struct vma_area, list);
>> +
>> +	while (1) {
>> +		if ((vma->e->start == pvma->e->start) && check_cow_vmas(vma, pvma))
>> +			vma->pvma = pvma;
>> +
>> +		/* <= here to shift from matching VMAs and ... */
>> +		while (vma->e->start <= pvma->e->start) {
>> +			vma = list_entry(vma->list.next, struct vma_area, list);
>> +			if (&vma->list == &vmas->h)
>> +				return;
>> +		}
>> +
>> +		/* ... no == here since we must stop on matching pair */
>> +		while (pvma->e->start < vma->e->start) {
>> +			pvma = list_entry(pvma->list.next, struct vma_area, list);
> 
> It's worth having vma_next already by this point ;-)

OK, will try to re-sort the patches.

>> +			if (&pvma->list == &pvmas->h)
>> +				return;
>> +		}
>> +	}
>> +}
>> +

-- Pavel



More information about the CRIU mailing list