[CRIU] [PATCH 11/11] mem: Delayed vma/pr restore

Mike Rapoport rppt at linux.vnet.ibm.com
Sun May 7 03:52:09 PDT 2017


On Fri, May 05, 2017 at 07:04:22PM +0300, Pavel Emelyanov wrote:
> Performance experiments show that we spend (relatively) a lot of time
> mremap-ing areas from the premap area into their proper places. This time
> depends on the task being restored, but for those with many vmas this
> can be up to 20%.
> 
> The thing is that premapping is only needed to restore cow pages since
> we don't have any API in the kernel to share a page between two or more
> anonymous vmas. For non-cowing areas we can mmap() them directly in
> place. But in such cases we'll also need to restore the pages' contents
> from the pie code.
> 
> Running the whole page-read code from PIE is way too complex (for now), so
> the proposal is to optimize the case when we have a single local pagemap
> layer. This is what the pr.pieok boolean stands for.
> 
> Signed-off-by: Pavel Emelyanov <xemul at virtuozzo.com>
> ---
>  criu/cr-restore.c       |  1 +
>  criu/include/pagemap.h  |  6 ++++++
>  criu/include/restorer.h | 10 ++++++++++
>  criu/include/rst_info.h |  2 ++
>  criu/mem.c              | 45 ++++++++++++++++++++++++++++++++++++++++++++-
>  criu/pagemap.c          | 35 +++++++++++++++++++++++++++++++++--
>  criu/pie/restorer.c     | 41 +++++++++++++++++++++++++++++++++++++++++
>  criu/pstree.c           |  1 +
>  8 files changed, 138 insertions(+), 3 deletions(-)
> 

[...]

> diff --git a/criu/pagemap.c b/criu/pagemap.c
> index dcc1332..79076d9 100644
> --- a/criu/pagemap.c
> +++ b/criu/pagemap.c
> @@ -795,6 +822,7 @@ int open_page_read_at(int dfd, int pid, struct page_read *pr, int pr_flags)
>  	pr->bunch.iov_len = 0;
>  	pr->bunch.iov_base = NULL;
>  	pr->pmes = NULL;
> +	pr->pieok = false;
> 
>  	pr->pmi = open_image_at(dfd, i_typ, O_RSTR, (long)pid);
>  	if (!pr->pmi)
> @@ -836,8 +864,11 @@ int open_page_read_at(int dfd, int pid, struct page_read *pr, int pr_flags)
>  		pr->maybe_read_page = maybe_read_page_img_cache;
>  	else if (remote)
>  		pr->maybe_read_page = maybe_read_page_remote;
> -	else
> +	else {
>  		pr->maybe_read_page = maybe_read_page_local;
> +		if (!pr->parent)
> +			pr->pieok = true;
> +	}

checkpatch.pl would be unhappy about this chunk ;-)
Once one arm of an if-else chain needs braces, all of its arms should have
them. It seems that some of the previous patches also have if-else clauses
with braces added only for some of the arms.

> 
>  	pr_debug("Opened %s page read %u (parent %u)\n",
>  		 remote ? "remote" : "local", pr->id,
> diff --git a/criu/pie/restorer.c b/criu/pie/restorer.c
> index dc8a4d0..4c91ddd 100644
> --- a/criu/pie/restorer.c
> +++ b/criu/pie/restorer.c
> @@ -1270,6 +1270,47 @@ long __export_restore_task(struct task_restore_args *args)
>  		}
>  	}
> 
> +	/*
> +	 * Now read the contents (if any)
> +	 */
> +
> +	for (i = 0; i < args->vma_ios_n; i++) {
> +		struct restore_vma_io *rio = args->vma_ios + i;
> +		struct iovec *iovs = rio->iovs;
> +		int nr = rio->nr_iovs;
> +		ssize_t r;
> +
> +		while (nr) {
> +			pr_debug("Preadv %lx:%d... (%d iovs)\n",
> +					(unsigned long)iovs->iov_base,
> +					(int)iovs->iov_len, nr);
> +			r = sys_preadv(args->vma_ios_fd, iovs, nr, rio->off);
> +			if (r < 0) {
> +				pr_err("Can't read pages data (%d)\n", (int)r);
> +				goto core_restore_end;
> +			}
> +
> +			pr_debug("`- returned %ld\n", (long)r);
> +			rio->off += r;
> +			/* Advance the iovecs */
> +			do {
> +				if (iovs->iov_len <= r) {
> +					pr_debug("   `- skip pagemap\n");
> +					r -= iovs->iov_len;
> +					iovs++;
> +					nr--;
> +					continue;
> +				}
> +
> +				iovs->iov_base += r;
> +				iovs->iov_len -= r;
> +				break;
> +			} while (nr > 0);
> +		}
> +	}
> +
> +	sys_close(args->vma_ios_fd);
> +
>  #ifdef CONFIG_VDSO
>  	/*
>  	 * Proxify vDSO.
> diff --git a/criu/pstree.c b/criu/pstree.c
> index b512e43..dee5f3d 100644
> --- a/criu/pstree.c
> +++ b/criu/pstree.c
> @@ -224,6 +224,7 @@ struct pstree_item *__alloc_pstree_item(bool rst, int level)
>  			return NULL;
>  		memset(item, 0, sz);
>  		vm_area_list_init(&rsti(item)->vmas);
> +		INIT_LIST_HEAD(&rsti(item)->vma_io);
>  		/*
>  		 * On restore we never expand pid level,
>  		 * so allocate them all at once.
> -- 
> 2.5.5
> 
> _______________________________________________
> CRIU mailing list
> CRIU at openvz.org
> https://lists.openvz.org/mailman/listinfo/criu
> 



More information about the CRIU mailing list