[CRIU] [PATCH v2 4/7] lazy-pages: fix memory corruption when combining pre-dump with lazy pages

Pavel Emelyanov xemul at virtuozzo.com
Wed Jun 21 15:30:23 MSK 2017


On 06/21/2017 02:37 PM, Mike Rapoport wrote:
> When we combine pre-dump with lazy pages, we populate a part of a memory
> region with data that was saved during the pre-dump. Afterwards, the
> region is registered with userfaultfd and we expect to get page faults for
> the parts of the region that were not yet populated. However, khugepaged
> collapses the pages and the page faults we would expect do not occur.
> 
> To mitigate this problem we temporarily disable THP for the restored
> process, up to the point when we register all the memory regions with
> userfaultfd.
> 
> https://lists.openvz.org/pipermail/criu/2017-May/037728.html
> 
> Reported-by: Adrian Reber <areber at redhat.com>
> Signed-off-by: Mike Rapoport <rppt at linux.vnet.ibm.com>
> ---
>  criu/cr-restore.c       |  3 +++
>  criu/include/pagemap.h  |  5 +++++
>  criu/include/restorer.h |  1 +
>  criu/include/rst_info.h |  2 ++
>  criu/mem.c              | 33 ++++++++++++++++++++++++++++++++-
>  criu/pie/restorer.c     |  8 ++++++++
>  6 files changed, 51 insertions(+), 1 deletion(-)
> 
> diff --git a/criu/cr-restore.c b/criu/cr-restore.c
> index 41b5db3..f403127 100644
> --- a/criu/cr-restore.c
> +++ b/criu/cr-restore.c
> @@ -2767,6 +2767,9 @@ static int prepare_mm(pid_t pid, struct task_restore_args *args)
>  		goto out;
>  
>  	args->fd_exe_link = exe_fd;
> +
> +	args->has_thp_enabled = rsti(current)->has_thp_enabled;
> +
>  	ret = 0;
>  out:
>  	return ret;
> diff --git a/criu/include/pagemap.h b/criu/include/pagemap.h
> index d0a28b9..8007683 100644
> --- a/criu/include/pagemap.h
> +++ b/criu/include/pagemap.h
> @@ -127,6 +127,11 @@ static inline unsigned long pagemap_len(PagemapEntry *pe)
>  	return pe->nr_pages * PAGE_SIZE;
>  }
>  
> +static inline bool page_read_has_parent(struct page_read *pr)
> +{
> +	return pr->parent != NULL;
> +}
> +
>  /* Pagemap flags */
>  #define PE_PARENT	(1 << 0)	/* pages are in parent snapshot */
>  #define PE_LAZY		(1 << 1)	/* pages can be lazily restored */
> diff --git a/criu/include/restorer.h b/criu/include/restorer.h
> index 736ba96..9595ad6 100644
> --- a/criu/include/restorer.h
> +++ b/criu/include/restorer.h
> @@ -120,6 +120,7 @@ struct task_restore_args {
>  	struct timeval			logstart;
>  
>  	int				uffd;
> +	bool				has_thp_enabled;
>  
>  	/* threads restoration */
>  	int				nr_threads;		/* number of threads */
> diff --git a/criu/include/rst_info.h b/criu/include/rst_info.h
> index 78b1f64..f9840d1 100644
> --- a/criu/include/rst_info.h
> +++ b/criu/include/rst_info.h
> @@ -62,6 +62,8 @@ struct rst_info {
>  	 */
>  	bool			has_seccomp;
>  
> +	bool			has_thp_enabled;
> +
>  	void			*breakpoint;
>  };
>  
> diff --git a/criu/mem.c b/criu/mem.c
> index 509f668..0b38ae8 100644
> --- a/criu/mem.c
> +++ b/criu/mem.c
> @@ -4,6 +4,7 @@
>  #include <errno.h>
>  #include <fcntl.h>
>  #include <sys/syscall.h>
> +#include <sys/prctl.h>
>  
>  #include "types.h"
>  #include "cr_options.h"
> @@ -27,6 +28,7 @@
>  #include "files-reg.h"
>  #include "pagemap-cache.h"
>  #include "fault-injection.h"
> +#include "prctl.h"
>  #include <compel/compel.h>
>  
>  #include "protobuf.h"
> @@ -1024,6 +1026,33 @@ err_addr:
>  	return -1;
>  }
>  
> +static int maybe_disable_thp(struct pstree_item *t, struct page_read *pr)
> +{
> +	int ret;
> +
> +	if (!(opts.lazy_pages && page_read_has_parent(pr)))

Why checking for page_read_has_parent()? I mean -- what if page read doesn't
have parent, why do we keep THP on?

> +		return 0;
> +
> +	if (!kdat.has_thp_disable)
> +		pr_warn("Disabling transparent huge pages. "
> +			"It may affect performance!\n");
> +
> +	/*
> +	 * temporarily disable THP to avoid collapse of pages
> +	 * in the areas that will be monitored by uffd
> +	 */
> +	ret = prctl(PR_GET_THP_DISABLE, 0, 0, 0, 0, 0);

What is get for?

> +	if (unlikely(ret < 0))
> +		return -1;
> +	if (!ret && prctl(PR_SET_THP_DISABLE, 1, 0, 0, 0)) {
> +		pr_perror("Cannot disable THP");
> +		return -1;
> +	}
> +	rsti(t)->has_thp_enabled = !ret;
> +
> +	return 0;
> +}
> +
>  int prepare_mappings(struct pstree_item *t)
>  {
>  	int ret = 0;
> @@ -1055,6 +1084,9 @@ int prepare_mappings(struct pstree_item *t)
>  	if (ret <= 0)
>  		return -1;
>  
> +	if (maybe_disable_thp(t, &pr))
> +		return -1;
> +
>  	pr.advance(&pr); /* shift to the 1st iovec */
>  
>  	ret = premap_priv_vmas(t, vmas, &addr, &pr);
> @@ -1196,4 +1228,3 @@ int prepare_vmas(struct pstree_item *t, struct task_restore_args *ta)
>  
>  	return prepare_vma_ios(t, ta);
>  }
> -
> diff --git a/criu/pie/restorer.c b/criu/pie/restorer.c
> index 7b9c052..28a1129 100644
> --- a/criu/pie/restorer.c
> +++ b/criu/pie/restorer.c
> @@ -1272,6 +1272,14 @@ long __export_restore_task(struct task_restore_args *args)
>  	}
>  
>  	if (args->uffd > -1) {
> +		/* re-enable THP if we disabled it previously */
> +		if (args->has_thp_enabled) {
> +			if (sys_prctl(PR_SET_THP_DISABLE, 1, 0, 0, 0)) {
> +				pr_err("Cannot re-enable THP\n");
> +				goto core_restore_end;
> +			}
> +		}
> +
>  		pr_debug("lazy-pages: closing uffd %d\n", args->uffd);
>  		/*
>  		 * All userfaultfd configuration has finished at this point.
> 



More information about the CRIU mailing list