[CRIU] [PATCH v2 4/7] lazy-pages: fix memory corruption when combining pre-dump with lazy pages
Pavel Emelyanov
xemul at virtuozzo.com
Wed Jun 21 15:30:23 MSK 2017
On 06/21/2017 02:37 PM, Mike Rapoport wrote:
> When we combine pre-dump with lazy pages, we populate a part of a memory
> region with data that was saved during the pre-dump. Afterwards, the
> region is registered with userfaultfd and we expect to get page faults for
> the parts of the region that were not yet populated. However, khugepaged
> collapses the pages and the page faults we would expect do not occur.
>
> To mitigate this problem we temporarily disable THP for the restored
> process, up to the point when we register all the memory regions with
> userfaultfd.
>
> https://lists.openvz.org/pipermail/criu/2017-May/037728.html
>
> Reported-by: Adrian Reber <areber at redhat.com>
> Signed-off-by: Mike Rapoport <rppt at linux.vnet.ibm.com>
> ---
> criu/cr-restore.c | 3 +++
> criu/include/pagemap.h | 5 +++++
> criu/include/restorer.h | 1 +
> criu/include/rst_info.h | 2 ++
> criu/mem.c | 33 ++++++++++++++++++++++++++++++++-
> criu/pie/restorer.c | 8 ++++++++
> 6 files changed, 51 insertions(+), 1 deletion(-)
>
> diff --git a/criu/cr-restore.c b/criu/cr-restore.c
> index 41b5db3..f403127 100644
> --- a/criu/cr-restore.c
> +++ b/criu/cr-restore.c
> @@ -2767,6 +2767,9 @@ static int prepare_mm(pid_t pid, struct task_restore_args *args)
> goto out;
>
> args->fd_exe_link = exe_fd;
> +
> + args->has_thp_enabled = rsti(current)->has_thp_enabled;
> +
> ret = 0;
> out:
> return ret;
> diff --git a/criu/include/pagemap.h b/criu/include/pagemap.h
> index d0a28b9..8007683 100644
> --- a/criu/include/pagemap.h
> +++ b/criu/include/pagemap.h
> @@ -127,6 +127,11 @@ static inline unsigned long pagemap_len(PagemapEntry *pe)
> return pe->nr_pages * PAGE_SIZE;
> }
>
> +static inline bool page_read_has_parent(struct page_read *pr)
> +{
> + return pr->parent != NULL;
> +}
> +
> /* Pagemap flags */
> #define PE_PARENT (1 << 0) /* pages are in parent snapshot */
> #define PE_LAZY (1 << 1) /* pages can be lazily restored */
> diff --git a/criu/include/restorer.h b/criu/include/restorer.h
> index 736ba96..9595ad6 100644
> --- a/criu/include/restorer.h
> +++ b/criu/include/restorer.h
> @@ -120,6 +120,7 @@ struct task_restore_args {
> struct timeval logstart;
>
> int uffd;
> + bool has_thp_enabled;
>
> /* threads restoration */
> int nr_threads; /* number of threads */
> diff --git a/criu/include/rst_info.h b/criu/include/rst_info.h
> index 78b1f64..f9840d1 100644
> --- a/criu/include/rst_info.h
> +++ b/criu/include/rst_info.h
> @@ -62,6 +62,8 @@ struct rst_info {
> */
> bool has_seccomp;
>
> + bool has_thp_enabled;
> +
> void *breakpoint;
> };
>
> diff --git a/criu/mem.c b/criu/mem.c
> index 509f668..0b38ae8 100644
> --- a/criu/mem.c
> +++ b/criu/mem.c
> @@ -4,6 +4,7 @@
> #include <errno.h>
> #include <fcntl.h>
> #include <sys/syscall.h>
> +#include <sys/prctl.h>
>
> #include "types.h"
> #include "cr_options.h"
> @@ -27,6 +28,7 @@
> #include "files-reg.h"
> #include "pagemap-cache.h"
> #include "fault-injection.h"
> +#include "prctl.h"
> #include <compel/compel.h>
>
> #include "protobuf.h"
> @@ -1024,6 +1026,33 @@ err_addr:
> return -1;
> }
>
> +static int maybe_disable_thp(struct pstree_item *t, struct page_read *pr)
> +{
> + int ret;
> +
> + if (!(opts.lazy_pages && page_read_has_parent(pr)))
Why checking for page_read_has_parent()? I mean -- what if page read doesn't
have parent, why do we keep THP on?
> + return 0;
> +
> + if (!kdat.has_thp_disable)
> + pr_warn("Disabling transparent huge pages. "
> + "It may affect performance!\n");
> +
> + /*
> + * temporarily disable THP to avoid collapse of pages
> + * in the areas that will be monitored by uffd
> + */
> + ret = prctl(PR_GET_THP_DISABLE, 0, 0, 0, 0, 0);
What is get for?
> + if (unlikely(ret < 0))
> + return -1;
> + if (!ret && prctl(PR_SET_THP_DISABLE, 1, 0, 0, 0)) {
> + pr_perror("Cannot disable THP");
> + return -1;
> + }
> + rsti(t)->has_thp_enabled = !ret;
> +
> + return 0;
> +}
> +
> int prepare_mappings(struct pstree_item *t)
> {
> int ret = 0;
> @@ -1055,6 +1084,9 @@ int prepare_mappings(struct pstree_item *t)
> if (ret <= 0)
> return -1;
>
> + if (maybe_disable_thp(t, &pr))
> + return -1;
> +
> pr.advance(&pr); /* shift to the 1st iovec */
>
> ret = premap_priv_vmas(t, vmas, &addr, &pr);
> @@ -1196,4 +1228,3 @@ int prepare_vmas(struct pstree_item *t, struct task_restore_args *ta)
>
> return prepare_vma_ios(t, ta);
> }
> -
> diff --git a/criu/pie/restorer.c b/criu/pie/restorer.c
> index 7b9c052..28a1129 100644
> --- a/criu/pie/restorer.c
> +++ b/criu/pie/restorer.c
> @@ -1272,6 +1272,14 @@ long __export_restore_task(struct task_restore_args *args)
> }
>
> if (args->uffd > -1) {
> + /* re-enable THP if we disabled it previously */
> + if (args->has_thp_enabled) {
> + if (sys_prctl(PR_SET_THP_DISABLE, 1, 0, 0, 0)) {
> + pr_err("Cannot re-enable THP\n");
> + goto core_restore_end;
> + }
> + }
> +
> pr_debug("lazy-pages: closing uffd %d\n", args->uffd);
> /*
> * All userfaultfd configuration has finished at this point.
>
More information about the CRIU
mailing list