[CRIU] [PATCH v4 12/12] criu: lazy-pages: enable remoting of lazy pages

Mike Rapoport rppt at linux.vnet.ibm.com
Sun Jun 5 23:27:45 PDT 2016


The remote lazy pages variant can be run as follows:

src# criu dump -t <pid> --lazy-pages --port 9876 -D /tmp/1 &
src# while ! sudo fuser 9876/tcp ; do sleep 1; done
src# scp -r /tmp/1/ dst:/tmp/

dst# criu lazy-pages --lazy-addr /tmp/uffd.sock --page-server \
                     --address dst --port 9876 -D /tmp/1 &
dst# criu restore --lazy-pages --lazy-addr /tmp/uffd.sock -D /tmp/1

In a nutshell, this implementation of remote lazy pages does the following:

- dump collects the process memory into the pipes and transfers non-lazy pages
  to the images or to the page-server on the restore side. The lazy pages
  are kept in the pipes for later transfer.
- when the dump creates the page_pipe_bufs, it marks the buffers containing
potentially lazy pages with PPB_LAZY
- at the dump_finish stage, the dump side starts a TCP server that will
handle page requests from the restore side
- the checkpoint directory is transferred to the restore side
- on the restore side the lazy-pages daemon is started; it creates a UNIX
socket to receive uffds from the restore and a TCP socket to forward page
requests to the dump side
- restore creates memory mappings and fills the VMAs that cannot be handled
by uffd with the contents of the pages*img.
- restore registers lazy VMAs with uffd and sends the userfault file
descriptors to the lazy-pages daemon
- when a #PF occurs, the lazy-pages daemon sends a PS_IOV_GET command to the
dump side; the command contains the PID, the faulting address and the number
of pages (always 1 at the moment)
- the dump side extracts the requested pages from the pipe and splices them
into the TCP socket.
- the lazy-pages daemon copies the received pages into the restored process
address space

Signed-off-by: Mike Rapoport <rppt at linux.vnet.ibm.com>
---
 criu/cr-dump.c     | 36 +++++++++++++++++++++++++++++++++---
 criu/include/mem.h |  3 ++-
 criu/mem.c         | 27 +++++++++++++++++----------
 criu/page-read.c   |  2 +-
 criu/uffd.c        |  9 ++++++++-
 5 files changed, 61 insertions(+), 16 deletions(-)

diff --git a/criu/cr-dump.c b/criu/cr-dump.c
index ccebe2f..260cc0b 100644
--- a/criu/cr-dump.c
+++ b/criu/cr-dump.c
@@ -1142,7 +1142,7 @@ static int pre_dump_one_task(struct pstree_item *item)
 
 	parasite_ctl->pid.virt = item->pid.virt = misc.pid;
 
-	ret = parasite_dump_pages_seized(parasite_ctl, &vmas, true);
+	ret = parasite_dump_pages_seized(parasite_ctl, &vmas, true, false);
 	if (ret)
 		goto err_cure;
 
@@ -1298,7 +1298,8 @@ static int dump_one_task(struct pstree_item *item)
 		}
 	}
 
-	ret = parasite_dump_pages_seized(parasite_ctl, &vmas, false);
+	ret = parasite_dump_pages_seized(parasite_ctl, &vmas, opts.lazy_pages,
+					 opts.lazy_pages);
 	if (ret)
 		goto err_cure;
 
@@ -1338,7 +1339,10 @@ static int dump_one_task(struct pstree_item *item)
 		goto err;
 	}
 
-	ret = parasite_cure_seized(parasite_ctl);
+	if (opts.lazy_pages)
+		ret = parasite_cure_remote(parasite_ctl);
+	else
+		ret = parasite_cure_seized(parasite_ctl);
 	if (ret) {
 		pr_err("Can't cure (pid: %d) from parasite\n", pid);
 		goto err;
@@ -1525,6 +1529,28 @@ err:
 	return cr_pre_dump_finish(ret);
 }
 
+static int cr_lazy_mem_dump(void)
+{
+	struct pstree_item *item;
+	int ret = 0;
+
+	pr_info("Starting lazy pages server\n");
+	ret = cr_page_server(false, -1);
+
+	for_each_pstree_item(item) {
+		struct parasite_ctl *ctl = item->parasite_ctl;
+		destroy_page_pipe(ctl->mem_pp);
+		parasite_cure_local(ctl);
+	}
+
+	if (ret)
+		pr_err("Lazy pages transfer FAILED.\n");
+	else
+		pr_info("Lazy pages transfer finished successfully\n");
+
+	return ret;
+}
+
 static int cr_dump_finish(int ret)
 {
 	int post_dump_ret = 0;
@@ -1583,6 +1609,10 @@ static int cr_dump_finish(int ret)
 		network_unlock();
 		delete_link_remaps();
 	}
+
+	if (opts.lazy_pages)
+		ret = cr_lazy_mem_dump();
+
 	pstree_switch_state(root_item,
 			    (ret || post_dump_ret) ?
 			    TASK_ALIVE : opts.final_state);
diff --git a/criu/include/mem.h b/criu/include/mem.h
index a9750db..a4696fc 100644
--- a/criu/include/mem.h
+++ b/criu/include/mem.h
@@ -11,7 +11,8 @@ extern int do_task_reset_dirty_track(int pid);
 extern unsigned int dump_pages_args_size(struct vm_area_list *vmas);
 extern int parasite_dump_pages_seized(struct parasite_ctl *ctl,
 				      struct vm_area_list *vma_area_list,
-				      bool delayed_dump);
+				      bool delayed_dump,
+				      bool lazy);
 
 #define PME_PRESENT		(1ULL << 63)
 #define PME_SWAP		(1ULL << 62)
diff --git a/criu/mem.c b/criu/mem.c
index bb2a34d..548a03b 100644
--- a/criu/mem.c
+++ b/criu/mem.c
@@ -220,7 +220,8 @@ static struct parasite_dump_pages_args *prep_dump_pages_args(struct parasite_ctl
 }
 
 static int dump_pages(struct page_pipe *pp, struct parasite_ctl *ctl,
-			struct parasite_dump_pages_args *args, struct page_xfer *xfer)
+		      struct parasite_dump_pages_args *args,
+		      struct page_xfer *xfer, bool lazy)
 {
 	struct page_pipe_buf *ppb;
 	int ret = 0;
@@ -254,7 +255,7 @@ static int dump_pages(struct page_pipe *pp, struct parasite_ctl *ctl,
 	 */
 	if (xfer) {
 		timing_start(TIME_MEMWRITE);
-		ret = page_xfer_dump_pages(xfer, pp, 0, true);
+		ret = page_xfer_dump_pages(xfer, pp, 0, !lazy);
 		timing_stop(TIME_MEMWRITE);
 	}
 
@@ -264,13 +265,14 @@ static int dump_pages(struct page_pipe *pp, struct parasite_ctl *ctl,
 static int __parasite_dump_pages_seized(struct parasite_ctl *ctl,
 		struct parasite_dump_pages_args *args,
 		struct vm_area_list *vma_area_list,
-		bool delayed_dump)
+		bool delayed_dump, bool lazy)
 {
 	pmc_t pmc = PMC_INIT;
 	struct page_pipe *pp;
 	struct vma_area *vma_area;
 	struct page_xfer xfer = { .parent = NULL };
 	int ret = -1;
+	bool should_xfer = (!delayed_dump || lazy);
 
 	pr_info("\n");
 	pr_info("Dumping pages (type: %d pid: %d)\n", CR_FD_PAGES, ctl->pid.real);
@@ -292,11 +294,12 @@ static int __parasite_dump_pages_seized(struct parasite_ctl *ctl,
 	ret = -1;
 
 	ctl->mem_pp = pp = create_page_pipe(vma_area_list->priv_size,
-					    pargs_iovs(args), !delayed_dump);
+					    lazy ? NULL : pargs_iovs(args),
+					    !delayed_dump);
 	if (!pp)
 		goto out;
 
-	if (!delayed_dump) {
+	if (should_xfer) {
 		ret = open_page_xfer(&xfer, CR_FD_PAGEMAP, ctl->pid.virt);
 		if (ret < 0)
 			goto out_pp;
@@ -334,7 +337,7 @@ again:
 		if (ret == -EAGAIN) {
 			BUG_ON(delayed_dump);
 
-			ret = dump_pages(pp, ctl, args, &xfer);
+			ret = dump_pages(pp, ctl, args, &xfer, false);
 			if (ret)
 				goto out_xfer;
 			page_pipe_reinit(pp);
@@ -344,7 +347,10 @@ again:
 			goto out_xfer;
 	}
 
-	ret = dump_pages(pp, ctl, args, delayed_dump ? NULL : &xfer);
+	if (lazy)
+		memcpy(pargs_iovs(args), pp->iovs,
+		       sizeof(struct iovec) * pp->nr_iovs);
+	ret = dump_pages(pp, ctl, args, should_xfer ? &xfer : NULL, lazy);
 	if (ret)
 		goto out_xfer;
 
@@ -356,7 +362,7 @@ again:
 
 	ret = task_reset_dirty_track(ctl->pid.real);
 out_xfer:
-	if (!delayed_dump)
+	if (should_xfer)
 		xfer.close(&xfer);
 out_pp:
 	if (ret || !delayed_dump)
@@ -368,7 +374,8 @@ out:
 }
 
 int parasite_dump_pages_seized(struct parasite_ctl *ctl,
-		struct vm_area_list *vma_area_list, bool delayed_dump)
+		struct vm_area_list *vma_area_list, bool delayed_dump,
+		bool lazy)
 {
 	int ret;
 	struct parasite_dump_pages_args *pargs;
@@ -396,7 +403,7 @@ int parasite_dump_pages_seized(struct parasite_ctl *ctl,
 	}
 
 	ret = __parasite_dump_pages_seized(ctl, pargs, vma_area_list,
-					   delayed_dump);
+					   delayed_dump, lazy);
 
 	if (ret) {
 		pr_err("Can't dump page with parasite\n");
diff --git a/criu/page-read.c b/criu/page-read.c
index e5ec76a..203b170 100644
--- a/criu/page-read.c
+++ b/criu/page-read.c
@@ -92,7 +92,7 @@ static void skip_pagemap_pages(struct page_read *pr, unsigned long len)
 		return;
 
 	pr_debug("\tpr%u Skip %lu bytes from page-dump\n", pr->id, len);
-	if (!pr->pe->in_parent)
+	if (!pr->pe->in_parent && !opts.lazy_pages)
 		lseek(img_raw_fd(pr->pi), len, SEEK_CUR);
 	pr->cvaddr += len;
 }
diff --git a/criu/uffd.c b/criu/uffd.c
index c52809b..e30af7f 100644
--- a/criu/uffd.c
+++ b/criu/uffd.c
@@ -34,6 +34,7 @@
 #include "xmalloc.h"
 #include "syscall-codes.h"
 #include "restorer.h"
+#include "page-xfer.h"
 
 #undef  LOG_PREFIX
 #define LOG_PREFIX "lazy-pages: "
@@ -370,7 +371,10 @@ static int uffd_copy_page(struct lazy_pages_info *lpi, __u64 address,
 	struct uffdio_copy uffdio_copy;
 	int rc;
 
-	rc = get_page(lpi, address, dest);
+	if (opts.use_page_server)
+		rc = get_remote_pages(lpi->pid, address, 1, dest);
+	else
+		rc = get_page(lpi, address, dest);
 	if (rc <= 0)
 		return rc;
 
@@ -868,6 +872,9 @@ int cr_lazy_pages()
 	if (prepare_uffds(epollfd))
 		return -1;
 
+	if (connect_to_page_server())
+		return -1;
+
 	ret = handle_requests(epollfd, events);
 	lpi_hash_fini();
 
-- 
1.9.1



More information about the CRIU mailing list