[CRIU] [PATCH v2 11/11] criu: lazy-pages: enable remoting of lazy pages
Mike Rapoport
rppt at linux.vnet.ibm.com
Tue May 31 06:35:01 PDT 2016
The remote lazy pages variant can be run as follows:
src# criu dump -t <pid> --lazy-pages --port 9876 -D /tmp/1 &
src# while ! sudo fuser 9876/tcp ; do sleep 1; done
src# scp -r /tmp/1/ dst:/tmp/
dst# criu lazy-pages --lazy-addr /tmp/uffd.sock --page-server \
--address src --port 9876 -D /tmp/1 &
dst# criu restore --lazy-pages --lazy-addr /tmp/uffd.sock -D /tmp/1
In a nutshell, this implementation of remote lazy pages does the following:
- dump collects the process memory into the pipes and transfers non-lazy pages
to the images or to the page-server on the restore side. The lazy pages
are kept in the pipes for later transfer.
- when the dump creates the page_pipe_bufs, it marks the buffers containing
potentially lazy pages with PPB_LAZY
- at the dump_finish stage, the dump side starts a TCP server that will
handle page requests from the restore side
- the checkpoint directory is transferred to the restore side
- on the restore side the lazy-pages daemon is started; it creates a UNIX socket
to receive uffds from the restore process and a TCP socket to forward page
requests to the dump side
- restore creates memory mappings and fills the VMAs that cannot be handled
by uffd with the contents of the pages*img.
- restore registers lazy VMAs with uffd and sends the userfault file
descriptors to the lazy-pages daemon
- when a page fault (#PF) occurs, the lazy-pages daemon sends a PS_IOV_GET command
to the dump side; the command contains the PID, the faulting address and the
number of pages (always 1 at the moment)
- the dump side extracts the requested pages from the pipe and splices them
into the TCP socket.
- the lazy-pages daemon copies the received pages into the restored process
address space
Signed-off-by: Mike Rapoport <rppt at linux.vnet.ibm.com>
---
criu/cr-dump.c | 36 +++++++++++++++++++++++++++++++++---
criu/include/mem.h | 3 ++-
criu/mem.c | 27 +++++++++++++++++----------
criu/page-read.c | 2 +-
criu/uffd.c | 9 ++++++++-
5 files changed, 61 insertions(+), 16 deletions(-)
diff --git a/criu/cr-dump.c b/criu/cr-dump.c
index ccebe2f..260cc0b 100644
--- a/criu/cr-dump.c
+++ b/criu/cr-dump.c
@@ -1142,7 +1142,7 @@ static int pre_dump_one_task(struct pstree_item *item)
parasite_ctl->pid.virt = item->pid.virt = misc.pid;
- ret = parasite_dump_pages_seized(parasite_ctl, &vmas, true);
+ ret = parasite_dump_pages_seized(parasite_ctl, &vmas, true, false);
if (ret)
goto err_cure;
@@ -1298,7 +1298,8 @@ static int dump_one_task(struct pstree_item *item)
}
}
- ret = parasite_dump_pages_seized(parasite_ctl, &vmas, false);
+ ret = parasite_dump_pages_seized(parasite_ctl, &vmas, opts.lazy_pages,
+ opts.lazy_pages);
if (ret)
goto err_cure;
@@ -1338,7 +1339,10 @@ static int dump_one_task(struct pstree_item *item)
goto err;
}
- ret = parasite_cure_seized(parasite_ctl);
+ if (opts.lazy_pages)
+ ret = parasite_cure_remote(parasite_ctl);
+ else
+ ret = parasite_cure_seized(parasite_ctl);
if (ret) {
pr_err("Can't cure (pid: %d) from parasite\n", pid);
goto err;
@@ -1525,6 +1529,28 @@ err:
return cr_pre_dump_finish(ret);
}
+static int cr_lazy_mem_dump(void)
+{
+ struct pstree_item *item;
+ int ret = 0;
+
+ pr_info("Starting lazy pages server\n");
+ ret = cr_page_server(false, -1);
+
+ for_each_pstree_item(item) {
+ struct parasite_ctl *ctl = item->parasite_ctl;
+ destroy_page_pipe(ctl->mem_pp);
+ parasite_cure_local(ctl);
+ }
+
+ if (ret)
+ pr_err("Lazy pages transfer FAILED.\n");
+ else
+ pr_info("Lazy pages transfer finished successfully\n");
+
+ return ret;
+}
+
static int cr_dump_finish(int ret)
{
int post_dump_ret = 0;
@@ -1583,6 +1609,10 @@ static int cr_dump_finish(int ret)
network_unlock();
delete_link_remaps();
}
+
+ if (opts.lazy_pages)
+ ret = cr_lazy_mem_dump();
+
pstree_switch_state(root_item,
(ret || post_dump_ret) ?
TASK_ALIVE : opts.final_state);
diff --git a/criu/include/mem.h b/criu/include/mem.h
index a9750db..a4696fc 100644
--- a/criu/include/mem.h
+++ b/criu/include/mem.h
@@ -11,7 +11,8 @@ extern int do_task_reset_dirty_track(int pid);
extern unsigned int dump_pages_args_size(struct vm_area_list *vmas);
extern int parasite_dump_pages_seized(struct parasite_ctl *ctl,
struct vm_area_list *vma_area_list,
- bool delayed_dump);
+ bool delayed_dump,
+ bool lazy);
#define PME_PRESENT (1ULL << 63)
#define PME_SWAP (1ULL << 62)
diff --git a/criu/mem.c b/criu/mem.c
index bb2a34d..548a03b 100644
--- a/criu/mem.c
+++ b/criu/mem.c
@@ -220,7 +220,8 @@ static struct parasite_dump_pages_args *prep_dump_pages_args(struct parasite_ctl
}
static int dump_pages(struct page_pipe *pp, struct parasite_ctl *ctl,
- struct parasite_dump_pages_args *args, struct page_xfer *xfer)
+ struct parasite_dump_pages_args *args,
+ struct page_xfer *xfer, bool lazy)
{
struct page_pipe_buf *ppb;
int ret = 0;
@@ -254,7 +255,7 @@ static int dump_pages(struct page_pipe *pp, struct parasite_ctl *ctl,
*/
if (xfer) {
timing_start(TIME_MEMWRITE);
- ret = page_xfer_dump_pages(xfer, pp, 0, true);
+ ret = page_xfer_dump_pages(xfer, pp, 0, !lazy);
timing_stop(TIME_MEMWRITE);
}
@@ -264,13 +265,14 @@ static int dump_pages(struct page_pipe *pp, struct parasite_ctl *ctl,
static int __parasite_dump_pages_seized(struct parasite_ctl *ctl,
struct parasite_dump_pages_args *args,
struct vm_area_list *vma_area_list,
- bool delayed_dump)
+ bool delayed_dump, bool lazy)
{
pmc_t pmc = PMC_INIT;
struct page_pipe *pp;
struct vma_area *vma_area;
struct page_xfer xfer = { .parent = NULL };
int ret = -1;
+ bool should_xfer = (!delayed_dump || lazy);
pr_info("\n");
pr_info("Dumping pages (type: %d pid: %d)\n", CR_FD_PAGES, ctl->pid.real);
@@ -292,11 +294,12 @@ static int __parasite_dump_pages_seized(struct parasite_ctl *ctl,
ret = -1;
ctl->mem_pp = pp = create_page_pipe(vma_area_list->priv_size,
- pargs_iovs(args), !delayed_dump);
+ lazy ? NULL : pargs_iovs(args),
+ !delayed_dump);
if (!pp)
goto out;
- if (!delayed_dump) {
+ if (should_xfer) {
ret = open_page_xfer(&xfer, CR_FD_PAGEMAP, ctl->pid.virt);
if (ret < 0)
goto out_pp;
@@ -334,7 +337,7 @@ again:
if (ret == -EAGAIN) {
BUG_ON(delayed_dump);
- ret = dump_pages(pp, ctl, args, &xfer);
+ ret = dump_pages(pp, ctl, args, &xfer, false);
if (ret)
goto out_xfer;
page_pipe_reinit(pp);
@@ -344,7 +347,10 @@ again:
goto out_xfer;
}
- ret = dump_pages(pp, ctl, args, delayed_dump ? NULL : &xfer);
+ if (lazy)
+ memcpy(pargs_iovs(args), pp->iovs,
+ sizeof(struct iovec) * pp->nr_iovs);
+ ret = dump_pages(pp, ctl, args, should_xfer ? &xfer : NULL, lazy);
if (ret)
goto out_xfer;
@@ -356,7 +362,7 @@ again:
ret = task_reset_dirty_track(ctl->pid.real);
out_xfer:
- if (!delayed_dump)
+ if (should_xfer)
xfer.close(&xfer);
out_pp:
if (ret || !delayed_dump)
@@ -368,7 +374,8 @@ out:
}
int parasite_dump_pages_seized(struct parasite_ctl *ctl,
- struct vm_area_list *vma_area_list, bool delayed_dump)
+ struct vm_area_list *vma_area_list, bool delayed_dump,
+ bool lazy)
{
int ret;
struct parasite_dump_pages_args *pargs;
@@ -396,7 +403,7 @@ int parasite_dump_pages_seized(struct parasite_ctl *ctl,
}
ret = __parasite_dump_pages_seized(ctl, pargs, vma_area_list,
- delayed_dump);
+ delayed_dump, lazy);
if (ret) {
pr_err("Can't dump page with parasite\n");
diff --git a/criu/page-read.c b/criu/page-read.c
index e5ec76a..203b170 100644
--- a/criu/page-read.c
+++ b/criu/page-read.c
@@ -92,7 +92,7 @@ static void skip_pagemap_pages(struct page_read *pr, unsigned long len)
return;
pr_debug("\tpr%u Skip %lu bytes from page-dump\n", pr->id, len);
- if (!pr->pe->in_parent)
+ if (!pr->pe->in_parent && !opts.lazy_pages)
lseek(img_raw_fd(pr->pi), len, SEEK_CUR);
pr->cvaddr += len;
}
diff --git a/criu/uffd.c b/criu/uffd.c
index c52809b..e30af7f 100644
--- a/criu/uffd.c
+++ b/criu/uffd.c
@@ -34,6 +34,7 @@
#include "xmalloc.h"
#include "syscall-codes.h"
#include "restorer.h"
+#include "page-xfer.h"
#undef LOG_PREFIX
#define LOG_PREFIX "lazy-pages: "
@@ -370,7 +371,10 @@ static int uffd_copy_page(struct lazy_pages_info *lpi, __u64 address,
struct uffdio_copy uffdio_copy;
int rc;
- rc = get_page(lpi, address, dest);
+ if (opts.use_page_server)
+ rc = get_remote_pages(lpi->pid, address, 1, dest);
+ else
+ rc = get_page(lpi, address, dest);
if (rc <= 0)
return rc;
@@ -868,6 +872,9 @@ int cr_lazy_pages()
if (prepare_uffds(epollfd))
return -1;
+ if (connect_to_page_server())
+ return -1;
+
ret = handle_requests(epollfd, events);
lpi_hash_fini();
--
1.9.1
More information about the CRIU
mailing list