[CRIU] [PATCH] UFFD: Support lazy-pages restore between two hosts

Adrian Reber adrian at lisas.de
Thu Mar 24 03:27:13 PDT 2016


From: Adrian Reber <areber at redhat.com>

This enhances lazy-pages mode to work with two different hosts. Instead
of lazily restoring a process on the same host, this makes it possible
to keep the memory pages on the source system and transfer them to the
destination system only on demand.

The previous, single-host lazy restore consisted of two processes:

 criu restore --lazy-pages --address /path/to/unix-domain-socket

and

 criu lazy-pages --address /path/to/unix-domain-socket

The unix domain socket was used to transfer the userfault FD (UFFD) from
the 'criu restore' process to the 'criu lazy-pages' process. The 'criu
lazy-pages' process then listened on the UFFD for userfaultfd messages,
which were used to retrieve the requested memory page from the
checkpoint directory and transfer that page into the process being
restored.

This commit introduces the ability to keep the pages on the remote host
and only request the transfer of the required pages over TCP on demand.
Therefore criu needs to be started differently than previously.

Host1:

   criu restore --lazy-pages --address /path/to/unix-domain-socket

  and

   criu lazy-pages --address /path/to/unix-domain-socket \
   --lazy-client ADDR-Host2 --port 27

Host2:

   criu lazy-pages --lazy-server --port 27

On Host1 the process is now restored (as criu always does) except that
the memory pages are not read from pages.img and that the appropriate
pages are marked as being userfaultfd handled. As soon as the restored
process tries to access one of the pages a UFFD MSG is received by the
lazy-client (on Host1). This UFFD MSG is then transferred via TCP to the
lazy-server (on Host2). The lazy-server retrieves the memory page from
the local checkpoint and returns a UFFDIO COPY answer back to the
lazy-client which can then forward this message to the local UFFD which
inserts the page into the restored process.

The remote lazy restore behaves like the local lazy restore: if no more
UFFD MSG is received on the socket for 5 seconds, it switches to the
copy-remaining-pages mode, in which all pages that have not been
requested via UFFD are transferred into the restored process.

TODO:
  * Create from the checkpoint directory a checkpoint without the memory
    pages which are UFFD handled. This would enable a real UFFD remote
    restore where the UFFD pages do not need to be transferred to the
    destination host.

Signed-off-by: Adrian Reber <areber at redhat.com>
---
 criu/crtools.c            |  25 ++-
 criu/include/cr_options.h |   2 +
 criu/uffd.c               | 482 ++++++++++++++++++++++++++++++++--------------
 3 files changed, 365 insertions(+), 144 deletions(-)

diff --git a/criu/crtools.c b/criu/crtools.c
index 6785c78..13240e5 100644
--- a/criu/crtools.c
+++ b/criu/crtools.c
@@ -319,12 +319,14 @@ int main(int argc, char *argv[], char *envp[])
 		{ "external",			required_argument,	0, 1073	},
 		{ "empty-ns",			required_argument,	0, 1074	},
 		{ "unshare",			required_argument,	0, 1075 },
-#ifdef CONFIG_HAS_UFFD
-		{ "lazy-pages",			no_argument,		0, 1076 },
-#endif
 		{ "extra",			no_argument,		0, 1077	},
 		{ "experimental",		no_argument,		0, 1078	},
 		{ "all",			no_argument,		0, 1079	},
+#ifdef CONFIG_HAS_UFFD
+		{ "lazy-pages",			no_argument,		0, 1076 },
+		{ "lazy-server",		no_argument,		0, 1080 },
+		{ "lazy-client",		required_argument,	0, 1081 },
+#endif
 		{ },
 	};
 
@@ -585,6 +587,12 @@ int main(int argc, char *argv[], char *envp[])
 		case 1076:
 			opts.lazy_pages = true;
 			break;
+		case 1080:
+			opts.lazy_server = true;
+			break;
+		case 1081:
+			opts.lazy_client = optarg;
+			break;
 #endif
 		case 'M':
 			{
@@ -929,6 +937,17 @@ usage:
 "  --address ADDR        address of server or service\n"
 "  --port PORT           port of page server\n"
 "  -d|--daemon           run in the background after creating socket\n"
+#ifdef CONFIG_HAS_UFFD
+"  --lazy-server         when running in lazy-pages mode this enables the\n"
+"                        lazy-server mode which is required for remote\n"
+"                        lazy restores\n"
+"                        this option needs --port <number> to specify the\n"
+"                        network port under which the lazy-server will be\n"
+"                        reachable\n"
+" --lazy-client ADDR     address of the lazy-server\n"
+"                        this option needs the --port option to specify the\n"
+"                        network port under which the lazy-server reachable\n"
+#endif
 "\n"
 "Other options:\n"
 "  -h|--help             show this text\n"
diff --git a/criu/include/cr_options.h b/criu/include/cr_options.h
index 4853bea..e433ebb 100644
--- a/criu/include/cr_options.h
+++ b/criu/include/cr_options.h
@@ -110,6 +110,8 @@ struct cr_options {
 	unsigned int		timeout;
 	unsigned int		empty_ns;
 	bool			lazy_pages;
+	bool			lazy_server;
+	char			*lazy_client;
 };
 
 extern struct cr_options opts;
diff --git a/criu/uffd.c b/criu/uffd.c
index 69403dd..1c3c3a6 100644
--- a/criu/uffd.c
+++ b/criu/uffd.c
@@ -7,12 +7,16 @@
 #include <fcntl.h>
 #include <string.h>
 #include <time.h>
+#include <arpa/inet.h>
+#include <netinet/ip.h>
 #include <sys/stat.h>
 #include <sys/mman.h>
 #include <sys/syscall.h>
 #include <sys/ioctl.h>
 #include <sys/un.h>
 #include <sys/socket.h>
+#include <sys/socket.h>
+#include <sys/wait.h>
 
 #include "asm/page.h"
 #include "include/log.h"
@@ -242,43 +246,87 @@ struct uffd_pages_struct {
 	int flags;
 };
 
-static int uffd_copy_page(int uffd, __u64 address, void *dest)
-{
+struct remote_uffd {
 	struct uffdio_copy uffdio_copy;
+	int remaining;
+};
+
+static unsigned long vdso_pages;
+static unsigned long total_pages;
+static unsigned long uffd_copied_pages;
+
+static void page_copied_status()
+{
+	/*
+	 * In lazy_client mode the number of total pages is unknown.
+	 * Still print out some status message how many pages have already
+	 * been transferred.
+	 */
+	if (opts.lazy_client)
+		pr_debug("With UFFD transferred pages: (%ld)\n", uffd_copied_pages);
+	else
+		pr_debug("With UFFD transferred pages: (%ld/%ld)\n", uffd_copied_pages,
+			 total_pages);
+}
+
+static int uffd_copy_page(int uffd, __u64 address, void *dest, int remaining)
+{
+	struct remote_uffd remote_uffd;
+	struct stat statbuf;
 	int rc;
 
-	rc = get_page(address, dest);
-	if (rc <= 0)
+	if (fstat(uffd, &statbuf)) {
+		pr_perror("fstat of FD failed: ");
 		return -1;
+	}
 
-	uffdio_copy.dst = address;
-	uffdio_copy.src = (unsigned long) dest;
-	uffdio_copy.len = page_size();
-	uffdio_copy.mode = 0;
-	uffdio_copy.copy = 0;
+	if (!opts.lazy_client) {
+		rc = get_page(address, dest);
+		if (rc <= 0)
+			return -1;
+	}
 
-	pr_debug("uffdio_copy.dst 0x%llx\n", uffdio_copy.dst);
-	rc = ioctl(uffd, UFFDIO_COPY, &uffdio_copy);
+	remote_uffd.uffdio_copy.dst = address;
+	remote_uffd.uffdio_copy.src = (unsigned long) dest;
+	remote_uffd.uffdio_copy.len = page_size();
+	remote_uffd.uffdio_copy.mode = 0;
+	remote_uffd.uffdio_copy.copy = 0;
+	remote_uffd.remaining = remaining;
+
+	pr_debug("uffdio_copy.dst 0x%llx\n", remote_uffd.uffdio_copy.dst);
+	if (S_ISSOCK(statbuf.st_mode)) {
+		pr_debug("FD is a socket\n");
+		if (write(uffd, &remote_uffd, sizeof(remote_uffd)) != sizeof(remote_uffd)) {
+			pr_perror("Can't write to lazy client");
+			return -1;
+		}
+		if (write(uffd, dest, remote_uffd.uffdio_copy.len) != remote_uffd.uffdio_copy.len) {
+			pr_perror("Can't write to lazy client");
+			return -1;
+		}
+		return sizeof(remote_uffd.uffdio_copy);
+	} else {
+		rc = ioctl(uffd, UFFDIO_COPY, &remote_uffd.uffdio_copy);
+	}
 	pr_debug("ioctl UFFDIO_COPY rc 0x%x\n", rc);
-	pr_debug("uffdio_copy.copy 0x%llx\n", uffdio_copy.copy);
+	pr_debug("uffdio_copy.copy 0x%llx\n", remote_uffd.uffdio_copy.copy);
 	if (rc) {
-		/* real retval in ufdio_copy.copy */
-		if (uffdio_copy.copy != -EEXIST) {
-			pr_err("UFFDIO_COPY error %Ld\n", uffdio_copy.copy);
+		/* real retval in remote_uffd.ufdio_copy.copy */
+		if (remote_uffd.uffdio_copy.copy != -EEXIST) {
+			pr_err("UFFDIO_COPY error %Ld\n", remote_uffd.uffdio_copy.copy);
 			return -1;
 		}
-	} else if (uffdio_copy.copy != page_size()) {
-		pr_err("UFFDIO_COPY unexpected size %Ld\n", uffdio_copy.copy);
+	} else if (remote_uffd.uffdio_copy.copy != page_size()) {
+		pr_err("UFFDIO_COPY unexpected size %Ld\n", remote_uffd.uffdio_copy.copy);
 		return -1;
 	}
 
 
-	return uffdio_copy.copy;
+	return remote_uffd.uffdio_copy.copy;
 
 }
 
-static int collect_uffd_pages(struct page_read *pr, struct list_head *uffd_list,
-			      unsigned long *vma_size)
+static int collect_uffd_pages(struct page_read *pr, struct list_head *uffd_list)
 {
 	unsigned long base;
 	int i;
@@ -332,63 +380,59 @@ static int collect_uffd_pages(struct page_read *pr, struct list_head *uffd_list,
 
 		pr_debug("Adding 0x%lx to our list\n", base);
 
-		*vma_size += ps;
 		uffd_pages = xzalloc(sizeof(struct uffd_pages_struct));
 		if (!uffd_pages)
 			return -1;
 		uffd_pages->addr = base;
-		if (uffd_vdso)
+		if (uffd_vdso) {
 			uffd_pages->flags |= UFFD_FLAG_VDSO;
+			vdso_pages++;
+		}
 		list_add(&uffd_pages->list, uffd_list);
 	}
 
 	return 1;
 }
 
-static int handle_remaining_pages(int uffd, struct list_head *uffd_list, unsigned long *vma_size,
-				  void *dest)
+static int handle_remaining_pages(int uffd, struct list_head *uffd_list, void *dest)
 {
-	unsigned long uffd_copied_pages = 0;
 	struct uffd_pages_struct *uffd_pages;
 	int rc;
 
-	pr_debug("remaining vma_size: 0x%lx\n", *vma_size);
-	pr_debug("uffd_copied_pages:    %ld\n", uffd_copied_pages);
-
 	list_for_each_entry(uffd_pages, uffd_list, list) {
 		pr_debug("Checking remaining pages 0x%lx (flags 0x%x)\n",
 			 uffd_pages->addr, uffd_pages->flags);
 		if (uffd_pages->flags & UFFD_FLAG_SENT)
 			continue;
 
-		rc = uffd_copy_page(uffd, uffd_pages->addr, dest);
+		rc = uffd_copy_page(uffd, uffd_pages->addr, dest,
+				    total_pages - ++uffd_copied_pages);
 		if (rc < 0) {
 			pr_err("Error during UFFD copy\n");
 			return -1;
 		}
-		*vma_size -= rc;
 
-		pr_debug("remaining vma_size: 0x%lx\n", *vma_size);
-		uffd_copied_pages++;
+		page_copied_status();
 		uffd_pages->flags |= UFFD_FLAG_SENT;
 	}
 
-	return uffd_copied_pages;
+	return 0;
 }
 
 
-static int handle_regular_pages(int uffd, struct list_head *uffd_list, unsigned long *vma_size,
-				void *dest, __u64 address)
+static int handle_regular_pages(int uffd, struct list_head *uffd_list, void *dest, __u64 address)
 {
 	int rc;
 	struct uffd_pages_struct *uffd_pages;
 
-	rc = uffd_copy_page(uffd, address, dest);
+	rc = uffd_copy_page(uffd, address, dest, 0);
 	if (rc < 0) {
 		pr_err("Error during UFFD copy\n");
 		return -1;
 	}
-	*vma_size -= rc;
+
+	uffd_copied_pages++;
+	page_copied_status();
 
 	/*
 	 * Mark this page as having been already transferred, so
@@ -399,44 +443,45 @@ static int handle_regular_pages(int uffd, struct list_head *uffd_list, unsigned
 			uffd_pages->flags |= UFFD_FLAG_SENT;
 	}
 
-
-	return 1;
+	return 0;
 }
 
-static int handle_vdso_pages(int uffd, struct list_head *uffd_list, unsigned long *vma_size,
-			     void *dest)
+static int handle_vdso_pages(int uffd, struct list_head *uffd_list, void *dest)
 {
 	int rc;
 	struct uffd_pages_struct *uffd_pages;
-	int uffd_copied_pages = 0;
 
 	list_for_each_entry(uffd_pages, uffd_list, list) {
 		if (!(uffd_pages->flags & UFFD_FLAG_VDSO))
 			continue;
-		rc = uffd_copy_page(uffd, uffd_pages->addr, dest);
+		rc = uffd_copy_page(uffd, uffd_pages->addr, dest, --vdso_pages);
 		if (rc < 0) {
 			pr_err("Error during UFFD copy\n");
 			return -1;
 		}
-		*vma_size -= rc;
 		uffd_copied_pages++;
+		page_copied_status();
 		uffd_pages->flags |= UFFD_FLAG_SENT;
 	}
-	return uffd_copied_pages;
+	return 0;
 }
 
 /*
  *  Setting up criu infrastructure and scan for VMAs.
  */
-static int find_vmas()
+static int find_vmas(struct list_head *uffd_list)
 {
 	struct cr_img *img;
 	int ret;
 	struct vm_area_list vmas;
 	int vn = 0;
 	struct rst_info *ri;
+	struct page_read pr;
+	struct uffd_pages_struct *uffd_pages;
 
-	LIST_HEAD(uffd_list);
+	/* No need to scan for VMAs in lazy_client mode. */
+	if (opts.lazy_client)
+		return 0;
 
 	if (check_img_inventory() == -1)
 		return -1;
@@ -471,7 +516,7 @@ static int find_vmas()
 		ret = -1;
 		vma = alloc_vma_area();
 		if (!vma)
-			break;
+			goto out;
 
 		ret = 0;
 		ri->vmas.nr++;
@@ -485,93 +530,126 @@ static int find_vmas()
 				vmas.priv_size += PAGE_SIZE;
 		}
 
-		pr_info("vma 0x%"PRIx64" 0x%"PRIx64"\n", vma->e->start, vma->e->end);
+		pr_info("vma 0x%" PRIx64 " 0x%" PRIx64 "\n", vma->e->start, vma->e->end);
 	}
 
+	ret = open_page_read(pid, &pr, PR_TASK);
+	if (ret <= 0) {
+		ret = -1;
+		goto out;
+	}
+	/*
+	 * This puts all pages which should be handled by userfaultfd
+	 * in the list uffd_list. This list is later used to detect if
+	 * a page has already been transferred or if it needs to be
+	 * pushed into the process using userfaultfd.
+	 */
+	do {
+		ret = collect_uffd_pages(&pr, uffd_list);
+		if (ret == -1) {
+			goto out;
+		}
+	} while (ret);
+
+	if (pr.close)
+		pr.close(&pr);
+
+	/* Count detected pages */
+	list_for_each_entry(uffd_pages, uffd_list, list)
+	    ret++;
+
+	pr_debug("Found %d pages to be handled by UFFD\n", ret);
+
+out:
 	return ret;
 }
 
-int uffd_listen()
+static int receive_loop(int fd, int lazy_server, void *dest)
 {
-	__u64 address;
-	void *dest;
-	__u64 flags;
-	struct uffd_msg msg;
-	struct page_read pr;
-	unsigned long ps;
+	struct remote_uffd remote_uffd;
+	int read_counter;
 	int rc;
-	fd_set set;
-	struct timeval timeout;
-	int uffd;
-	unsigned long uffd_copied_pages = 0;
-	unsigned long total_pages = 0;
-	int uffd_flags;
-	struct uffd_pages_struct *uffd_pages;
-	bool vdso_sent = false;
-	unsigned long vma_size = 0;
+	unsigned long ps;
 
-	LIST_HEAD(uffd_list);
+	ps = page_size();
 
-	if (!opts.addr) {
-		pr_info("Please specify a file name for the unix domain socket\n");
-		pr_info("used to communicate between the lazy-pages server\n");
-		pr_info("and the restore process. Use the --address option like\n");
-		pr_info("criu --lazy-pages --address /tmp/userfault.socket\n");
+	do {
+		/* Let's wait for the answer. Should be sizeof(remote_uffd) */
+		if (read(lazy_server, &remote_uffd, sizeof(remote_uffd)) != sizeof(remote_uffd)) {
+			pr_perror("Can't read from lazy server");
+			return -1;
+		}
+		pr_debug("from lazy server: uffdio_copy.dst 0x%llx\n", remote_uffd.uffdio_copy.dst);
+		/* And now the actual data, should be exactly a page */
+		if (remote_uffd.uffdio_copy.len != ps) {
+			pr_err("uffdio_copy.len should never be != page size (%ld)\n", ps);
+			return -1;
+		}
+		read_counter = 0;
+		while (read_counter < ps) {
+			rc = read(lazy_server, dest + read_counter, ps - read_counter);
+			if (rc == -1) {
+				pr_perror("Can't read from lazy server");
+				return -1;
+			}
+			read_counter += rc;
+			pr_debug("read_counter %d\n", read_counter);
+		}
+		rc = uffd_copy_page(fd, remote_uffd.uffdio_copy.dst, dest, 0);
+		if (rc < 0) {
+			pr_err("Error during UFFD copy\n");
+			return -1;
+		}
+		uffd_copied_pages++;
+		page_copied_status();
+		pr_debug("remote_uffd.remaining: %d\n", remote_uffd.remaining);
+	} while (remote_uffd.remaining != 0);
+
+	return 0;
+}
+
+static int handle_remote_requests(int fd, int lazy_server, struct uffd_msg *msg, void *dest)
+{
+	pr_debug("Sending userfaultfd msg to lazy server\n");
+
+	if (write(lazy_server, msg, sizeof(*msg)) != sizeof(*msg)) {
+		pr_perror("Can't write to lazy server");
 		return -1;
 	}
 
-	pr_debug("Waiting for incoming connections on %s\n", opts.addr);
-	if ((uffd = ud_open()) < 0)
-		exit(0);
+	return receive_loop(fd, lazy_server, dest);
+}
 
-	pr_debug("uffd is 0x%d\n", uffd);
-	uffd_flags = fcntl(uffd, F_GETFD, NULL);
-	pr_debug("uffd_flags are 0x%x\n", uffd_flags);
+static int handle_requests(int fd, int lazy_server)
+{
+	fd_set set;
+	int ret = -1;
+	struct uffd_msg msg;
+	__u64 flags;
+	__u64 address;
+	unsigned long ps;
+	struct timeval timeout;
+	struct uffd_pages_struct *uffd_pages;
+	void *dest;
+	bool vdso_sent = false;
+
+	LIST_HEAD(uffd_list);
 
 	/*
 	 * Find the memory pages belonging to the restored process
 	 * so that it is trackable when all pages have been transferred.
 	 */
-	if (find_vmas() == -1)
+	if ((total_pages = find_vmas(&uffd_list)) == -1)
 		return -1;
 
 	/* Initialize FD sets for read() with timeouts (using select()) */
 	FD_ZERO(&set);
-	FD_SET(uffd, &set);
+	FD_SET(fd, &set);
 
 	/* All operations will be done on page size */
 	ps = page_size();
 	dest = malloc(ps);
 
-	rc = open_page_read(pid, &pr, PR_TASK);
-	if (rc <= 0) {
-		rc = 1;
-		goto out;
-	}
-	/*
-	 * This puts all pages which should be handled by userfaultfd
-	 * in the list uffd_list. This list is later used to detect if
-	 * a page has already been transferred or if it needs to be
-	 * pushed into the process using userfaultfd.
-	 */
-	do {
-		rc = collect_uffd_pages(&pr, &uffd_list, &vma_size);
-		if (rc == -1) {
-			rc = 1;
-			goto out;
-		}
-	} while (rc);
-
-	if (pr.close)
-		pr.close(&pr);
-
-
-	/* Count detected pages */
-	list_for_each_entry(uffd_pages, &uffd_list, list)
-	    total_pages++;
-
-	pr_debug("Found %ld pages to be handled by UFFD\n", total_pages);
-
 	while (1) {
 		bool page_sent = false;
 		/*
@@ -585,24 +663,44 @@ int uffd_listen()
 		 */
 		timeout.tv_sec = 5;
 		timeout.tv_usec = 0;
-		rc = select(uffd + 1, &set, NULL, NULL, &timeout);
-		pr_debug("select() rc: 0x%x\n", rc);
-		if (rc == 0) {
+		/*
+		 * If lazy-pages is running in lazy_server mode the timeout
+		 * needs to be longer than on the lazy_client side to make
+		 * sure the lazy_server does not end the whole thing
+		 * before the lazy_client is done/ready.
+		 */
+		if (lazy_server > 0)
+			timeout.tv_sec++;
+
+		ret = select(fd + 1, &set, NULL, NULL, &timeout);
+		pr_debug("select() rc: 0x%x\n", ret);
+		if (ret == 0) {
 			pr_debug("read timeout\n");
 			pr_debug("switching from request to copy mode\n");
 			break;
 		}
-		rc = read(uffd, &msg, sizeof(msg));
-		pr_debug("read() rc: 0x%x\n", rc);
-
-		if (rc != sizeof(msg)) {
-			if (rc < 0)
-				pr_perror("read error");
-			else
-				pr_debug("short read\n");
-			continue;
+		ret = read(fd, &msg, sizeof(msg));
+		pr_debug("read() ret: 0x%x\n", ret);
+		if (!ret)
+			break;
+
+		if (ret != sizeof(msg)) {
+			pr_perror("Can't read userfaultfd message from socket");
+			ret = -1;
+			break;
+		}
+
+
+		if (lazy_server > 0) {
+			ret = handle_remote_requests(fd, lazy_server, &msg, dest);
+			if (ret >= 0)
+				continue;
+			ret = -1;
+			goto out;
+
 		}
 
+		ret = 0;
 		/* Align requested address to the next page boundary */
 		address = msg.arg.pagefault.address & ~(ps - 1);
 		pr_debug("msg.arg.pagefault.address 0x%llx\n", address);
@@ -613,13 +711,12 @@ int uffd_listen()
 		 */
 		if (!vdso_sent) {
 			pr_debug("Pushing VDSO pages once\n");
-			rc = handle_vdso_pages(uffd, &uffd_list, &vma_size, dest);
-			if (rc < 0) {
+			ret = handle_vdso_pages(fd, &uffd_list, dest);
+			if (ret < 0) {
 				pr_err("Error during VDSO handling\n");
-				rc = 1;
+				ret = -1;
 				goto out;
 			}
-			uffd_copied_pages += rc;
 			vdso_sent = true;
 		}
 
@@ -641,41 +738,144 @@ int uffd_listen()
 
 		if (msg.event != UFFD_EVENT_PAGEFAULT) {
 			pr_err("unexpected msg event %u\n", msg.event);
-			rc = 1;
+			ret = -1;
 			goto out;
 		}
 
-		rc = handle_regular_pages(uffd, &uffd_list, &vma_size, dest, address);
-		if (rc < 0) {
+		ret = handle_regular_pages(fd, &uffd_list, dest, address);
+		if (ret < 0) {
 			pr_err("Error during regular page copy\n");
-			rc = 1;
+			ret = -1;
 			goto out;
 		}
-
-		uffd_copied_pages += rc;
-
 	}
 	pr_debug("Handle remaining pages\n");
-	rc = handle_remaining_pages(uffd, &uffd_list, &vma_size, dest);
-	if (rc < 0) {
+	if (lazy_server > 0)
+		ret = receive_loop(fd, lazy_server, dest);
+	else
+		ret = handle_remaining_pages(fd, &uffd_list, dest);
+	if (ret < 0) {
 		pr_err("Error during remaining page copy\n");
-		rc = 1;
+		ret = 1;
 		goto out;
 	}
 
-	uffd_copied_pages += rc;
-	pr_debug("With UFFD transferred pages: (%ld/%ld)\n", uffd_copied_pages, total_pages);
-	if (uffd_copied_pages != total_pages) {
+	page_copied_status();
+	if ((uffd_copied_pages != total_pages) && (total_pages > 0)) {
 		pr_warn("Only %ld of %ld pages transferred via UFFD\n", uffd_copied_pages,
 			total_pages);
 		pr_warn("Something probably went wrong.\n");
-		rc = 1;
+		ret = 1;
 		goto out;
 	}
-	rc = 0;
+	ret = 0;
 
 out:
 	free(dest);
-	close(uffd);
-	return rc;
+	close(fd);
+	return ret;
+
+}
+
+static int lazy_server_serve(int sk)
+{
+	int ret;
+
+	/* The "transfer protocol" is first the pid as int and then the rest */
+	ret = recv(sk, &pid, sizeof(pid), 0);
+	if (ret != sizeof(pid)) {
+		pr_perror("PID recv error:");
+		return -1;
+	}
+	pr_debug("received PID: %d\n", pid);
+
+	return handle_requests(sk, 0);
+
+}
+
+static int lazy_server()
+{
+	int sk = -1;
+	int ask = -1;
+	int ret = 0;
+
+	sk = setup_tcp_server("lazy");
+	if (sk == -1)
+		return -1;
+
+	ret = run_tcp_server(opts.daemon_mode, &ask, -1, sk);
+	if (ret != 0)
+		return ret;
+
+	if (ask >= 0)
+		ret = lazy_server_serve(ask);
+
+	if (opts.daemon_mode)
+		exit(ret);
+
+	return ret;
+}
+
+static int init_lazy_client()
+{
+	int lazy_server_sk;
+
+	lazy_server_sk = setup_tcp_client(opts.lazy_client);
+	if (lazy_server_sk == -1)
+		return -1;
+
+	/*
+	 * CORK the socket at the very beginning. As per ANK
+	 * the corked by default socket with sporadic NODELAY-s
+	 * on urgent data is the smartest mode ever.
+	 */
+	tcp_cork(lazy_server_sk, true);
+	return lazy_server_sk;
+}
+
+int uffd_listen()
+{
+	int uffd;
+	int uffd_flags;
+	int lazy_server_sk = 0;
+
+	LIST_HEAD(uffd_list);
+
+	if (opts.lazy_server)
+		return lazy_server();
+
+	if (opts.lazy_client)
+		lazy_server_sk = init_lazy_client();
+
+	if (lazy_server_sk == -1) {
+		pr_info("Connection to the lazy server failed. Exiting\n");
+		return -1;
+	}
+
+	if (!opts.addr) {
+		pr_info("Please specify a file name for the unix domain socket\n");
+		pr_info("used to communicate between the lazy-pages server\n");
+		pr_info("and the restore process. Use the --address option like\n");
+		pr_info("criu --lazy-pages --address /tmp/userfault.socket\n");
+		return -1;
+	}
+
+	pr_debug("Waiting for incoming connections on %s\n", opts.addr);
+	if ((uffd = ud_open()) < 0)
+		return -1;
+
+	pr_debug("uffd is 0x%d\n", uffd);
+	uffd_flags = fcntl(uffd, F_GETFD, NULL);
+	pr_debug("uffd_flags are 0x%x\n", uffd_flags);
+
+	if (lazy_server_sk) {
+		/* The "transfer protocol" is first the pid as int and then the rest */
+		pr_debug("Sending PID %d\n", pid);
+		if (send(lazy_server_sk, &pid, sizeof(pid), 0) < 0) {
+			pr_perror("PID sending error:");
+			return -1;
+		}
+	}
+
+	return handle_requests(uffd, lazy_server_sk);
 }
-- 
1.8.3.1



More information about the CRIU mailing list