[CRIU] [PATCH v3 07/11] criu: page-xfer: add PS_IOV_GET interface

Mike Rapoport rapoport at linux.vnet.ibm.com
Tue May 31 14:01:40 PDT 2016


From: Mike Rapoport <rppt at linux.vnet.ibm.com>

When dump side is acting as lazy pages server it should be able to respond
to random page access requests
The protocol is quite simple:
- the restore sends PS_IOV_GET command with PID, address and number
  of pages it wishes to get
- if the pages at the requested address are mapped to zero pfn, the dump
  side replies with PS_IOV_ZERO
- otherwise, the dump side replies with PS_IOV_ADD command. The nr_pages
  field is updated to reflect actual amount of pages that the dump side is
  going to send.
- After the PS_IOV_ADD command the dump side sends actual page data

Signed-off-by: Mike Rapoport <rppt at linux.vnet.ibm.com>
---
 criu/include/page-xfer.h |   2 +
 criu/page-xfer.c         | 102 +++++++++++++++++++++++++++++++++++++++++++----
 2 files changed, 96 insertions(+), 8 deletions(-)

diff --git a/criu/include/page-xfer.h b/criu/include/page-xfer.h
index fb222c3..25a9da7 100644
--- a/criu/include/page-xfer.h
+++ b/criu/include/page-xfer.h
@@ -44,4 +44,6 @@ extern int disconnect_from_page_server(void);
 
 extern int check_parent_page_xfer(int fd_type, long id);
 
+extern int get_remote_pages(int pid, unsigned long addr, int nr_pages, void *dest);
+
 #endif /* __CR_PAGE_XFER__H__ */
diff --git a/criu/page-xfer.c b/criu/page-xfer.c
index 786516e..38d43e1 100644
--- a/criu/page-xfer.c
+++ b/criu/page-xfer.c
@@ -16,6 +16,8 @@
 #include "util.h"
 #include "protobuf.h"
 #include "images/pagemap.pb-c.h"
+#include "pstree.h"
+#include "parasite-syscall.h"
 
 static int page_server_sk = -1;
 
@@ -37,6 +39,8 @@ static void psi2iovec(struct page_server_iov *ps, struct iovec *iov)
 #define PS_IOV_OPEN	3
 #define PS_IOV_OPEN2	4
 #define PS_IOV_PARENT	5
+#define PS_IOV_GET	6
+#define PS_IOV_ZERO	7
 
 #define PS_IOV_FLUSH		0x1023
 #define PS_IOV_FLUSH_N_CLOSE	0x1024
@@ -572,6 +576,44 @@ static int page_server_hole(int sk, struct page_server_iov *pi)
 	return 0;
 }
 
+static int page_server_get_pages(int sk, struct page_server_iov *pi)
+{
+	struct pstree_item *item;
+	struct page_pipe *pp;
+	struct page_pipe_buf *ppb;
+	struct iovec *iov;
+	int ret;
+
+	item = pstree_item_by_virt(pi->dst_id);
+	pp = item->parasite_ctl->mem_pp;
+
+	ret = page_pipe_split(pp, pi->vaddr, &pi->nr_pages);
+	if (ret)
+		return ret;
+
+	if (pi->nr_pages == 0) {
+		/* no iovs found means we've hit a zero page */
+		pr_debug("no iovs found, zero pages\n");
+		return send_psi(sk, PS_IOV_ZERO, 0, 0, 0);
+	}
+
+	ppb = list_first_entry(&pp->bufs, struct page_pipe_buf, l);
+	iov = &ppb->iov[0];
+
+	BUG_ON(!(ppb->flags & PPB_LAZY));
+	BUG_ON(iov->iov_len != pi->nr_pages * PAGE_SIZE);
+	BUG_ON(pi->vaddr != encode_pointer(iov->iov_base));
+
+	if (send_psi(sk, PS_IOV_ADD, pi->nr_pages, pi->vaddr, pi->dst_id))
+		return -1;
+
+	ret = splice(ppb->p[0], NULL, sk, NULL, iov->iov_len, SPLICE_F_MOVE);
+	if (ret != iov->iov_len)
+		return -1;
+
+	return 0;
+}
+
 static int page_server_serve(int sk)
 {
 	int ret = -1;
@@ -584,14 +626,16 @@ static int page_server_serve(int sk)
 	 */
 	tcp_nodelay(sk, true);
 
-	if (pipe(cxfer.p)) {
-		pr_perror("Can't make pipe for xfer");
-		close(sk);
-		return -1;
-	}
+	if (!opts.lazy_pages) {
+		if (pipe(cxfer.p)) {
+			pr_perror("Can't make pipe for xfer");
+			close(sk);
+			return -1;
+		}
 
-	cxfer.pipe_size = fcntl(cxfer.p[0], F_GETPIPE_SZ, 0);
-	pr_debug("Created xfer pipe size %u\n", cxfer.pipe_size);
+		cxfer.pipe_size = fcntl(cxfer.p[0], F_GETPIPE_SZ, 0);
+		pr_debug("Created xfer pipe size %u\n", cxfer.pipe_size);
+	}
 
 	while (1) {
 		struct page_server_iov pi;
@@ -643,6 +687,10 @@ static int page_server_serve(int sk)
 			flushed = true;
 			break;
 		}
+		case PS_IOV_GET:
+			flushed = true;
+			ret = page_server_get_pages(sk, &pi);
+			break;
 		default:
 			pr_err("Unknown command %u\n", pi.cmd);
 			ret = -1;
@@ -685,7 +733,8 @@ int cr_page_server(bool daemon_mode, int cfd)
 	int sk = -1;
 	int ret;
 
-	up_page_ids_base();
+	if (!opts.lazy_pages)
+		up_page_ids_base();
 
 	if (opts.ps_socket != -1) {
 		ret = 0;
@@ -775,3 +824,40 @@ out:
 	close_safe(&page_server_sk);
 	return ret ? : status;
 }
+
+int get_remote_pages(int pid, unsigned long addr, int nr_pages, void *dest)
+{
+	int ret;
+
+	struct page_server_iov pi;
+ /* = { */
+ /* 		.cmd = PS_IOV_GET, */
+ /* 		.nr_pages = nr_pages, */
+ /* 		.vaddr = addr, */
+ /* 		.dst_id = pid, */
+ /* 	}; */
+
+	/* ret = write(page_server_sk, &pi, sizeof(pi)); */
+	/* if (ret != sizeof(pi)) */
+	/* 	return -1; */
+
+	if (send_psi(page_server_sk, PS_IOV_GET, nr_pages, addr, pid))
+		return -1;
+
+	ret = recv(page_server_sk, &pi, sizeof(pi), MSG_WAITALL);
+	if (ret != sizeof(pi))
+		return -1;
+
+	/* zero page */
+	if (pi.cmd == PS_IOV_ZERO)
+		return 0;
+
+	if (pi.nr_pages > nr_pages)
+		return -1;
+
+	ret = recv(page_server_sk, dest, PAGE_SIZE, MSG_WAITALL);
+	if (ret != PAGE_SIZE)
+		return -1;
+
+	return 1;
+}
-- 
1.9.1



More information about the CRIU mailing list