[CRIU] [PATCH RFC 6/8] criu: page-xfer: add PS_IOV_GET interface

Mike Rapoport rppt at linux.vnet.ibm.com
Sat May 21 03:49:40 PDT 2016


When the dump side is acting as a lazy pages server, it should be able to
respond to random page access requests.
The protocol is quite simple:
- the restore side sends a PS_IOV_GET command with the PID, the address and
  the number of pages it wishes to get
- the dump side replies with a PS_IOV_GET command. The nr_pages field is
  updated to reflect the actual number of pages that the dump side is going
  to send. If the pages in question are mapped to the zero pfn, the entire
  PS_IOV_GET reply is zeroed.
- after a non-zeroed PS_IOV_GET reply, the dump side sends the actual page data

Signed-off-by: Mike Rapoport <rppt at linux.vnet.ibm.com>
---
 criu/include/page-xfer.h |  2 +
 criu/page-xfer.c         | 99 ++++++++++++++++++++++++++++++++++++++++++++----
 2 files changed, 93 insertions(+), 8 deletions(-)

diff --git a/criu/include/page-xfer.h b/criu/include/page-xfer.h
index fb222c3..25a9da7 100644
--- a/criu/include/page-xfer.h
+++ b/criu/include/page-xfer.h
@@ -44,4 +44,6 @@ extern int disconnect_from_page_server(void);
 
 extern int check_parent_page_xfer(int fd_type, long id);
 
+extern int get_remote_pages(int pid, unsigned long addr, int nr_pages, void *dest);
+
 #endif /* __CR_PAGE_XFER__H__ */
diff --git a/criu/page-xfer.c b/criu/page-xfer.c
index c1716a5..bc5472e 100644
--- a/criu/page-xfer.c
+++ b/criu/page-xfer.c
@@ -16,6 +16,8 @@
 #include "util.h"
 #include "protobuf.h"
 #include "images/pagemap.pb-c.h"
+#include "pstree.h"
+#include "parasite-syscall.h"
 
 struct page_server_iov {
 	u32	cmd;
@@ -43,6 +45,7 @@ static int open_page_local_xfer(struct page_xfer *xfer, int fd_type, long id);
 #define PS_IOV_OPEN	3
 #define PS_IOV_OPEN2	4
 #define PS_IOV_PARENT	5
+#define PS_IOV_GET	6
 
 #define PS_IOV_FLUSH		0x1023
 #define PS_IOV_FLUSH_N_CLOSE	0x1024
@@ -176,6 +179,46 @@ static int page_server_hole(int sk, struct page_server_iov *pi)
 	return 0;
 }
 
+/* Serve PS_IOV_GET: send the reply header, then splice the page data. */
+static int page_server_get_pages(int sk, struct page_server_iov *pi)
+{
+	struct pstree_item *item;
+	struct page_pipe *pp;
+	struct page_pipe_buf *ppb;
+	struct iovec *iov;
+	int ret;
+
+	/* dst_id is supplied by the peer, so the lookup may fail */
+	item = pstree_item_by_virt(pi->dst_id);
+	if (!item || !item->parasite_ctl) {
+		pr_err("No task %d to get pages from\n", (int)pi->dst_id);
+		return -1;
+	}
+	pp = item->parasite_ctl->mem_pp;
+
+	ret = page_pipe_split(pp, pi->vaddr, &pi->nr_pages);
+	if (ret)
+		return ret;
+
+	if (pi->nr_pages == 0) {
+		/* no iovs found means we've hit a zero page */
+		pr_debug("no iovs found, zero pages\n");
+		memset(pi, 0, sizeof(*pi));
+		return write(sk, pi, sizeof(*pi)) != sizeof(*pi);
+	}
+
+	ppb = list_first_entry(&pp->bufs, struct page_pipe_buf, l);
+	iov = &ppb->iov[0];
+	BUG_ON(!(ppb->flags & PPB_LAZY));
+	BUG_ON(iov->iov_len != pi->nr_pages * PAGE_SIZE);
+	BUG_ON(pi->vaddr != encode_pointer(iov->iov_base));
+
+	if (write(sk, pi, sizeof(*pi)) != sizeof(*pi))
+		return -1;
+	ret = splice(ppb->p[0], NULL, sk, NULL, iov->iov_len, SPLICE_F_MOVE);
+	return ret == iov->iov_len ? 0 : -1;
+}
+
 static int page_server_check_parent(int sk, struct page_server_iov *pi);
 
 static int page_server_serve(int sk)
@@ -190,14 +233,16 @@ static int page_server_serve(int sk)
 	 */
 	tcp_nodelay(sk, true);
 
-	if (pipe(cxfer.p)) {
-		pr_perror("Can't make pipe for xfer");
-		close(sk);
-		return -1;
-	}
+	if (!opts.lazy_pages) {
+		if (pipe(cxfer.p)) {
+			pr_perror("Can't make pipe for xfer");
+			close(sk);
+			return -1;
+		}
 
-	cxfer.pipe_size = fcntl(cxfer.p[0], F_GETPIPE_SZ, 0);
-	pr_debug("Created xfer pipe size %u\n", cxfer.pipe_size);
+		cxfer.pipe_size = fcntl(cxfer.p[0], F_GETPIPE_SZ, 0);
+		pr_debug("Created xfer pipe size %u\n", cxfer.pipe_size);
+	}
 
 	while (1) {
 		struct page_server_iov pi;
@@ -249,6 +294,10 @@ static int page_server_serve(int sk)
 			flushed = true;
 			break;
 		}
+		case PS_IOV_GET:
+			flushed = true;
+			ret = page_server_get_pages(sk, &pi);
+			break;
 		default:
 			pr_err("Unknown command %u\n", pi.cmd);
 			ret = -1;
@@ -291,7 +340,8 @@ int cr_page_server(bool daemon_mode, int cfd)
 	int sk = -1;
 	int ret;
 
-	up_page_ids_base();
+	if (!opts.lazy_pages)
+		up_page_ids_base();
 
 	if (opts.ps_socket != -1) {
 		ret = 0;
@@ -787,3 +837,36 @@ int check_parent_page_xfer(int fd_type, long id)
 	else
 		return check_parent_local_xfer(fd_type, id);
 }
+
+/*
+ * Fetch up to @nr_pages pages of @pid at @addr into @dest (sized for
+ * @nr_pages pages). Returns 1 on data, 0 for a zero page, -1 on error.
+ */
+int get_remote_pages(int pid, unsigned long addr, int nr_pages, void *dest)
+{
+	size_t len;
+	struct page_server_iov pi = {
+		.cmd = PS_IOV_GET,
+		.nr_pages = nr_pages,
+		.vaddr = addr,
+		.dst_id = pid,
+	};
+
+	if (write(page_server_sk, &pi, sizeof(pi)) != sizeof(pi))
+		return -1;
+	if (recv(page_server_sk, &pi, sizeof(pi), MSG_WAITALL) != sizeof(pi))
+		return -1;
+
+	/* an all-zero reply means a zero page; no page data follows it */
+	if (pi.cmd == 0 && pi.vaddr == 0 && pi.nr_pages == 0 && pi.dst_id == 0)
+		return 0;
+
+	if (pi.nr_pages > nr_pages)
+		return -1;
+
+	/* the reply is followed by pi.nr_pages full pages; read all of them */
+	len = (size_t)pi.nr_pages * PAGE_SIZE;
+	if (recv(page_server_sk, dest, len, MSG_WAITALL) != len)
+		return -1;
+	return 1;
+}
-- 
1.9.1



More information about the CRIU mailing list