[CRIU] [PATCH v3 07/11] criu: page-xfer: add PS_IOV_GET interface
Mike Rapoport
rapoport at linux.vnet.ibm.com
Tue May 31 14:01:40 PDT 2016
From: Mike Rapoport <rppt at linux.vnet.ibm.com>
When the dump side is acting as a lazy-pages server, it should be able to
respond to random page access requests.
The protocol is quite simple:
- the restore side sends a PS_IOV_GET command with the PID, the address and
the number of pages it wishes to get
- if the pages at the requested address are mapped to the zero pfn, the dump
side replies with PS_IOV_ZERO
- otherwise, the dump side replies with a PS_IOV_ADD command. The nr_pages
field is updated to reflect the actual number of pages that the dump side is
going to send.
- after the PS_IOV_ADD command, the dump side sends the actual page data
Signed-off-by: Mike Rapoport <rppt at linux.vnet.ibm.com>
---
criu/include/page-xfer.h | 2 +
criu/page-xfer.c | 102 +++++++++++++++++++++++++++++++++++++++++++----
2 files changed, 96 insertions(+), 8 deletions(-)
diff --git a/criu/include/page-xfer.h b/criu/include/page-xfer.h
index fb222c3..25a9da7 100644
--- a/criu/include/page-xfer.h
+++ b/criu/include/page-xfer.h
@@ -44,4 +44,6 @@ extern int disconnect_from_page_server(void);
extern int check_parent_page_xfer(int fd_type, long id);
+extern int get_remote_pages(int pid, unsigned long addr, int nr_pages, void *dest);
+
#endif /* __CR_PAGE_XFER__H__ */
diff --git a/criu/page-xfer.c b/criu/page-xfer.c
index 786516e..38d43e1 100644
--- a/criu/page-xfer.c
+++ b/criu/page-xfer.c
@@ -16,6 +16,8 @@
#include "util.h"
#include "protobuf.h"
#include "images/pagemap.pb-c.h"
+#include "pstree.h"
+#include "parasite-syscall.h"
static int page_server_sk = -1;
@@ -37,6 +39,8 @@ static void psi2iovec(struct page_server_iov *ps, struct iovec *iov)
#define PS_IOV_OPEN 3
#define PS_IOV_OPEN2 4
#define PS_IOV_PARENT 5
+#define PS_IOV_GET 6
+#define PS_IOV_ZERO 7
#define PS_IOV_FLUSH 0x1023
#define PS_IOV_FLUSH_N_CLOSE 0x1024
@@ -572,6 +576,44 @@ static int page_server_hole(int sk, struct page_server_iov *pi)
return 0;
}
+/*
+ * Handle a PS_IOV_GET request received on @sk.
+ *
+ * @pi carries the request: dst_id is the task ID, vaddr the requested
+ * address and nr_pages the number of pages wanted. nr_pages is passed by
+ * address to page_pipe_split() and is then used as the amount that is
+ * actually going to be sent.
+ *
+ * Reply sent to the peer:
+ *   - PS_IOV_ZERO when nr_pages comes back as 0;
+ *   - otherwise PS_IOV_ADD followed by iov_len bytes of page data spliced
+ *     from the first page-pipe buffer.
+ *
+ * Returns 0 on success and non-zero on failure.
+ */
+static int page_server_get_pages(int sk, struct page_server_iov *pi)
+{
+	struct pstree_item *item;
+	struct page_pipe *pp;
+	struct page_pipe_buf *ppb;
+	struct iovec *iov;
+	size_t left;
+	int ret;
+
+	/*
+	 * dst_id comes from the network, so the lookup result must not be
+	 * trusted blindly.
+	 * NOTE(review): assumes pstree_item_by_virt() returns NULL when the
+	 * task is unknown -- confirm against pstree.c.
+	 */
+	item = pstree_item_by_virt(pi->dst_id);
+	if (!item || !item->parasite_ctl) {
+		pr_err("No task with id %llu to get pages from\n",
+		       (unsigned long long)pi->dst_id);
+		return -1;
+	}
+	pp = item->parasite_ctl->mem_pp;
+
+	ret = page_pipe_split(pp, pi->vaddr, &pi->nr_pages);
+	if (ret)
+		return ret;
+
+	if (pi->nr_pages == 0) {
+		/* no iovs found means we've hit a zero page */
+		pr_debug("no iovs found, zero pages\n");
+		return send_psi(sk, PS_IOV_ZERO, 0, 0, 0);
+	}
+
+	ppb = list_first_entry(&pp->bufs, struct page_pipe_buf, l);
+	iov = &ppb->iov[0];
+
+	BUG_ON(!(ppb->flags & PPB_LAZY));
+	BUG_ON(iov->iov_len != pi->nr_pages * PAGE_SIZE);
+	BUG_ON(pi->vaddr != encode_pointer(iov->iov_base));
+
+	if (send_psi(sk, PS_IOV_ADD, pi->nr_pages, pi->vaddr, pi->dst_id))
+		return -1;
+
+	/*
+	 * splice() may move fewer bytes than requested, so keep going until
+	 * the whole iovec has been pushed into the socket.
+	 */
+	left = iov->iov_len;
+	while (left) {
+		ssize_t done;
+
+		done = splice(ppb->p[0], NULL, sk, NULL, left, SPLICE_F_MOVE);
+		if (done < 0) {
+			pr_perror("Can't splice pages to socket");
+			return -1;
+		}
+		if (done == 0) {
+			pr_err("Page pipe drained with %zu bytes left to send\n", left);
+			return -1;
+		}
+		left -= done;
+	}
+
+	return 0;
+}
+
static int page_server_serve(int sk)
{
int ret = -1;
@@ -584,14 +626,16 @@ static int page_server_serve(int sk)
*/
tcp_nodelay(sk, true);
- if (pipe(cxfer.p)) {
- pr_perror("Can't make pipe for xfer");
- close(sk);
- return -1;
- }
+ if (!opts.lazy_pages) {
+ if (pipe(cxfer.p)) {
+ pr_perror("Can't make pipe for xfer");
+ close(sk);
+ return -1;
+ }
- cxfer.pipe_size = fcntl(cxfer.p[0], F_GETPIPE_SZ, 0);
- pr_debug("Created xfer pipe size %u\n", cxfer.pipe_size);
+ cxfer.pipe_size = fcntl(cxfer.p[0], F_GETPIPE_SZ, 0);
+ pr_debug("Created xfer pipe size %u\n", cxfer.pipe_size);
+ }
while (1) {
struct page_server_iov pi;
@@ -643,6 +687,10 @@ static int page_server_serve(int sk)
flushed = true;
break;
}
+ case PS_IOV_GET:
+ flushed = true;
+ ret = page_server_get_pages(sk, &pi);
+ break;
default:
pr_err("Unknown command %u\n", pi.cmd);
ret = -1;
@@ -685,7 +733,8 @@ int cr_page_server(bool daemon_mode, int cfd)
int sk = -1;
int ret;
- up_page_ids_base();
+ if (!opts.lazy_pages)
+ up_page_ids_base();
if (opts.ps_socket != -1) {
ret = 0;
@@ -775,3 +824,40 @@ out:
close_safe(&page_server_sk);
return ret ? : status;
}
+
+/*
+ * Ask the page server for @nr_pages pages of task @pid starting at @addr.
+ *
+ * Sends PS_IOV_GET over page_server_sk and reads the reply header. On a
+ * PS_IOV_ADD reply the announced number of pages (never more than
+ * @nr_pages) is read into @dest.
+ * NOTE(review): @dest is assumed to have room for @nr_pages pages --
+ * confirm against callers.
+ *
+ * Returns:
+ *    1 - page data was received into @dest
+ *    0 - the server replied PS_IOV_ZERO, @dest is left untouched
+ *   -1 - I/O error or unexpected reply
+ */
+int get_remote_pages(int pid, unsigned long addr, int nr_pages, void *dest)
+{
+	struct page_server_iov pi;
+	ssize_t ret;
+	size_t len;
+
+	/* A non-positive count would defeat the unsigned check below. */
+	if (nr_pages <= 0)
+		return -1;
+
+	if (send_psi(page_server_sk, PS_IOV_GET, nr_pages, addr, pid))
+		return -1;
+
+	ret = recv(page_server_sk, &pi, sizeof(pi), MSG_WAITALL);
+	if (ret != (ssize_t)sizeof(pi))
+		return -1;
+
+	/* zero page */
+	if (pi.cmd == PS_IOV_ZERO)
+		return 0;
+
+	if (pi.cmd != PS_IOV_ADD) {
+		pr_err("Unknown command %u\n", pi.cmd);
+		return -1;
+	}
+
+	if (pi.nr_pages == 0 || pi.nr_pages > (unsigned int)nr_pages)
+		return -1;
+
+	/*
+	 * The server sends nr_pages * PAGE_SIZE bytes after the header; all
+	 * of them must be consumed or the next reply header would be read
+	 * from the middle of page data.
+	 */
+	len = (size_t)pi.nr_pages * PAGE_SIZE;
+	ret = recv(page_server_sk, dest, len, MSG_WAITALL);
+	if (ret < 0 || (size_t)ret != len)
+		return -1;
+
+	return 1;
+}
--
1.9.1
More information about the CRIU
mailing list