[CRIU] [PATCH 1/2] [UGLY PATCH] allow fetching pages from remote page-server
Adrian Reber
adrian at lisas.de
Tue Apr 26 02:38:09 PDT 2016
From: Mike Rapoport <rppt at linux.vnet.ibm.com>
---
criu/cr-restore.c | 5 +-
criu/crtools.c | 4 ++
criu/include/cr_options.h | 1 +
criu/include/page-read.h | 6 ++
criu/include/page-xfer.h | 7 ++
criu/page-read.c | 49 ++++++++++++--
criu/page-xfer.c | 163 +++++++++++++++++++++++++++++++++++++++++++++-
7 files changed, 227 insertions(+), 8 deletions(-)
diff --git a/criu/cr-restore.c b/criu/cr-restore.c
index 34db7f7..5ae1d28 100644
--- a/criu/cr-restore.c
+++ b/criu/cr-restore.c
@@ -453,10 +453,13 @@ static int restore_priv_vma_content(void)
unsigned int nr_lazy = 0;
unsigned long va;
struct page_read pr;
+ int pr_flags = PR_TASK;
vma = list_first_entry(vmas, struct vma_area, list);
- ret = open_page_read(current->pid.virt, &pr, PR_TASK);
+ if (opts.use_page_client)
+ pr_flags |= PR_REMOTE;
+ ret = open_page_read(current->pid.virt, &pr, pr_flags);
if (ret <= 0)
return -1;
diff --git a/criu/crtools.c b/criu/crtools.c
index 93ca90b..211d883 100644
--- a/criu/crtools.c
+++ b/criu/crtools.c
@@ -321,6 +321,7 @@ int main(int argc, char *argv[], char *envp[])
{ "extra", no_argument, 0, 1077 },
{ "experimental", no_argument, 0, 1078 },
{ "all", no_argument, 0, 1079 },
+ { "page-client", no_argument, 0, 1080 },
{ },
};
@@ -623,6 +624,9 @@ int main(int argc, char *argv[], char *envp[])
opts.check_extra_features = true;
opts.check_experimental_features = true;
break;
+ case 1080:
+ opts.use_page_client = true;
+ break;
case 'V':
pr_msg("Version: %s\n", CRIU_VERSION);
if (strcmp(CRIU_GITID, "0"))
diff --git a/criu/include/cr_options.h b/criu/include/cr_options.h
index b6ae3a1..0c8b122 100644
--- a/criu/include/cr_options.h
+++ b/criu/include/cr_options.h
@@ -86,6 +86,7 @@ struct cr_options {
struct list_head join_ns;
char *libdir;
bool use_page_server;
+ bool use_page_client;
unsigned short port;
char *addr;
int ps_socket;
diff --git a/criu/include/page-read.h b/criu/include/page-read.h
index 3ba1ee9..abeb4c6 100644
--- a/criu/include/page-read.h
+++ b/criu/include/page-read.h
@@ -40,6 +40,8 @@
* All this is implemented in read_pagemap_page.
*/
+struct page_xfer;
+
struct page_read {
/*
* gets next vaddr:len pair to work on.
@@ -57,6 +59,8 @@ struct page_read {
struct cr_img *pmi;
struct cr_img *pi;
+ struct page_xfer *xfer; /* for remote page reader */
+
PagemapEntry *pe; /* current pagemap we are on */
struct page_read *parent; /* parent pagemap (if ->in_parent
pagemap is met in image, then
@@ -75,6 +79,8 @@ struct page_read {
#define PR_TYPE_MASK 0x3
#define PR_MOD 0x4 /* Will need to modify */
+#define PR_REMOTE 0x8 /* will read pages from remote host */
+
/*
* -1 -- error
* 0 -- no images
diff --git a/criu/include/page-xfer.h b/criu/include/page-xfer.h
index 8492daa..86a7c21 100644
--- a/criu/include/page-xfer.h
+++ b/criu/include/page-xfer.h
@@ -17,6 +17,10 @@ struct page_xfer {
int (*write_pages)(struct page_xfer *self, int pipe, unsigned long len);
/* transfers one hole -- vaddr:len entry w/o pages */
int (*write_hole)(struct page_xfer *self, struct iovec *iov);
+
+ int (*read_pages)(struct page_xfer *self, unsigned long vaddr,
+ int nr, void *buf);
+
void (*close)(struct page_xfer *self);
/* private data for every page-xfer engine */
@@ -44,4 +48,7 @@ extern int disconnect_from_page_server(void);
extern int check_parent_page_xfer(int fd_type, long id);
+extern int page_xfer_read_pages(struct page_xfer *xfer, unsigned long vaddr,
+ int nr, void *buf);
+
#endif /* __CR_PAGE_XFER__H__ */
diff --git a/criu/page-read.c b/criu/page-read.c
index e5ec76a..8e23dc5 100644
--- a/criu/page-read.c
+++ b/criu/page-read.c
@@ -6,6 +6,7 @@
#include "cr_options.h"
#include "servicefd.h"
#include "page-read.h"
+#include "page-xfer.h"
#include "protobuf.h"
#include "images/pagemap.pb-c.h"
@@ -193,6 +194,13 @@ static int read_pagemap_page(struct page_read *pr, unsigned long vaddr, int nr,
vaddr += p_nr * PAGE_SIZE;
buf += p_nr * PAGE_SIZE;
} while (nr);
+ } else if (pr->xfer) {
+ pr_debug("\tpr%u Read %d remote pages %lx\n", pr->id, nr, vaddr);
+ ret = page_xfer_read_pages(pr->xfer, vaddr, nr, buf);
+ if (ret) {
+ pr_err("cannot get remote pages\n");
+ return -1;
+ }
} else {
int fd = img_raw_fd(pr->pi);
off_t current_vaddr = lseek(fd, 0, SEEK_CUR);
@@ -237,6 +245,11 @@ static void close_page_read(struct page_read *pr)
close_image(pr->pmi);
if (pr->pi)
close_image(pr->pi);
+
+ if (pr->xfer) {
+ pr->xfer->close(pr->xfer);
+ free(pr->xfer);
+ }
}
static int try_open_parent(int dfd, int pid, struct page_read *pr, int pr_flags)
@@ -301,9 +314,23 @@ int open_page_read_at(int dfd, int pid, struct page_read *pr, int pr_flags)
pr->pe = NULL;
pr->parent = NULL;
+ pr->xfer = NULL;
pr->bunch.iov_len = 0;
pr->bunch.iov_base = NULL;
+ if (pr_flags & PR_REMOTE) {
+ pr->xfer = malloc(sizeof(*pr->xfer));
+ if (!pr->xfer) {
+ pr_err("failed to reseve memory for page-xfer\n");
+ return -1;
+ }
+
+ if (open_page_xfer(pr->xfer, CR_FD_PAGEMAP, pid)) {
+ pr_err("failed to open page-xfer\n");
+ return -1;
+ }
+ }
+
pr->pmi = open_image_at(dfd, i_typ, O_RSTR, (long)pid);
if (!pr->pmi)
return -1;
@@ -318,17 +345,29 @@ int open_page_read_at(int dfd, int pid, struct page_read *pr, int pr_flags)
return -1;
}
- pr->pi = open_pages_image_at(dfd, flags, pr->pmi);
- if (!pr->pi) {
- close_page_read(pr);
- return -1;
+ if (pr_flags & PR_REMOTE) {
+ PagemapHead *h;
+ if (pb_read_one(pr->pmi, &h, PB_PAGEMAP_HEAD) < 0) {
+ pr_err("%s: pb_read_one\n", __func__);
+ return -1;
+ }
+ pagemap_head__free_unpacked(h, NULL);
+
+ pr->skip_pages = NULL;
+ } else {
+ pr->pi = open_pages_image_at(dfd, flags, pr->pmi);
+ if (!pr->pi) {
+ close_page_read(pr);
+ return -1;
+ }
+
+ pr->skip_pages = skip_pagemap_pages;
}
pr->get_pagemap = get_pagemap;
pr->put_pagemap = put_pagemap;
pr->read_pages = read_pagemap_page;
pr->close = close_page_read;
- pr->skip_pages = skip_pagemap_pages;
pr->id = ids++;
pr_debug("Opened page read %u (parent %u)\n",
diff --git a/criu/page-xfer.c b/criu/page-xfer.c
index 2ebe8cc..df85976 100644
--- a/criu/page-xfer.c
+++ b/criu/page-xfer.c
@@ -13,6 +13,7 @@
#include "image.h"
#include "page-xfer.h"
#include "page-pipe.h"
+#include "page-read.h"
#include "util.h"
#include "protobuf.h"
#include "images/pagemap.pb-c.h"
@@ -43,6 +44,8 @@ static int open_page_local_xfer(struct page_xfer *xfer, int fd_type, long id);
#define PS_IOV_OPEN 3
#define PS_IOV_OPEN2 4
#define PS_IOV_PARENT 5
+#define PS_IOV_OPEN3 6
+#define PS_IOV_GET 7
#define PS_IOV_FLUSH 0x1023
#define PS_IOV_FLUSH_N_CLOSE 0x1024
@@ -112,6 +115,24 @@ static int page_server_open(int sk, struct page_server_iov *pi)
return 0;
}
+static int page_server_open3(int sk, struct page_server_iov *pi)
+{
+ int type;
+ long id;
+ char has_parent = 23;
+
+ type = decode_pm_type(pi->dst_id);
+ id = decode_pm_id(pi->dst_id);
+ pr_debug("Opening %d/%ld\n", type, id);
+
+ if (write(sk, &has_parent, 1) != 1) {
+ pr_perror("Unable to send reponse");
+ return -1;
+ }
+
+ return 0;
+}
+
static int prep_loc_xfer(struct page_server_iov *pi)
{
if (cxfer.dst_id != pi->dst_id) {
@@ -176,6 +197,57 @@ static int page_server_hole(int sk, struct page_server_iov *pi)
return 0;
}
+static int page_server_get(int sk, struct page_server_iov *pi)
+{
+ struct page_read page_read;
+ struct iovec iov;
+ unsigned long len;
+ int type, id, ret;
+ char *buf;
+
+ type = decode_pm_type(pi->dst_id);
+ id = decode_pm_id(pi->dst_id);
+ pr_debug("Get %d/%d\n", type, id);
+
+ len = pi->nr_pages * PAGE_SIZE;
+ buf = malloc(len);
+ if (!buf) {
+ pr_err("allocation failed\n");
+ return -1;
+ }
+
+ open_page_read(id, &page_read, PR_TASK);
+
+ ret = page_read.get_pagemap(&page_read, &iov);
+ pr_debug("get_pagemap ret %d\n", ret);
+ if (ret <= 0)
+ return ret;
+
+ ret = seek_pagemap_page(&page_read, pi->vaddr, true);
+ pr_debug("seek_pagemap_page ret 0x%x\n", ret);
+ if (ret <= 0)
+ return ret;
+
+ ret = page_read.read_pages(&page_read, pi->vaddr, pi->nr_pages, buf);
+ if (ret < 0) {
+ pr_err("%s: read_pages: %d\n", __func__, ret);
+ goto out;
+ }
+
+ ret = write(sk, buf, len);
+ if (ret != len) {
+ pr_err("%s: Can't send the pages:%d\n", __func__, ret);
+ ret = -1;
+ goto out;
+ }
+
+ page_read.close(&page_read);
+ ret = 0;
+out:
+ free(buf);
+ return ret;
+}
+
static int page_server_check_parent(int sk, struct page_server_iov *pi);
static int page_server_serve(int sk)
@@ -221,6 +293,9 @@ static int page_server_serve(int sk)
case PS_IOV_OPEN2:
ret = page_server_open(sk, &pi);
break;
+ case PS_IOV_OPEN3:
+ ret = page_server_open3(sk, &pi);
+ break;
case PS_IOV_PARENT:
ret = page_server_check_parent(sk, &pi);
break;
@@ -230,6 +305,9 @@ static int page_server_serve(int sk)
case PS_IOV_HOLE:
ret = page_server_hole(sk, &pi);
break;
+ case PS_IOV_GET:
+ ret = page_server_get(sk, &pi);
+ break;
case PS_IOV_FLUSH:
case PS_IOV_FLUSH_N_CLOSE:
{
@@ -322,8 +400,10 @@ static int page_server_sk = -1;
int connect_to_page_server(void)
{
- if (!opts.use_page_server)
+ if (!(opts.use_page_server || opts.use_page_client)) {
+ pr_err("mutually exclusive page-server and page-client options\n");
return 0;
+ }
if (opts.ps_socket != -1) {
page_server_sk = opts.ps_socket;
@@ -332,8 +412,11 @@ int connect_to_page_server(void)
}
page_server_sk = setup_tcp_client(opts.addr);
- if (page_server_sk == -1)
+ if (page_server_sk == -1) {
+ pr_err("setup_tcp_client\n");
return -1;
+ }
+
out:
/*
* CORK the socket at the very beginning. As per ANK
@@ -431,6 +514,33 @@ static int write_hole_to_server(struct page_xfer *xfer, struct iovec *iov)
return 0;
}
+static int read_pages_from_server(struct page_xfer *xfer, unsigned long vaddr,
+ int nr, void *buf)
+{
+ struct page_server_iov pi;
+ unsigned long len;
+ int ret;
+
+ pi.cmd = PS_IOV_GET;
+ pi.dst_id = xfer->dst_id;
+ pi.nr_pages = nr;
+ pi.vaddr = vaddr;
+ len = nr * page_size();
+
+ if (write(xfer->sk, &pi, sizeof(pi)) != sizeof(pi)) {
+ pr_perror("Can't write GET cmd to server");
+ return -1;
+ }
+
+ ret = recv(xfer->sk, buf, len, MSG_WAITALL);
+ if (ret != len) {
+ pr_err("%s: recv failed: %d\n", __func__, ret);
+ return -1;
+ }
+
+ return 0;
+}
+
static void close_server_xfer(struct page_xfer *xfer)
{
xfer->sk = -1;
@@ -445,6 +555,7 @@ static int open_page_server_xfer(struct page_xfer *xfer, int fd_type, long id)
xfer->write_pagemap = write_pagemap_to_server;
xfer->write_pages = write_pages_to_server;
xfer->write_hole = write_hole_to_server;
+ xfer->read_pages = read_pages_from_server;
xfer->close = close_server_xfer;
xfer->dst_id = encode_pm_id(fd_type, id);
xfer->parent = NULL;
@@ -473,6 +584,46 @@ static int open_page_server_xfer(struct page_xfer *xfer, int fd_type, long id)
return 0;
}
+static void close_client_xfer(struct page_xfer *xfer)
+{
+ close(xfer->sk);
+}
+
+static int open_page_client_xfer(struct page_xfer *xfer, int fd_type, long id)
+{
+ struct page_server_iov pi;
+ char has_parent;
+
+ connect_to_page_server();
+
+ xfer->sk = page_server_sk;
+ xfer->read_pages = read_pages_from_server;
+ xfer->close = close_client_xfer;
+ xfer->dst_id = encode_pm_id(fd_type, id);
+ xfer->parent = NULL;
+
+ pi.cmd = PS_IOV_OPEN3;
+ pi.dst_id = xfer->dst_id;
+ pi.vaddr = 0;
+ pi.nr_pages = 0;
+
+ if (write(xfer->sk, &pi, sizeof(pi)) != sizeof(pi)) {
+ pr_perror("Can't write to page server");
+ return -1;
+ }
+
+ /* Push the command NOW */
+ tcp_nodelay(xfer->sk, true);
+
+ if (read(xfer->sk, &has_parent, 1) != 1) {
+ pr_perror("The page server doesn't answer");
+ return -1;
+ }
+ pr_debug("has_parent=%d\n", has_parent);
+
+ return 0;
+}
+
static int write_pagemap_loc(struct page_xfer *xfer,
struct iovec *iov)
{
@@ -703,6 +854,8 @@ int open_page_xfer(struct page_xfer *xfer, int fd_type, long id)
{
if (opts.use_page_server)
return open_page_server_xfer(xfer, fd_type, id);
+ else if (opts.use_page_client)
+ return open_page_client_xfer(xfer, fd_type, id);
else
return open_page_local_xfer(xfer, fd_type, id);
}
@@ -785,3 +938,9 @@ int check_parent_page_xfer(int fd_type, long id)
else
return check_parent_local_xfer(fd_type, id);
}
+
+int page_xfer_read_pages(struct page_xfer *xfer, unsigned long vaddr,
+ int nr, void *buf)
+{
+ return xfer->read_pages ? xfer->read_pages(xfer, vaddr, nr, buf) : -1;
+}
--
1.8.3.1
More information about the CRIU
mailing list