[CRIU] [PATCH RFC v3 2/4] mem: Introduce image-proxy/image-cache & remote option

Katerina Koukiou k.koukiou at googlemail.com
Wed Aug 10 06:19:36 PDT 2016


This patch introduces the --remote option and the image-proxy/image-cache
processes. It lets the user decide whether the checkpoint data is stored on
disk or sent through a socket to the image-proxy.
The latter forwards the data to the destination node, where image-cache
receives it.

The overall communication is performed as follows:
src_node: CRIU dump -> (sends images using a local socket) ->   image-proxy
									   |
									   V
dst_node: CRIU restore <- (receives images from a local socket)   <- image-cache

Running criu with the --remote option looks like this:

dst_node# criu image-cache --port <port> -o /tmp/image-cache.log
--local-cache-path <local_cache_path> ...
dst_node# criu restore --remote -o /tmp/image-cache.log
--local-cache-path <local_cache_path> ...
src_node# criu image-proxy --port <port> --address <dst_node> -o /tmp/image-proxy.log
--local-proxy-path <local_proxy_path> ...
src_node# criu dump -t <pid> --remote -o /tmp/dump.log
--local-proxy-path <local_proxy_path> ...

Signed-off-by: Rodrigo Bruno <rbruno at gsd.inesc-id.pt>
Signed-off-by: Katerina Koukiou <k.koukiou at gmail.com>
---
 criu/Makefile.crtools        |   4 +
 criu/cr-dump.c               |  17 +++
 criu/crtools.c               |  30 ++++-
 criu/image-desc.c            |   4 +-
 criu/image.c                 |  28 ++++-
 criu/img-remote-proto.c      |   4 +
 criu/img-remote.c            | 278 +++++++++++++++++++++++++++++++++++++++++++
 criu/include/cr_options.h    |   3 +
 criu/include/image.h         |   1 +
 criu/include/img-remote.h    |  79 ++++++++++++
 criu/include/protobuf-desc.h |   4 +
 criu/page-xfer.c             |  26 +++-
 criu/pagemap.c               |  53 +++++++--
 criu/protobuf-desc.c         |   1 +
 images/Makefile              |   1 +
 images/remote-image.proto    |  20 ++++
 16 files changed, 529 insertions(+), 24 deletions(-)
 create mode 100644 criu/img-remote.c
 create mode 100644 criu/include/img-remote.h
 create mode 100644 images/remote-image.proto

diff --git a/criu/Makefile.crtools b/criu/Makefile.crtools
index 8e9c3b4..95a0521 100644
--- a/criu/Makefile.crtools
+++ b/criu/Makefile.crtools
@@ -26,6 +26,10 @@ obj-y			+= files-reg.o
 obj-y			+= fsnotify.o
 obj-y			+= image-desc.o
 obj-y			+= image.o
+obj-y			+= img-remote.o
+obj-y			+= img-proxy.o
+obj-y			+= img-cache.o
+obj-y			+= img-remote-proto.o
 obj-y			+= ipc_ns.o
 obj-y			+= irmap.o
 obj-y			+= kcmp-ids.o
diff --git a/criu/cr-dump.c b/criu/cr-dump.c
index 06ff2d7..5b5a232 100644
--- a/criu/cr-dump.c
+++ b/criu/cr-dump.c
@@ -84,6 +84,8 @@
 
 #include "asm/dump.h"
 
+#include "img-remote.h"
+
 static char loc_buf[PAGE_SIZE];
 
 void free_mappings(struct vm_area_list *vma_area_list)
@@ -1504,6 +1506,11 @@ int cr_pre_dump_tasks(pid_t pid)
 	struct pstree_item *item;
 	int ret = -1;
 
+	if (opts.remote && push_snapshot_id() < 0) {
+		pr_err("Failed to push image namespace.\n");
+		goto err;
+	}
+
 	root_item = alloc_pstree_item();
 	if (!root_item)
 		goto err;
@@ -1660,6 +1667,11 @@ static int cr_dump_finish(int ret)
 
 	close_service_fd(CR_PROC_FD_OFF);
 
+	if (opts.remote && (finish_remote_dump() < 0)) {
+		pr_err("Finish remote dump failed.\n");
+		return post_dump_ret ? : 1;
+	}
+
 	if (ret) {
 		pr_err("Dumping FAILED.\n");
 	} else {
@@ -1680,6 +1692,11 @@ int cr_dump_tasks(pid_t pid)
 	pr_info("Dumping processes (pid: %d)\n", pid);
 	pr_info("========================================\n");
 
+	if (opts.remote && push_snapshot_id() < 0) {
+		pr_err("Failed to push image namespace.\n");
+		goto err;
+	}
+
 	root_item = alloc_pstree_item();
 	if (!root_item)
 		goto err;
diff --git a/criu/crtools.c b/criu/crtools.c
index 7e11c22..2d1ece2 100644
--- a/criu/crtools.c
+++ b/criu/crtools.c
@@ -48,6 +48,7 @@
 #include "namespaces.h"
 #include "setproctitle.h"
 #include "sysctl.h"
+#include "img-remote.h"
 
 struct cr_options opts;
 
@@ -72,6 +73,10 @@ void init_opts(void)
 	opts.ghost_limit = DEFAULT_GHOST_LIMIT;
 	opts.timeout = DEFAULT_TIMEOUT;
 	opts.empty_ns = 0;
+	opts.addr = DEFAULT_CACHE_HOST;
+	opts.port = DEFAULT_CACHE_PORT;
+	opts.local_cache_path = DEFAULT_IMG_PATH;
+	opts.local_proxy_path = DEFAULT_IMG_PATH;
 }
 
 static int parse_join_ns(const char *ptr)
@@ -324,6 +329,9 @@ int main(int argc, char *argv[], char *envp[])
 		{ "cgroup-props-file",		required_argument,	0, 1081	},
 		{ "cgroup-dump-controller",	required_argument,	0, 1082	},
 		{ SK_INFLIGHT_PARAM,		no_argument,		0, 1083	},
+		{ "remote",			no_argument,		0, 1084 },
+		{ "local-cache-path",		required_argument,	0, 1085 },
+		{ "local-proxy-path",		required_argument,	0, 1086 },
 		{ },
 	};
 
@@ -639,6 +647,15 @@ int main(int argc, char *argv[], char *envp[])
 			pr_msg("Will skip in-flight TCP connections\n");
 			opts.tcp_skip_in_flight = true;
 			break;
+		case 1084:
+			opts.remote = true;
+			break;
+		case 1085:
+			opts.local_cache_path = optarg;
+			break;
+		case 1086:
+			opts.local_proxy_path = optarg;
+			break;
 		case 'V':
 			pr_msg("Version: %s\n", CRIU_VERSION);
 			if (strcmp(CRIU_GITID, "0"))
@@ -794,6 +811,12 @@ int main(int argc, char *argv[], char *envp[])
 	if (!strcmp(argv[optind], "page-server"))
 		return cr_page_server(opts.daemon_mode, -1) > 0 ? 0 : 1;
 
+	if (!strcmp(argv[optind], "image-cache"))
+		return image_cache(opts.local_cache_path, opts.port);
+
+	if (!strcmp(argv[optind], "image-proxy"))
+		return image_proxy(opts.local_proxy_path, opts.addr, opts.port);
+
 	if (!strcmp(argv[optind], "service"))
 		return cr_service(opts.daemon_mode);
 
@@ -821,6 +844,8 @@ usage:
 "  criu service [<options>]\n"
 "  criu dedup\n"
 "  criu lazy-pages -D DIR [<options>]\n"
+"  criu image-cache [<options>]\n"
+"  criu image-proxy [<options>]\n"
 "\n"
 "Commands:\n"
 "  dump           checkpoint a process/tree identified by pid\n"
@@ -833,6 +858,8 @@ usage:
 "  dedup          remove duplicates in memory dump\n"
 "  cpuinfo dump   writes cpu information into image file\n"
 "  cpuinfo check  validates cpu information read from image file\n"
+"  image-cache    launch destination-side cache for images sent from the source-side\n"
+"  image-proxy    launch source-side proxy to sent images to the destination-side\n"
 	);
 
 	if (usage_error) {
@@ -864,6 +891,7 @@ usage:
 "                        this requires running a second instance of criu\n"
 "                        in lazy-pages mode: 'criu lazy-pages -D DIR'\n"
 "                        --lazy-pages and lazy-pages mode require userfaultfd\n"
+"  --remote              dump/restore images directly to/from remote node using image-proxy/image-cache\n"
 "\n"
 "* Special resources support:\n"
 "  -x|--" USK_EXT_PARAM "inode,.." "      allow external unix connections (optionally can be assign socket's inode that allows one-sided dump)\n"
@@ -973,7 +1001,7 @@ usage:
 "\n"
 "Page/Service server options:\n"
 "  --address ADDR        address of server or service\n"
-"  --port PORT           port of page server\n"
+"  --port PORT           port of page serve or service\n"
 "  -d|--daemon           run in the background after creating socket\n"
 "\n"
 "Other options:\n"
diff --git a/criu/image-desc.c b/criu/image-desc.c
index 2b31354..e146ef8 100644
--- a/criu/image-desc.c
+++ b/criu/image-desc.c
@@ -102,13 +102,13 @@ struct cr_fd_desc_tmpl imgset_template[CR_FD_MAX] = {
 	[CR_FD_STATS] = {
 		.fmt	= "stats-%s",
 		.magic	= STATS_MAGIC,
-		.oflags = O_SERVICE,
+		.oflags = O_SERVICE | O_FORCE_LOCAL,
 	},
 
 	[CR_FD_IRMAP_CACHE] = {
 		.fmt	= "irmap-cache",
 		.magic	= IRMAP_CACHE_MAGIC,
-		.oflags = O_SERVICE,
+		.oflags = O_SERVICE | O_FORCE_LOCAL,
 	},
 
 	[CR_FD_FILE_LOCKS_PID] = {
diff --git a/criu/image.c b/criu/image.c
index a3bb285..38a8ea9 100644
--- a/criu/image.c
+++ b/criu/image.c
@@ -13,6 +13,7 @@
 #include "protobuf.h"
 #include "images/inventory.pb-c.h"
 #include "images/pagemap.pb-c.h"
+#include "img-remote.h"
 
 bool ns_per_id = false;
 bool img_common_magic = true;
@@ -309,11 +310,28 @@ static int do_open_image(struct cr_img *img, int dfd, int type, unsigned long of
 {
 	int ret, flags;
 
-	flags = oflags & ~(O_NOBUF | O_SERVICE);
+	flags = oflags & ~(O_NOBUF | O_SERVICE | O_FORCE_LOCAL);
 
-	ret = openat(dfd, path, flags, CR_FD_PERM);
+	if (opts.remote && !(oflags & O_FORCE_LOCAL)) {
+		char *snapshot_id = NULL;
+
+		snapshot_id = get_snapshot_id_from_idx(dfd);
+
+		if (snapshot_id == NULL)
+			ret = -1;
+		else if (flags == O_RDONLY) {
+			pr_debug("do_open_remote_image RDONLY path=%s snapshot_id=%s\n",
+					path, snapshot_id);
+			ret = read_remote_image_connection(snapshot_id, path);
+		} else {
+			pr_debug("do_open_remote_image WDONLY path=%s snapshot_id=%s\n",
+					path, snapshot_id);
+			ret = write_remote_image_connection(snapshot_id, path, O_WRONLY);
+		}
+	} else
+		ret = openat(dfd, path, flags, CR_FD_PERM);
 	if (ret < 0) {
-		if (!(flags & O_CREAT) && (errno == ENOENT)) {
+		if (!(flags & O_CREAT) && (errno == ENOENT || ret == -ENOENT)) {
 			pr_info("No %s image\n", path);
 			img->_x.fd = EMPTY_IMG_FD;
 			goto skip_magic;
@@ -413,7 +431,9 @@ int open_image_dir(char *dir)
 	close(fd);
 	fd = ret;
 
-	if (opts.img_parent) {
+	if (opts.remote) {
+		init_snapshot_id(dir);
+	} else if (opts.img_parent) {
 		ret = symlinkat(opts.img_parent, fd, CR_PARENT_LINK);
 		if (ret < 0 && errno != EEXIST) {
 			pr_perror("Can't link parent snapshot");
diff --git a/criu/img-remote-proto.c b/criu/img-remote-proto.c
index d8fd8cd..19cfe35 100644
--- a/criu/img-remote-proto.c
+++ b/criu/img-remote-proto.c
@@ -236,6 +236,10 @@ int setup_TCP_client_socket(char *hostname, int port)
 
 int setup_UNIX_server_socket(char *path)
 {
+	if (!path) {
+		pr_err("Path should not be empty\n");
+		return -1;
+	}
 	struct sockaddr_un addr;
 	int sockfd = socket(AF_UNIX, SOCK_STREAM, 0);
 
diff --git a/criu/img-remote.c b/criu/img-remote.c
new file mode 100644
index 0000000..9e244c6
--- /dev/null
+++ b/criu/img-remote.c
@@ -0,0 +1,278 @@
+#include <unistd.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <netdb.h>
+#include "xmalloc.h"
+#include "criu-log.h"
+#include "img-remote.h"
+#include "img-remote-proto.h"
+#include "images/remote-image.pb-c.h"
+#include "protobuf-desc.h"
+#include <fcntl.h>
+#include "servicefd.h"
+#include "compiler.h"
+#include "cr_options.h"
+
+#define PB_LOCAL_IMAGE_SIZE PATHLEN
+
+static char *snapshot_id;
+
+LIST_HEAD(snapshot_head);
+
+/* A snapshot is a dump or pre-dump operation. Each snapshot is identified by an
+ * ID which corresponds to the working directory specified by the user.
+ */
+struct snapshot {
+	char snapshot_id[PATHLEN];	/* ID string, at most PATHLEN bytes */
+	struct list_head l;		/* linkage into the snapshot_head list */
+};
+
+/* Allocates a list node with a NUL-terminated copy of snapshot_id, or NULL. */
+struct snapshot *new_snapshot(char *snapshot_id)
+{
+	struct snapshot *s = calloc(1, sizeof(*s));
+	if (!s) {
+		pr_perror("Failed to allocate snapshot structure");
+		return NULL;
+	}
+	strncpy(s->snapshot_id, snapshot_id, PATHLEN - 1); /* last byte stays 0 */
+	return s;
+}
+
+void add_snapshot(struct snapshot *snapshot)
+{
+	list_add_tail(&(snapshot->l), &snapshot_head);
+}
+
+static char *get_local_img_path(void)
+{
+	static char *local_img_path = NULL;
+
+	if (local_img_path != NULL)
+		return local_img_path;
+
+	if (strcmp(opts.local_cache_path, DEFAULT_IMG_PATH))
+		local_img_path = opts.local_cache_path;
+	else if (strcmp(opts.local_proxy_path, DEFAULT_IMG_PATH))
+		local_img_path = opts.local_proxy_path;
+	else if (opts.local_proxy_path || opts.local_cache_path)
+		local_img_path = DEFAULT_IMG_PATH;
+	else
+		pr_err("Local img path is missing. Possible missing "
+				"--local-{cache,proxy}-path option\n");
+
+	return local_img_path;
+}
+
+/* Opens a local connection for reading image path of snapshot_id. Returns the
+ * socket fd, -ENOENT if the image does not exist, or -1 on any other error.
+ */
+int read_remote_image_connection(char *snapshot_id, char *path)
+{
+	int error, ret = -1;
+	int sockfd = setup_UNIX_client_socket(get_local_img_path());
+
+	if (sockfd < 0) {
+		pr_perror("Error opening local connection for %s:%s", path, snapshot_id);
+		return -1;
+	}
+
+	if (write_header(sockfd, snapshot_id, path, O_RDONLY) < 0) {
+		pr_perror("Error writing header for %s:%s", path, snapshot_id);
+	} else if (read_reply_header(sockfd, &error) < 0) {
+		pr_perror("Error reading reply header for %s:%s", path, snapshot_id);
+	} else if (!error) {
+		return sockfd;
+	} else if (error == ENOENT) {
+		pr_info("Image does not exist (%s:%s)\n", path, snapshot_id);
+		ret = -ENOENT;
+	} else {
+		pr_err("Unexpected error returned: %d (%s:%s)\n", error, path, snapshot_id);
+	}
+
+	/* Every path reaching here failed: never leak the socket. */
+	close(sockfd);
+	return ret;
+}
+
+/* Opens a local connection for writing image path; returns the fd or -1. */
+int write_remote_image_connection(char *snapshot_id, char *path, int flags)
+{
+	int sockfd = setup_UNIX_client_socket(get_local_img_path());
+
+	if (sockfd < 0)
+		return -1;
+	if (write_header(sockfd, snapshot_id, path, flags) >= 0)
+		return sockfd;
+	pr_perror("Error writing header for %s:%s", path, snapshot_id);
+	close(sockfd);	/* do not leak the socket on failure */
+	return -1;
+}
+
+int finish_remote_dump(void)
+{
+	pr_info("Dump side is calling finish\n");
+	int fd = write_remote_image_connection(NULL_SNAPSHOT_ID, DUMP_FINISH, O_WRONLY);
+
+	if (fd == -1) {
+		pr_perror("Unable to open finish dump connection");
+		return -1;
+	}
+
+	close(fd);
+	return 0;
+}
+
+int skip_remote_bytes(int fd, unsigned long len)
+{
+	static char buf[4096];
+	int n = 0;
+	unsigned long curr = 0;
+
+	for (; curr < len; ) {
+		n = read(fd, buf, min(len - curr, (unsigned long)4096));
+		if (n == 0) {
+			pr_perror("Unexpected end of stream (skipping %lx/%lx bytes)",
+				curr, len);
+			return -1;
+		} else if (n > 0) {
+			curr += n;
+		} else {
+			pr_perror("Error while skipping bytes from stream (%lx/%lx)",
+				curr, len);
+			return -1;
+		}
+	}
+
+	if (curr != len) {
+		pr_perror("Unable to skip the current number of bytes: %lx instead of %lx",
+			curr, len);
+		return -1;
+	}
+	return 0;
+}
+
+/* Reads the ordered snapshot id list from the local image socket and appends
+ * every id to snapshot_head. Returns 0 on success (also when no list exists
+ * yet) and a negative value on error.
+ */
+static int pull_snapshot_ids(void)
+{
+	int n, sockfd;
+	SnapshotIdEntry *ls;
+	struct snapshot *s;
+
+	sockfd = read_remote_image_connection(NULL_SNAPSHOT_ID, PARENT_IMG);
+
+	/* A missing file is reported as -ENOENT by the callee, not via errno. */
+	if (sockfd == -ENOENT)
+		return 0;
+	else if (sockfd < 0) {
+		pr_perror("Unable to open snapshot id read connection");
+		return -1;
+	}
+
+	/* A zero return from pb_read_obj is treated as end of stream. */
+	while ((n = pb_read_obj(sockfd, (void **)&ls, PB_SNAPSHOT_ID)) > 0) {
+		s = new_snapshot(ls->snapshot_id);
+		if (s) {
+			add_snapshot(s);
+			pr_info("[read_snapshot ids] parent = %s\n", ls->snapshot_id);
+		}
+		/* NOTE(review): assumes ls was allocated by the protobuf unpacker. */
+		snapshot_id_entry__free_unpacked(ls, NULL);
+		if (!s) {
+			pr_perror("Unable create new snapshot structure");
+			close(sockfd);
+			return -1;
+		}
+	}
+	if (n < 0)
+		pr_perror("Unable to read remote snapshot ids");
+	close(sockfd);
+	return n;
+}
+
+/* Appends the current snapshot_id to the id list kept behind the local image
+ * socket. Returns the result of pb_write_obj(), or -1 on failure.
+ */
+int push_snapshot_id(void)
+{
+	int n = -1;
+	SnapshotIdEntry rn = SNAPSHOT_ID_ENTRY__INIT;
+	int sockfd = write_remote_image_connection(NULL_SNAPSHOT_ID, PARENT_IMG, O_APPEND);
+
+	if (sockfd < 0) {
+		pr_perror("Unable to open snapshot id push connection");
+		return -1;
+	}
+	rn.snapshot_id = xmalloc(sizeof(char) * PATHLEN);
+	if (rn.snapshot_id && snapshot_id) {
+		strncpy(rn.snapshot_id, snapshot_id, PATHLEN - 1);
+		rn.snapshot_id[PATHLEN - 1] = '\0'; /* strncpy may not terminate */
+		n = pb_write_obj(sockfd, &rn, PB_SNAPSHOT_ID);
+	} else {
+		pr_err("No snapshot id to push or no memory for it\n");
+	}
+	xfree(rn.snapshot_id);
+	close(sockfd);
+	return n;
+}
+
+void init_snapshot_id(char *si)
+{
+	snapshot_id = si;
+}
+
+char *get_curr_snapshot_id(void)
+{
+	return snapshot_id;
+}
+
+int get_curr_snapshot_id_idx(void)
+{
+	struct snapshot *si;
+	int idx = 0;
+
+	if (list_empty(&snapshot_head))
+		pull_snapshot_ids();
+
+	list_for_each_entry(si, &snapshot_head, l) {
+	if (!strncmp(si->snapshot_id, snapshot_id, PATHLEN))
+			return idx;
+		idx++;
+	}
+
+	pr_perror("Error, could not find current snapshot id (%s) fd", snapshot_id);
+	return -1;
+}
+
+char *get_snapshot_id_from_idx(int idx)
+{
+	struct snapshot *si;
+
+	if (list_empty(&snapshot_head))
+		pull_snapshot_ids();
+
+	/* Note: if idx is the service fd then we need the current
+	 * snapshot_id idx. Else we need a parent snapshot_id idx.
+	 */
+	if (idx == get_service_fd(IMG_FD_OFF))
+		idx = get_curr_snapshot_id_idx();
+
+	list_for_each_entry(si, &snapshot_head, l) {
+		if (!idx)
+			return si->snapshot_id;
+		idx--;
+	}
+
+	pr_perror("Error, could not find snapshot id for idx %d", idx);
+	return NULL;
+}
+
+int get_curr_parent_snapshot_id_idx(void)
+{
+	return get_curr_snapshot_id_idx() - 1;
+}
diff --git a/criu/include/cr_options.h b/criu/include/cr_options.h
index 35c1ace..6f1b202 100644
--- a/criu/include/cr_options.h
+++ b/criu/include/cr_options.h
@@ -113,6 +113,9 @@ struct cr_options {
 	bool			lazy_pages;
 	bool			tcp_skip_in_flight;
 	char			*work_dir;
+	bool			remote;
+	char			*local_cache_path;
+	char			*local_proxy_path;
 };
 
 extern struct cr_options opts;
diff --git a/criu/include/image.h b/criu/include/image.h
index 65b7b0a..9ba6ab8 100644
--- a/criu/include/image.h
+++ b/criu/include/image.h
@@ -104,6 +104,7 @@ extern bool img_common_magic;
 #define O_DUMP		(O_WRONLY | O_CREAT | O_TRUNC)
 #define O_SHOW		(O_RDONLY | O_NOBUF)
 #define O_RSTR		(O_RDONLY)
+#define O_FORCE_LOCAL   (O_SYNC)
 
 struct cr_img {
 	union {
diff --git a/criu/include/img-remote.h b/criu/include/img-remote.h
new file mode 100644
index 0000000..706b67f
--- /dev/null
+++ b/criu/include/img-remote.h
@@ -0,0 +1,79 @@
+#include <limits.h>
+
+#ifndef IMAGE_REMOTE_H
+#define	IMAGE_REMOTE_H
+
+#define PATHLEN PATH_MAX
+#define DUMP_FINISH "DUMP_FINISH"
+#define PARENT_IMG "parent"
+#define NULL_SNAPSHOT_ID "null"
+#define DEFAULT_IMG_PATH "/tmp/criu-img-path.sock"
+#define DEFAULT_CACHE_PORT 9996
+#define DEFAULT_CACHE_HOST "localhost"
+
+/* Called by restore to get the fd correspondent to a particular path. This call
+ * will block until the connection is received.
+ */
+int read_remote_image_connection(char *snapshot_id, char *path);
+
+/* Called by dump to create a socket connection to the restore side. The socket
+ * fd is returned for further writing operations.
+ */
+int write_remote_image_connection(char *snapshot_id, char *path, int flags);
+
+/* Called by dump when everything is dumped. This function creates a new
+ * connection with a special control name. The restore side uses it to ack that
+ * no more files are coming.
+ */
+int finish_remote_dump(void);
+
+/* Starts an image proxy daemon (dump side). It receives image files through
+ * socket connections and forwards them to the image cache (restore side).
+ */
+int image_proxy(char *local_proxy_path, char *cache_host, unsigned short cache_port);
+
+/* Starts an image cache daemon (restore side). It receives image files through
+ * socket connections and caches them until they are requested by the restore
+ * process.
+ */
+int image_cache(char *local_cache_path, unsigned short cache_port);
+
+/* Reads (discards) 'len' bytes from fd. This is used to emulate the function
+ * lseek, which is used to advance the file needle.
+ */
+int skip_remote_bytes(int fd, unsigned long len);
+
+/* To support iterative migration, the concept of snapshot_id is introduced
+ * (only when remote migration is enabled). Each image is tagged with one
+ * snapshot_id. The snapshot_id is the image directory used for the operation
+ * that creates the image (either predump or dump). Images stored in memory
+ * (both in Image Proxy and Image Cache) are identified by their name and
+ * snapshot_id. Snapshot_ids are ordered so that we can find parent pagemaps
+ * (that will be used when restoring the process).
+ */
+
+/* Sets the current snapshot_id. The pointer is stored, not copied. */
+void init_snapshot_id(char *ns);
+
+/* Returns the current snapshot_id. */
+char *get_curr_snapshot_id(void);
+
+/* Returns the snapshot_id index representing the current snapshot_id. This
+ * index represents the hierarchy position. For example: images tagged with
+ * the snapshot_id with index 1 are more recent than the images tagged with
+ * the snapshot_id with index 0. Returns -1 if the current id is not found.
+ */
+int get_curr_snapshot_id_idx(void);
+
+/* Returns the snapshot_id associated with the snapshot_id index, or NULL. */
+char *get_snapshot_id_from_idx(int idx);
+
+/* Pushes the current snapshot_id into the snapshot_id hierarchy (into the Image
+ * Proxy and Image Cache).
+ */
+int push_snapshot_id(void);
+
+/* Returns the snapshot id index that precedes the current snapshot_id. */
+int get_curr_parent_snapshot_id_idx(void);
+
+#endif
diff --git a/criu/include/protobuf-desc.h b/criu/include/protobuf-desc.h
index 6c76b49..43ac534 100644
--- a/criu/include/protobuf-desc.h
+++ b/criu/include/protobuf-desc.h
@@ -59,6 +59,10 @@ enum {
 	PB_BINFMT_MISC,		/* 50 */
 	PB_TTY_DATA,
 	PB_AUTOFS,
+	PB_REMOTE_IMAGE,        /* Header for images sent from proxy to cache.*/
+	PB_LOCAL_IMAGE,         /* Header for reading/writing images from/to proxy or cache. */
+	PB_LOCAL_IMAGE_REPLY,	/* Header for reading/writing images reply. */
+	PB_SNAPSHOT_ID,         /* Contains a single id. Used for reading/writing ids from proxy or cache. */
 
 	/* PB_AUTOGEN_STOP */
 
diff --git a/criu/page-xfer.c b/criu/page-xfer.c
index 0da20e2..678ed85 100644
--- a/criu/page-xfer.c
+++ b/criu/page-xfer.c
@@ -19,6 +19,8 @@
 #include "pstree.h"
 #include "parasite-syscall.h"
 
+#include "img-remote.h"
+
 static int page_server_sk = -1;
 
 struct page_server_iov {
@@ -310,7 +312,8 @@ static int open_page_local_xfer(struct page_xfer *xfer, int fd_type, long id)
 
 		xfer->parent = xmalloc(sizeof(*xfer->parent));
 		if (!xfer->parent) {
-			close(pfd);
+			if (!opts.remote)
+				close(pfd);
 			return -1;
 		}
 
@@ -319,10 +322,12 @@ static int open_page_local_xfer(struct page_xfer *xfer, int fd_type, long id)
 			pr_perror("No parent image found, though parent directory is set");
 			xfree(xfer->parent);
 			xfer->parent = NULL;
-			close(pfd);
+			if (!opts.remote)
+				close(pfd);
 			goto out;
 		}
-		close(pfd);
+		if (!opts.remote)
+			close(pfd);
 	}
 
 out:
@@ -459,9 +464,16 @@ int check_parent_local_xfer(int fd_type, int id)
 	struct stat st;
 	int ret, pfd;
 
-	pfd = openat(get_service_fd(IMG_FD_OFF), CR_PARENT_LINK, O_RDONLY);
-	if (pfd < 0 && errno == ENOENT)
-		return 0;
+	if (opts.remote) {
+		pfd = get_curr_parent_snapshot_id_idx();
+		pr_err("Unable to get parent snapsgot id");
+		if (pfd == -1)
+			return -1;
+	} else {
+		pfd = openat(get_service_fd(IMG_FD_OFF), CR_PARENT_LINK, O_RDONLY);
+		if (pfd < 0 && errno == ENOENT)
+			return 0;
+	}
 
 	snprintf(path, sizeof(path), imgset_template[fd_type].fmt, id);
 	ret = fstatat(pfd, path, &st, 0);
@@ -523,6 +535,8 @@ int check_parent_page_xfer(int fd_type, long id)
 {
 	if (opts.use_page_server)
 		return check_parent_server_xfer(fd_type, id);
+	else if (opts.remote)
+		return get_curr_parent_snapshot_id_idx() == -1 ? 0 : 1;
 	else
 		return check_parent_local_xfer(fd_type, id);
 }
diff --git a/criu/pagemap.c b/criu/pagemap.c
index 227d561..81b31c5 100644
--- a/criu/pagemap.c
+++ b/criu/pagemap.c
@@ -11,6 +11,8 @@
 #include "protobuf.h"
 #include "images/pagemap.pb-c.h"
 
+#include "img-remote.h"
+
 #ifndef SEEK_DATA
 #define SEEK_DATA	3
 #define SEEK_HOLE	4
@@ -139,6 +141,8 @@ static int get_pagemap(struct page_read *pr, struct iovec *iov)
 		if (!pe->zero)
 			break;
 		put_pagemap(pr);
+
+		pe = pr->pmes[pr->curr_pme];
 	}
 
 	pagemap2iovec(pe, iov);
@@ -160,7 +164,7 @@ static void skip_pagemap_pages(struct page_read *pr, unsigned long len)
 		return;
 
 	pr_debug("\tpr%u Skip %lu bytes from page-dump\n", pr->id, len);
-	if (!pr->pe->in_parent && !pr->pe->zero && !pr->pe->lazy)
+	if (!pr->pe->in_parent && !pr->pe->zero && !pr->pe->lazy && !opts.remote)
 		pr->pi_off += len;
 	pr->cvaddr += len;
 }
@@ -268,12 +272,18 @@ static int read_pagemap_page(struct page_read *pr, unsigned long vaddr, int nr,
 	} else {
 		int fd = img_raw_fd(pr->pi);
 		off_t current_vaddr = lseek(fd, pr->pi_off, SEEK_SET);
+		size_t curr = 0;
 
 		pr_debug("\tpr%u Read page from self %lx/%"PRIx64"\n", pr->id, pr->cvaddr, current_vaddr);
-		ret = read(fd, buf, len);
-		if (ret != len) {
-			pr_perror("Can't read mapping page %d", ret);
-			return -1;
+		while (1) {
+			ret = read(fd, buf + curr, len - curr);
+			if (ret < 1) {
+				pr_perror("Can't read mapping page %d", ret);
+				return -1;
+			}
+			curr += ret;
+			if (curr == len)
+				break;
 		}
 
 		pr->pi_off += len;
@@ -345,9 +355,24 @@ static int try_open_parent(int dfd, int pid, struct page_read *pr, int pr_flags)
 	int pfd, ret;
 	struct page_read *parent = NULL;
 
-	pfd = openat(dfd, CR_PARENT_LINK, O_RDONLY);
-	if (pfd < 0 && errno == ENOENT)
-		goto out;
+	if (opts.remote) {
+		/* Note: we are replacing a real directory FD for a snapshot_id
+		 * index. Since we need the parent of the current snapshot_id,
+		 * we want the current snapshot_id index minus one. It is
+		 * possible that dfd is already a snapshot_id index. We test it
+		 * by comparing it to the service FD. When opening an image (see
+		 * do_open_image) we convert the snapshot_id index into a real
+		 * snapshot_id.
+		 */
+		pfd = dfd == get_service_fd(IMG_FD_OFF) ?
+			get_curr_snapshot_id_idx() - 1 : dfd - 1;
+		if (pfd < 0)
+			goto out;
+	} else {
+		pfd = openat(dfd, CR_PARENT_LINK, O_RDONLY);
+		if (pfd < 0 && errno == ENOENT)
+			goto out;
+	}
 
 	parent = xmalloc(sizeof(*parent));
 	if (!parent)
@@ -362,7 +387,8 @@ static int try_open_parent(int dfd, int pid, struct page_read *pr, int pr_flags)
 		parent = NULL;
 	}
 
-	close(pfd);
+	if (!opts.remote)
+		close(pfd);
 out:
 	pr->parent = parent;
 	return 0;
@@ -370,7 +396,8 @@ out:
 err_free:
 	xfree(parent);
 err_cl:
-	close(pfd);
+	if (!opts.remote)
+		close(pfd);
 	return -1;
 }
 
@@ -387,7 +414,11 @@ static int init_pagemaps(struct page_read *pr)
 	off_t fsize;
 	int nr_pmes, nr_realloc;
 
-	fsize = img_raw_size(pr->pmi);
+	if (!opts.remote)
+		fsize = img_raw_size(pr->pmi);
+	else
+		fsize = 1024; /*FIXME*/
+
 	if (fsize < 0)
 		return -1;
 
diff --git a/criu/protobuf-desc.c b/criu/protobuf-desc.c
index 9352a76..c1850f9 100644
--- a/criu/protobuf-desc.c
+++ b/criu/protobuf-desc.c
@@ -64,6 +64,7 @@
 #include "images/seccomp.pb-c.h"
 #include "images/binfmt-misc.pb-c.h"
 #include "images/autofs.pb-c.h"
+#include "images/remote-image.pb-c.h"
 
 struct cr_pb_message_desc cr_pb_descs[PB_MAX];
 
diff --git a/images/Makefile b/images/Makefile
index cf50794..3753d62 100644
--- a/images/Makefile
+++ b/images/Makefile
@@ -60,6 +60,7 @@ proto-obj-y	+= binfmt-misc.o
 proto-obj-y	+= time.o
 proto-obj-y	+= sysctl.o
 proto-obj-y	+= autofs.o
+proto-obj-y	+= remote-image.o
 
 CFLAGS		+= -iquote $(obj)/
 
diff --git a/images/remote-image.proto b/images/remote-image.proto
new file mode 100644
index 0000000..1212627
--- /dev/null
+++ b/images/remote-image.proto
@@ -0,0 +1,20 @@
+message local_image_entry {
+	required string name		= 1;
+	required string snapshot_id	= 2;
+	required uint32 open_mode	= 3;
+}
+
+message remote_image_entry {
+	required string name		= 1;
+	required string snapshot_id	= 2;
+	required uint32 open_mode	= 3;
+	required uint64 size		= 4;
+}
+
+message local_image_reply_entry {
+	required uint32 error           = 1;
+}
+
+message snapshot_id_entry {
+	required string snapshot_id	= 1;
+}
-- 
2.7.3



More information about the CRIU mailing list