[CRIU] [PATCH 1/4] mem: Introduce image-proxy/image-cache & remote option

Mike Rapoport rppt at linux.vnet.ibm.com
Wed Aug 3 05:42:31 PDT 2016


On Tue, Aug 02, 2016 at 04:39:37PM +0000, Katerina Koukiou wrote:
> This patch introduces --remote option and image-proxy/image-cache processes.
> This leaves user the option to decide if the checkpoint data are to be stored
> on disk or sent through socket to the image-proxy.
> The latter forwards the data to the destination node where image-cache receives
> them.
> 
> The overall communication is performed as follows:
> rc_node CRIU dump -> (sends images using a local socket) ->     image-proxy
> 									   |
> 									   V
> dst_node: CRIU restore <- (receives images from a local socket)   <- image-cache
> 
> Running criu with --remote option is like this:
> 
> dst_node# criu image-cache --port <port> -o /tmp/image-cache.log
> --local-cache-path <local_cache_path> ...
> dst_node# criu restore --remote -o /tmp/image-cache.log
> --local-cache-path <local_cache_path> ...
> src_node# criu image-proxy --port <port> --address <dst_node> -o /tmp/image-proxy.log
> --local-proxy-path <local_proxy_path> ...
> src_node# criu dump -t <pid> --remote -o /tmp/dump.log
> --local-proxy-path <local_proxy_path> ...
> 
> Signed-off-by: Rodrigo Bruno <rbruno at gsd.inesc-id.pt>
> Signed-off-by: Katerina Koukiou <k.koukiou at gmail.com>
> ---
>  criu/Makefile.crtools        |   4 +
>  criu/cr-dump.c               |  15 +++
>  criu/crtools.c               |  30 ++++-
>  criu/image-desc.c            |   4 +-
>  criu/image.c                 |  26 ++++-
>  criu/img-remote.c            | 272 +++++++++++++++++++++++++++++++++++++++++++
>  criu/include/cr_options.h    |   3 +
>  criu/include/image.h         |   1 +
>  criu/include/protobuf-desc.h |   4 +
>  criu/include/util.h          |   1 +
>  criu/page-xfer.c             |  27 ++++-
>  criu/pagemap.c               |  48 ++++++--
>  criu/protobuf-desc.c         |   1 +
>  criu/util.c                  |  15 +++
>  images/Makefile              |   1 +
>  images/remote-image.proto    |  20 ++++
>  16 files changed, 449 insertions(+), 23 deletions(-)
>  create mode 100644 criu/img-remote.c
>  create mode 100644 images/remote-image.proto
> 
> diff --git a/criu/Makefile.crtools b/criu/Makefile.crtools
> index 2665f92..8189960 100644
> --- a/criu/Makefile.crtools
> +++ b/criu/Makefile.crtools
> @@ -26,6 +26,10 @@ obj-y			+= files-reg.o
>  obj-y			+= fsnotify.o
>  obj-y			+= image-desc.o
>  obj-y			+= image.o
> +obj-y			+= img-remote.o
> +obj-y			+= img-proxy.o
> +obj-y			+= img-cache.o
> +obj-y			+= img-remote-proto.o
>  obj-y			+= ipc_ns.o
>  obj-y			+= irmap.o
>  obj-y			+= kcmp-ids.o
> diff --git a/criu/cr-dump.c b/criu/cr-dump.c
> index be83bec..a8b5ab6 100644
> --- a/criu/cr-dump.c
> +++ b/criu/cr-dump.c
> @@ -84,6 +84,8 @@
> 
>  #include "asm/dump.h"
> 
> +#include "img-remote.h"
> +
>  static char loc_buf[PAGE_SIZE];
> 
>  void free_mappings(struct vm_area_list *vma_area_list)
> @@ -1483,6 +1485,11 @@ int cr_pre_dump_tasks(pid_t pid)
>  	struct pstree_item *item;
>  	int ret = -1;
> 
> +	if (opts.remote && push_snapshot_id() < 0) {
> +		pr_err("Failed to push image namespace.\n");
> +		goto err;
> +	}
> +
>  	root_item = alloc_pstree_item();
>  	if (!root_item)
>  		goto err;
> @@ -1613,6 +1620,9 @@ static int cr_dump_finish(int ret)
> 
>  	close_service_fd(CR_PROC_FD_OFF);
> 
> +	if (opts.remote)
> +		finish_remote_dump();
> +

Please take care of possible errors.
Would it be correct to call finish_remote_dump even is ret != 0?
And, what happens if finish_remote_dump fails?

>  	if (ret) {
>  		pr_err("Dumping FAILED.\n");
>  	} else {
> @@ -1633,6 +1643,11 @@ int cr_dump_tasks(pid_t pid)
>  	pr_info("Dumping processes (pid: %d)\n", pid);
>  	pr_info("========================================\n");
> 
> +	if (opts.remote && push_snapshot_id() < 0) {
> +		pr_err("Failed to push image namespace.\n");
> +		goto err;
> +	}
> +
>  	root_item = alloc_pstree_item();
>  	if (!root_item)
>  		goto err;
> diff --git a/criu/crtools.c b/criu/crtools.c
> index a0b2fc9..4efca16 100644
> --- a/criu/crtools.c
> +++ b/criu/crtools.c
> @@ -48,6 +48,7 @@
> 
>  #include "setproctitle.h"
>  #include "sysctl.h"
> +#include "img-remote.h"
> 
>  struct cr_options opts;
> 
> @@ -72,6 +73,10 @@ void init_opts(void)
>  	opts.ghost_limit = DEFAULT_GHOST_LIMIT;
>  	opts.timeout = DEFAULT_TIMEOUT;
>  	opts.empty_ns = 0;
> +	opts.addr = DEFAULT_CACHE_HOST;
> +	opts.port = DEFAULT_CACHE_PORT;
> +	opts.local_cache_path = DEFAULT_IMG_PATH;
> +	opts.local_proxy_path = DEFAULT_IMG_PATH;
>  }
> 
>  static int parse_join_ns(const char *ptr)
> @@ -279,6 +284,9 @@ int main(int argc, char *argv[], char *envp[])
>  		{ "cgroup-props-file",		required_argument,	0, 1081	},
>  		{ "cgroup-dump-controller",	required_argument,	0, 1082	},
>  		{ SK_INFLIGHT_PARAM,		no_argument,		0, 1083	},
> +		{ "remote",			no_argument,		0, 1084 },
> +		{ "local-cache-path",		required_argument,	0, 1085 },
> +		{ "local-proxy-path",		required_argument,	0, 1086 },
>  		{ },
>  	};
> 
> @@ -587,6 +595,15 @@ int main(int argc, char *argv[], char *envp[])
>  			pr_msg("Will skip in-flight TCP connections\n");
>  			opts.tcp_skip_in_flight = true;
>  			break;
> +		case 1084:
> +			opts.remote = true;
> +			break;
> +		case 1085:
> +			opts.local_cache_path = optarg;
> +			break;
> +		case 1086:
> +			opts.local_proxy_path = optarg;
> +			break;
>  		case 'V':
>  			pr_msg("Version: %s\n", CRIU_VERSION);
>  			if (strcmp(CRIU_GITID, "0"))
> @@ -727,6 +744,12 @@ int main(int argc, char *argv[], char *envp[])
>  	if (!strcmp(argv[optind], "page-server"))
>  		return cr_page_server(opts.daemon_mode, -1) > 0 ? 0 : 1;
> 
> +	if (!strcmp(argv[optind], "image-cache"))
> +		return image_cache(opts.local_cache_path, opts.port);
> +
> +	if (!strcmp(argv[optind], "image-proxy"))
> +		return image_proxy(opts.local_proxy_path, opts.addr, opts.port);
> +
>  	if (!strcmp(argv[optind], "service"))
>  		return cr_service(opts.daemon_mode);
> 
> @@ -753,6 +776,8 @@ usage:
>  "  criu page-server\n"
>  "  criu service [<options>]\n"
>  "  criu dedup\n"
> +"  criu image-cache [<options>]\n"
> +"  criu image-proxy [<options>]\n"
>  "\n"
>  "Commands:\n"
>  "  dump           checkpoint a process/tree identified by pid\n"
> @@ -765,6 +790,8 @@ usage:
>  "  dedup          remove duplicates in memory dump\n"
>  "  cpuinfo dump   writes cpu information into image file\n"
>  "  cpuinfo check  validates cpu information read from image file\n"
> +"  image-cache    launch destination-side cache for images sent from the source-side\n"
> +"  image-proxy    launch source-side proxy to sent images to the destination-side\n"
>  	);
> 
>  	if (usage_error) {
> @@ -791,6 +818,7 @@ usage:
>  "                        restore making it the parent of the restored process\n"
>  "  --freeze-cgroup\n"
>  "                        use cgroup freezer to collect processes\n"
> +"  --remote              dump/restore images directly to/from remote node using image-proxy/image-cache\n"
>  "\n"
>  "* Special resources support:\n"
>  "  -x|--" USK_EXT_PARAM "inode,.." "      allow external unix connections (optionally can be assign socket's inode that allows one-sided dump)\n"
> @@ -900,7 +928,7 @@ usage:
>  "\n"
>  "Page/Service server options:\n"
>  "  --address ADDR        address of server or service\n"
> -"  --port PORT           port of page server\n"
> +"  --port PORT           port of page serve or service\n"
>  "  -d|--daemon           run in the background after creating socket\n"
>  "\n"
>  "Other options:\n"
> diff --git a/criu/image-desc.c b/criu/image-desc.c
> index 2b31354..e146ef8 100644
> --- a/criu/image-desc.c
> +++ b/criu/image-desc.c
> @@ -102,13 +102,13 @@ struct cr_fd_desc_tmpl imgset_template[CR_FD_MAX] = {
>  	[CR_FD_STATS] = {
>  		.fmt	= "stats-%s",
>  		.magic	= STATS_MAGIC,
> -		.oflags = O_SERVICE,
> +		.oflags = O_SERVICE | O_FORCE_LOCAL,
>  	},
> 
>  	[CR_FD_IRMAP_CACHE] = {
>  		.fmt	= "irmap-cache",
>  		.magic	= IRMAP_CACHE_MAGIC,
> -		.oflags = O_SERVICE,
> +		.oflags = O_SERVICE | O_FORCE_LOCAL,
>  	},
> 
>  	[CR_FD_FILE_LOCKS_PID] = {
> diff --git a/criu/image.c b/criu/image.c
> index a3bb285..d5ecea2 100644
> --- a/criu/image.c
> +++ b/criu/image.c
> @@ -13,6 +13,7 @@
>  #include "protobuf.h"
>  #include "images/inventory.pb-c.h"
>  #include "images/pagemap.pb-c.h"
> +#include "img-remote.h"
> 
>  bool ns_per_id = false;
>  bool img_common_magic = true;
> @@ -309,9 +310,26 @@ static int do_open_image(struct cr_img *img, int dfd, int type, unsigned long of
>  {
>  	int ret, flags;
> 
> -	flags = oflags & ~(O_NOBUF | O_SERVICE);
> +	flags = oflags & ~(O_NOBUF | O_SERVICE | O_FORCE_LOCAL);
> 
> -	ret = openat(dfd, path, flags, CR_FD_PERM);
> +	if (opts.remote && !(oflags & O_FORCE_LOCAL)) {
> +		char *snapshot_id = NULL;
> +
> +		snapshot_id = get_snapshot_id_from_idx(dfd);
> +
> +		if (snapshot_id == NULL)
> +			ret = -1;
> +		else if (flags == O_RDONLY) {
> +			pr_info("do_open_remote_image RDONLY path=%s snapshot_id=%s\n",
> +					path, snapshot_id);

IMHO, pr_debug will suffice here

> +			ret = read_remote_image_connection(snapshot_id, path);
> +		} else {
> +			pr_info("do_open_remote_image WDONLY path=%s snapshot_id=%s\n",
> +					path, snapshot_id);

and here

> +			ret = write_remote_image_connection(snapshot_id, path, O_WRONLY);
> +		}
> +	} else
> +		ret = openat(dfd, path, flags, CR_FD_PERM);
>  	if (ret < 0) {
>  		if (!(flags & O_CREAT) && (errno == ENOENT)) {
>  			pr_info("No %s image\n", path);
> @@ -413,7 +431,9 @@ int open_image_dir(char *dir)
>  	close(fd);
>  	fd = ret;
> 
> -	if (opts.img_parent) {
> +	if (opts.remote) {
> +		init_snapshot_id(dir);
> +	} else if (opts.img_parent) {
>  		ret = symlinkat(opts.img_parent, fd, CR_PARENT_LINK);
>  		if (ret < 0 && errno != EEXIST) {
>  			pr_perror("Can't link parent snapshot");
> diff --git a/criu/img-remote.c b/criu/img-remote.c
> new file mode 100644
> index 0000000..05f3666
> --- /dev/null
> +++ b/criu/img-remote.c
> @@ -0,0 +1,272 @@
> +#include <unistd.h>
> +#include <stdlib.h>
> +#include <sys/types.h>
> +#include <sys/socket.h>
> +#include <netinet/in.h>
> +#include <netdb.h>
> +#include "xmalloc.h"
> +#include "criu-log.h"
> +#include "img-remote.h"
> +#include "img-remote-proto.h"
> +#include "images/remote-image.pb-c.h"
> +#include "protobuf-desc.h"
> +#include <fcntl.h>
> +#include "servicefd.h"
> +#include "compiler.h"
> +#include "cr_options.h"
> +
> +#define PB_LOCAL_IMAGE_SIZE PATHLEN
> +
> +static char *snapshot_id;
> +
> +LIST_HEAD(snapshot_head);
> +
> +/* A snapshot is a dump or pre-dump operation. Each snapshot is identified by an
> + * ID which corresponds to the working directory specefied by the user.
> + */
> +struct snapshot {
> +	char snapshot_id[PATHLEN];
> +	struct list_head l;
> +};
> +
> +struct snapshot *new_snapshot(char *snapshot_id)
> +{
> +	struct snapshot *s = malloc(sizeof(struct snapshot));
> +
> +	if (!s)
> +		pr_perror("Failed to allocate snapshot structure");

Shouldn't we propagate allocation error?

> +	strncpy(s->snapshot_id, snapshot_id, PATHLEN);

because this fill SIGSEGV otherwise ;-)

> +	return s;
> +}
> +
> +void add_snapshot(struct snapshot *snapshot)
> +{
> +	list_add_tail(&(snapshot->l), &snapshot_head);
> +}
> +
> +static char *get_local_img_path(void)
> +{
> +	static char *local_img_path = NULL;
> +
> +	if (local_img_path != NULL)
> +		return local_img_path;
> +
> +	if (strcmp(opts.local_cache_path, DEFAULT_IMG_PATH))
> +		local_img_path = opts.local_cache_path;
> +	else if (strcmp(opts.local_proxy_path, DEFAULT_IMG_PATH))
> +		local_img_path = opts.local_proxy_path;
> +

It seems that if user will pass DEFAULT_IMG_PATH in opts.local_cache_path
and in opts.local_proxy_path, the local_img_path will remain NULL...

> +	return local_img_path;
> +}
> +
> +int read_remote_image_connection(char *snapshot_id, char *path)
> +{
> +	int error;
> +	int sockfd = setup_UNIX_client_socket(get_local_img_path());
> +
> +	if (sockfd < 0) {
> +		pr_perror("Error opening local connection for %s:%s", path, snapshot_id);
> +		return -1;
> +	}
> +
> +	if (write_header(sockfd, snapshot_id, path, O_RDONLY) < 0) {
> +		pr_perror("Error writing header for %s:%s", path, snapshot_id);
> +		return -1;
> +	}
> +
> +	if (read_reply_header(sockfd, &error) < 0) {
> +		pr_perror("Error reading reply header for %s:%s", path, snapshot_id);
> +		return -1;
> +	}
> +	errno = error;

Can you please explain why do you assign errno value?

> +	if (!error)
> +		return sockfd;
> +	else if (error == ENOENT) {
> +		pr_info("Image does not exist (%s:%s)\n", path, snapshot_id);
> +		close(sockfd);
> +		return -1;
> +	}
> +	pr_perror("Unexpected error returned: %d (%s:%s)\n", error, path, snapshot_id);
> +	close(sockfd);
> +	return -1;
> +}
> +
> +int write_remote_image_connection(char *snapshot_id, char *path, int flags)
> +{
> +	int sockfd = setup_UNIX_client_socket(get_local_img_path());
> +
> +	if (sockfd < 0)
> +		return -1;
> +
> +	if (write_header(sockfd, snapshot_id, path, flags) < 0) {
> +		pr_perror("Error writing header for %s:%s", path, snapshot_id);
> +		return -1;
> +	}
> +	return sockfd;
> +}
> +
> +int finish_remote_dump(void)
> +{
> +	pr_info("Dump side is calling finish\n");
> +	int fd = write_remote_image_connection(NULL_SNAPSHOT_ID, DUMP_FINISH, O_WRONLY);
> +
> +	if (fd == -1) {
> +		pr_perror("Unable to open finish dump connection");
> +		return -1;
> +	}
> +
> +	close(fd);
> +	return 0;
> +}
> +
> +int skip_remote_bytes(int fd, unsigned long len)
> +{
> +	static char buf[4096];
> +	int n = 0;
> +	unsigned long curr = 0;
> +
> +	for (; curr < len; ) {
> +		n = read(fd, buf, min(len - curr, (unsigned long)4096));
> +		if (n == 0) {
> +			pr_perror("Unexpected end of stream (skipping %lx/%lx bytes)",
> +				curr, len);
> +			return -1;
> +		} else if (n > 0) {
> +			curr += n;
> +		} else {
> +			pr_perror("Error while skipping bytes from stream (%lx/%lx)",
> +				curr, len);
> +			return -1;
> +		}
> +	}
> +
> +	if (curr != len) {
> +		pr_perror("Unable to skip the current number of bytes: %lx instead of %lx",
> +			curr, len);
> +		return -1;
> +	}
> +	return 0;
> +}
> +
> +static int pull_snapshot_ids(void)
> +{
> +	int n, sockfd;
> +	SnapshotIdEntry *ls;
> +	struct snapshot *s = NULL;
> +
> +	sockfd = read_remote_image_connection(NULL_SNAPSHOT_ID, PARENT_IMG);
> +
> +	/* The connection was successful but there is not file. */
> +	if (sockfd < 0 && errno == ENOENT)
> +		return 0;
> +	else if (sockfd < 0) {
> +		pr_perror("Unable to open snapshot id read connection");
> +		return -1;
> +	}
> +
> +	while (1) {
> +		n = pb_read_obj(sockfd, (void **)&ls, PB_SNAPSHOT_ID);
> +		if (!n) {
> +			close(sockfd);
> +			return n;
> +		} else if (n < 0) {
> +			pr_perror("Unable to read remote snapshot ids");
> +			close(sockfd);
> +			return n;
> +		}
> +
> +		s = new_snapshot(ls->snapshot_id);
> +		if (!s) {
> +			pr_perror("Unable create new snapshot structure");
> +			close(sockfd);
> +			return -1;
> +		}
> +		add_snapshot(s);
> +		pr_info("[read_snapshot ids] parent = %s\n", ls->snapshot_id);
> +	}
> +	free(ls);
> +	close(sockfd);
> +	return n;
> +}
> +
> +int push_snapshot_id(void)
> +{
> +	int n;
> +	SnapshotIdEntry rn = SNAPSHOT_ID_ENTRY__INIT;
> +	int sockfd = write_remote_image_connection(NULL_SNAPSHOT_ID, PARENT_IMG, O_APPEND);
> +
> +	if (sockfd < 0) {
> +		pr_perror("Unable to open snapshot id push connection");
> +		return -1;
> +	}
> +
> +	rn.snapshot_id = xmalloc(sizeof(char) * PATHLEN);
> +	if (!rn.snapshot_id) {
> +		pr_perror("Unable to allocate snapshot id buffer");
> +		close(sockfd);
> +		return -1;
> +	}
> +	strncpy(rn.snapshot_id, snapshot_id, PATHLEN);
> +
> +	n = pb_write_obj(sockfd, &rn, PB_SNAPSHOT_ID);
> +
> +	xfree(rn.snapshot_id);
> +	close(sockfd);
> +	return n;
> +}
> +
> +void init_snapshot_id(char *si)
> +{
> +	snapshot_id = si;
> +}
> +
> +char *get_curr_snapshot_id(void)
> +{
> +	return snapshot_id;
> +}
> +
> +int get_curr_snapshot_id_idx(void)
> +{
> +	struct snapshot *si;
> +	int idx = 0;
> +
> +	if (list_empty(&snapshot_head))
> +		pull_snapshot_ids();
> +
> +	list_for_each_entry(si, &snapshot_head, l) {
> +	if (!strncmp(si->snapshot_id, snapshot_id, PATHLEN))
> +			return idx;
> +		idx++;
> +	}
> +
> +	pr_perror("Error, could not find current snapshot id (%s) fd", snapshot_id);
> +	return -1;
> +}
> +
> +char *get_snapshot_id_from_idx(int idx)
> +{
> +	struct snapshot *si;
> +
> +	if (list_empty(&snapshot_head))
> +		pull_snapshot_ids();
> +
> +	/* Note: if idx is the service fd then we need the current
> +	 * snapshot_id idx. Else we need a parent snapshot_id idx.
> +	 */
> +	if (idx == get_service_fd(IMG_FD_OFF))
> +		idx = get_curr_snapshot_id_idx();
> +
> +	list_for_each_entry(si, &snapshot_head, l) {
> +		if (!idx)
> +			return si->snapshot_id;
> +		idx--;
> +	}
> +
> +	pr_perror("Error, could not find snapshot id for idx %d", idx);
> +	return NULL;
> +}
> +
> +int get_curr_parent_snapshot_id_idx(void)
> +{
> +	return get_curr_snapshot_id_idx() - 1;
> +}
> diff --git a/criu/include/cr_options.h b/criu/include/cr_options.h
> index 8eeadc9..ada8369 100644
> --- a/criu/include/cr_options.h
> +++ b/criu/include/cr_options.h
> @@ -111,6 +111,9 @@ struct cr_options {
>  	unsigned int		empty_ns;
>  	bool			tcp_skip_in_flight;
>  	char			*work_dir;
> +	bool			remote;
> +	char			*local_cache_path;
> +	char			*local_proxy_path;
>  };
> 
>  extern struct cr_options opts;
> diff --git a/criu/include/image.h b/criu/include/image.h
> index 65b7b0a..9ba6ab8 100644
> --- a/criu/include/image.h
> +++ b/criu/include/image.h
> @@ -104,6 +104,7 @@ extern bool img_common_magic;
>  #define O_DUMP		(O_WRONLY | O_CREAT | O_TRUNC)
>  #define O_SHOW		(O_RDONLY | O_NOBUF)
>  #define O_RSTR		(O_RDONLY)
> +#define O_FORCE_LOCAL   (O_SYNC)
> 
>  struct cr_img {
>  	union {
> diff --git a/criu/include/protobuf-desc.h b/criu/include/protobuf-desc.h
> index 6c76b49..43ac534 100644
> --- a/criu/include/protobuf-desc.h
> +++ b/criu/include/protobuf-desc.h
> @@ -59,6 +59,10 @@ enum {
>  	PB_BINFMT_MISC,		/* 50 */
>  	PB_TTY_DATA,
>  	PB_AUTOFS,
> +	PB_REMOTE_IMAGE,        /* Header for images sent from proxy to cache.*/
> +	PB_LOCAL_IMAGE,         /* Header for reading/writing images from/to proxy or cache. */
> +	PB_LOCAL_IMAGE_REPLY,	/* Header for reading/writing images reply. */
> +	PB_SNAPSHOT_ID,         /* Contains a single id. Used for reading/writing ids from proxy or cache. */
> 
>  	/* PB_AUTOGEN_STOP */
> 
> diff --git a/criu/include/util.h b/criu/include/util.h
> index 5b7cad1..63be0ea 100644
> --- a/criu/include/util.h
> +++ b/criu/include/util.h
> @@ -259,6 +259,7 @@ FILE *fopenat(int dirfd, char *path, char *cflags);
>  void split(char *str, char token, char ***out, int *n);
> 
>  int fd_has_data(int lfd);
> +size_t read_into_buffer(int fd, char *buff, size_t size);
> 
>  int make_yard(char *path);
> 
> diff --git a/criu/page-xfer.c b/criu/page-xfer.c
> index 7978227..3c668d7 100644
> --- a/criu/page-xfer.c
> +++ b/criu/page-xfer.c
> @@ -17,6 +17,8 @@
>  #include "protobuf.h"
>  #include "images/pagemap.pb-c.h"
> 
> +#include "img-remote.h"
> +
>  static int page_server_sk = -1;
> 
>  struct page_server_iov {
> @@ -288,7 +290,9 @@ static int open_page_local_xfer(struct page_xfer *xfer, int fd_type, long id)
>  			goto out;
> 
>  		xfer->parent = xmalloc(sizeof(*xfer->parent));
> -		if (!xfer->parent) {
> +		if (!xfer->parent && opts.remote) {
> +			return -1;
> +		} else if (!xfer->parent) {

I think using

		if (!xfer->parent) {
			if (!opts.remote)
				close(pfd);
			return -1;
		}

is little bit less obscure.

>  			close(pfd);
>  			return -1;
>  		}
> @@ -298,10 +302,12 @@ static int open_page_local_xfer(struct page_xfer *xfer, int fd_type, long id)
>  			pr_perror("No parent image found, though parent directory is set");
>  			xfree(xfer->parent);
>  			xfer->parent = NULL;
> -			close(pfd);
> +			if (!opts.remote)
> +				close(pfd);
>  			goto out;
>  		}
> -		close(pfd);
> +		if (!opts.remote)
> +			close(pfd);
>  	}
> 
>  out:
> @@ -406,9 +412,16 @@ int check_parent_local_xfer(int fd_type, int id)
>  	struct stat st;
>  	int ret, pfd;
> 
> -	pfd = openat(get_service_fd(IMG_FD_OFF), CR_PARENT_LINK, O_RDONLY);
> -	if (pfd < 0 && errno == ENOENT)
> -		return 0;
> +	if (opts.remote) {
> +		pfd = get_curr_parent_snapshot_id_idx();
> +		pr_err("Unable to get parent snapsgot id");
> +		if (pfd == -1)
> +			return -1;
> +	} else {
> +		pfd = openat(get_service_fd(IMG_FD_OFF), CR_PARENT_LINK, O_RDONLY);
> +		if (pfd < 0 && errno == ENOENT)
> +			return 0;
> +	}
> 
>  	snprintf(path, sizeof(path), imgset_template[fd_type].fmt, id);
>  	ret = fstatat(pfd, path, &st, 0);
> @@ -470,6 +483,8 @@ int check_parent_page_xfer(int fd_type, long id)
>  {
>  	if (opts.use_page_server)
>  		return check_parent_server_xfer(fd_type, id);
> +	else if (opts.remote)
> +		return get_curr_parent_snapshot_id_idx() == -1 ? 0 : 1;
>  	else
>  		return check_parent_local_xfer(fd_type, id);
>  }
> diff --git a/criu/pagemap.c b/criu/pagemap.c
> index dbe9cc4..693f967 100644
> --- a/criu/pagemap.c
> +++ b/criu/pagemap.c
> @@ -11,6 +11,8 @@
>  #include "protobuf.h"
>  #include "images/pagemap.pb-c.h"
> 
> +#include "img-remote.h"
> +
>  #ifndef SEEK_DATA
>  #define SEEK_DATA	3
>  #define SEEK_HOLE	4
> @@ -124,11 +126,18 @@ int dedup_one_iovec(struct page_read *pr, struct iovec *iov)
>  static int get_pagemap(struct page_read *pr, struct iovec *iov)
>  {
>  	PagemapEntry *pe;
> +	int ret;
> 
> -	if (pr->curr_pme >= pr->nr_pmes)
> -		return 0;
> +	if (opts.remote) {
> +		ret = pb_read_one_eof(pr->pmi, &pe, PB_PAGEMAP);
> +		if (ret <= 0)
> +			return ret;
> +	} else {
> +		if (pr->curr_pme >= pr->nr_pmes)
> +			return 0;
> 
> -	pe = pr->pmes[pr->curr_pme];
> +		pe = pr->pmes[pr->curr_pme];
> +	}
> 
>  	pagemap2iovec(pe, iov);
> 
> @@ -261,7 +270,7 @@ static int read_pagemap_page(struct page_read *pr, unsigned long vaddr, int nr,
>  		off_t current_vaddr = lseek(fd, pr->pi_off, SEEK_SET);
> 
>  		pr_debug("\tpr%u Read page from self %lx/%"PRIx64"\n", pr->id, pr->cvaddr, current_vaddr);
> -		ret = read(fd, buf, len);
> +		ret = read_into_buffer(fd, buf, len);
>  		if (ret != len) {
>  			pr_perror("Can't read mapping page %d", ret);
>  			return -1;
> @@ -314,7 +323,7 @@ static void close_page_read(struct page_read *pr)
>  	if (pr->pi)
>  		close_image(pr->pi);
> 
> -	if (pr->pmes)
> +	if (!opts.remote && pr->pmes)
>  		free_pagemaps(pr);
>  }
> 
> @@ -323,9 +332,24 @@ static int try_open_parent(int dfd, int pid, struct page_read *pr, int pr_flags)
>  	int pfd, ret;
>  	struct page_read *parent = NULL;
> 
> -	pfd = openat(dfd, CR_PARENT_LINK, O_RDONLY);
> -	if (pfd < 0 && errno == ENOENT)
> -		goto out;
> +	if (opts.remote) {
> +		/* Note: we are replacing a real directory FD for a snapshot_id
> +		 * index. Since we need the parent of the current snapshot_id,
> +		 * we want the current snapshot_id index minus one. It is
> +		 * possible that dfd is already a snapshot_id index. We test it
> +		 * by comparing it to the service FD. When opening an image (see
> +		 * do_open_image) we convert the snapshot_id index into a real
> +		 * snapshot_id.
> +		 */
> +		pfd = dfd == get_service_fd(IMG_FD_OFF) ?
> +			get_curr_snapshot_id_idx() - 1 : dfd - 1;
> +		if (pfd < 0)
> +			goto out;
> +	} else {
> +		pfd = openat(dfd, CR_PARENT_LINK, O_RDONLY);
> +		if (pfd < 0 && errno == ENOENT)
> +			goto out;
> +	}
> 
>  	parent = xmalloc(sizeof(*parent));
>  	if (!parent)
> @@ -348,7 +372,8 @@ out:
>  err_free:
>  	xfree(parent);
>  err_cl:
> -	close(pfd);
> +	if (!opts.remote)
> +		close(pfd);
>  	return -1;
>  }
> 
> @@ -458,7 +483,7 @@ int open_page_read_at(int dfd, int pid, struct page_read *pr, int pr_flags)
>  		return -1;
>  	}
> 
> -	if (init_pagemaps(pr)) {
> +	if (!opts.remote && init_pagemaps(pr)) {

Is there anything that prevents using init_pagemaps with opts.remote?
Why cannot we just read the entire pagemap from the socket as we read it
from local file?

>  		close_page_read(pr);
>  		return -1;
>  	}
> @@ -467,7 +492,8 @@ int open_page_read_at(int dfd, int pid, struct page_read *pr, int pr_flags)
>  	pr->put_pagemap = put_pagemap;
>  	pr->read_pages = read_pagemap_page;
>  	pr->close = close_page_read;
> -	pr->seek_page = seek_pagemap_page;
> +	if (!opts.remote)
> +		pr->seek_page = seek_pagemap_page;
>  	pr->id = ids++;
> 
>  	pr_debug("Opened page read %u (parent %u)\n",
> diff --git a/criu/protobuf-desc.c b/criu/protobuf-desc.c
> index 9352a76..c1850f9 100644
> --- a/criu/protobuf-desc.c
> +++ b/criu/protobuf-desc.c
> @@ -64,6 +64,7 @@
>  #include "images/seccomp.pb-c.h"
>  #include "images/binfmt-misc.pb-c.h"
>  #include "images/autofs.pb-c.h"
> +#include "images/remote-image.pb-c.h"
> 
>  struct cr_pb_message_desc cr_pb_descs[PB_MAX];
> 
> diff --git a/criu/util.c b/criu/util.c
> index c44d900..5e2f400 100644
> --- a/criu/util.c
> +++ b/criu/util.c
> @@ -1184,3 +1184,18 @@ int setup_tcp_client(char *addr)
> 
>  	return sk;
>  }
> +
> +size_t read_into_buffer(int fd, char *buff, size_t size)
> +{

Can you please explain why this wrapper is required?

> +	size_t n = 0;
> +	size_t curr = 0;
> +
> +	while (1) {
> +		n  = read(fd, buff + curr, size - curr);
> +		if (n < 1)
> +			return n;
> +		curr += n;
> +		if (curr == size)
> +			return size;
> +	}
> +}
> diff --git a/images/Makefile b/images/Makefile
> index cf50794..3753d62 100644
> --- a/images/Makefile
> +++ b/images/Makefile
> @@ -60,6 +60,7 @@ proto-obj-y	+= binfmt-misc.o
>  proto-obj-y	+= time.o
>  proto-obj-y	+= sysctl.o
>  proto-obj-y	+= autofs.o
> +proto-obj-y	+= remote-image.o
> 
>  CFLAGS		+= -iquote $(obj)/
> 
> diff --git a/images/remote-image.proto b/images/remote-image.proto
> new file mode 100644
> index 0000000..1212627
> --- /dev/null
> +++ b/images/remote-image.proto
> @@ -0,0 +1,20 @@
> +message local_image_entry {
> +	required string name		= 1;
> +	required string snapshot_id	= 2;
> +	required uint32 open_mode	= 3;
> +}
> +
> +message remote_image_entry {
> +	required string name		= 1;
> +	required string snapshot_id	= 2;
> +	required uint32 open_mode	= 3;
> +	required uint64 size		= 4;
> +}
> +
> +message local_image_reply_entry {
> +	required uint32 error           = 1;
> +}
> +
> +message snapshot_id_entry {
> +	required string snapshot_id	= 1;
> +}
> -- 
> 2.7.3
> 
> _______________________________________________
> CRIU mailing list
> CRIU at openvz.org
> https://lists.openvz.org/mailman/listinfo/criu
> 



More information about the CRIU mailing list