<div dir="ltr"><br><div class="gmail_extra"><br><div class="gmail_quote">On Wed, Aug 3, 2016 at 12:42 PM, Mike Rapoport <span dir="ltr"><<a href="mailto:rppt@linux.vnet.ibm.com" target="_blank">rppt@linux.vnet.ibm.com</a>></span> wrote:<br><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex"><div><div>On Tue, Aug 02, 2016 at 04:39:37PM +0000, Katerina Koukiou wrote:<br>
> This patch introduces --remote option and image-proxy/image-cache processes.<br>
> This leaves user the option to decide if the checkpoint data are to be stored<br>
> on disk or sent through socket to the image-proxy.<br>
> The latter forwards the data to the destination node where image-cache receives<br>
> them.<br>
><br>
> The overall communication is performed as follows:<br>
> src_node CRIU dump -> (sends images using a local socket) -> image-proxy<br>
> |<br>
> V<br>
> dst_node: CRIU restore <- (receives images from a local socket) <- image-cache<br>
><br>
> Running criu with --remote option is like this:<br>
><br>
> dst_node# criu image-cache --port <port> -o /tmp/image-cache.log<br>
> --local-cache-path <local_cache_path> ...<br>
> dst_node# criu restore --remote -o /tmp/image-cache.log<br>
> --local-cache-path <local_cache_path> ...<br>
> src_node# criu image-proxy --port <port> --address <dst_node> -o /tmp/image-proxy.log<br>
> --local-proxy-path <local_proxy_path> ...<br>
> src_node# criu dump -t <pid> --remote -o /tmp/dump.log<br>
> --local-proxy-path <local_proxy_path> ...<br>
><br>
> Signed-off-by: Rodrigo Bruno <rbruno at <a href="http://gsd.inesc-id.pt" rel="noreferrer" target="_blank">gsd.inesc-id.pt</a>><br>
> Signed-off-by: Katerina Koukiou <<a href="mailto:k.koukiou@gmail.com" target="_blank">k.koukiou@gmail.com</a>><br>
> ---<br>
> criu/Makefile.crtools | 4 +<br>
> criu/cr-dump.c | 15 +++<br>
> criu/crtools.c | 30 ++++-<br>
> criu/image-desc.c | 4 +-<br>
> criu/image.c | 26 ++++-<br>
> criu/img-remote.c | 272 +++++++++++++++++++++++++++++++++++++++++++<br>
> criu/include/cr_options.h | 3 +<br>
> criu/include/image.h | 1 +<br>
> criu/include/protobuf-desc.h | 4 +<br>
> criu/include/util.h | 1 +<br>
> criu/page-xfer.c | 27 ++++-<br>
> criu/pagemap.c | 48 ++++++--<br>
> criu/protobuf-desc.c | 1 +<br>
> criu/util.c | 15 +++<br>
> images/Makefile | 1 +<br>
> images/remote-image.proto | 20 ++++<br>
> 16 files changed, 449 insertions(+), 23 deletions(-)<br>
> create mode 100644 criu/img-remote.c<br>
> create mode 100644 images/remote-image.proto<br>
><br>
> diff --git a/criu/Makefile.crtools b/criu/Makefile.crtools<br>
> index 2665f92..8189960 100644<br>
> --- a/criu/Makefile.crtools<br>
> +++ b/criu/Makefile.crtools<br>
> @@ -26,6 +26,10 @@ obj-y += files-reg.o<br>
> obj-y += fsnotify.o<br>
> obj-y += image-desc.o<br>
> obj-y += image.o<br>
> +obj-y += img-remote.o<br>
> +obj-y += img-proxy.o<br>
> +obj-y += img-cache.o<br>
> +obj-y += img-remote-proto.o<br>
> obj-y += ipc_ns.o<br>
> obj-y += irmap.o<br>
> obj-y += kcmp-ids.o<br>
> diff --git a/criu/cr-dump.c b/criu/cr-dump.c<br>
> index be83bec..a8b5ab6 100644<br>
> --- a/criu/cr-dump.c<br>
> +++ b/criu/cr-dump.c<br>
> @@ -84,6 +84,8 @@<br>
><br>
> #include "asm/dump.h"<br>
><br>
> +#include "img-remote.h"<br>
> +<br>
> static char loc_buf[PAGE_SIZE];<br>
><br>
> void free_mappings(struct vm_area_list *vma_area_list)<br>
> @@ -1483,6 +1485,11 @@ int cr_pre_dump_tasks(pid_t pid)<br>
> struct pstree_item *item;<br>
> int ret = -1;<br>
><br>
> + if (opts.remote && push_snapshot_id() < 0) {<br>
> + pr_err("Failed to push image namespace.\n");<br>
> + goto err;<br>
> + }<br>
> +<br>
> root_item = alloc_pstree_item();<br>
> if (!root_item)<br>
> goto err;<br>
> @@ -1613,6 +1620,9 @@ static int cr_dump_finish(int ret)<br>
><br>
> close_service_fd(CR_PROC_FD_OFF);<br>
><br>
> + if (opts.remote)<br>
> + finish_remote_dump();<br>
> +<br>
<br>
</div></div>Please take care of possible errors.<br>
Would it be correct to call finish_remote_dump even if ret != 0?<br>
And, what happens if finish_remote_dump fails?<br></blockquote><div>Fixed that. <br></div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex">
<div><div><br>
> if (ret) {<br>
> pr_err("Dumping FAILED.\n");<br>
> } else {<br>
> @@ -1633,6 +1643,11 @@ int cr_dump_tasks(pid_t pid)<br>
> pr_info("Dumping processes (pid: %d)\n", pid);<br>
> pr_info("========================================\n");<br>
><br>
> + if (opts.remote && push_snapshot_id() < 0) {<br>
> + pr_err("Failed to push image namespace.\n");<br>
> + goto err;<br>
> + }<br>
> +<br>
> root_item = alloc_pstree_item();<br>
> if (!root_item)<br>
> goto err;<br>
> diff --git a/criu/crtools.c b/criu/crtools.c<br>
> index a0b2fc9..4efca16 100644<br>
> --- a/criu/crtools.c<br>
> +++ b/criu/crtools.c<br>
> @@ -48,6 +48,7 @@<br>
><br>
> #include "setproctitle.h"<br>
> #include "sysctl.h"<br>
> +#include "img-remote.h"<br>
><br>
> struct cr_options opts;<br>
><br>
> @@ -72,6 +73,10 @@ void init_opts(void)<br>
> opts.ghost_limit = DEFAULT_GHOST_LIMIT;<br>
> opts.timeout = DEFAULT_TIMEOUT;<br>
> opts.empty_ns = 0;<br>
> + opts.addr = DEFAULT_CACHE_HOST;<br>
> + opts.port = DEFAULT_CACHE_PORT;<br>
> + opts.local_cache_path = DEFAULT_IMG_PATH;<br>
> + opts.local_proxy_path = DEFAULT_IMG_PATH;<br>
> }<br>
><br>
> static int parse_join_ns(const char *ptr)<br>
> @@ -279,6 +284,9 @@ int main(int argc, char *argv[], char *envp[])<br>
> { "cgroup-props-file", required_argument, 0, 1081 },<br>
> { "cgroup-dump-controller", required_argument, 0, 1082 },<br>
> { SK_INFLIGHT_PARAM, no_argument, 0, 1083 },<br>
> + { "remote", no_argument, 0, 1084 },<br>
> + { "local-cache-path", required_argument, 0, 1085 },<br>
> + { "local-proxy-path", required_argument, 0, 1086 },<br>
> { },<br>
> };<br>
><br>
> @@ -587,6 +595,15 @@ int main(int argc, char *argv[], char *envp[])<br>
> pr_msg("Will skip in-flight TCP connections\n");<br>
> opts.tcp_skip_in_flight = true;<br>
> break;<br>
> + case 1084:<br>
> + opts.remote = true;<br>
> + break;<br>
> + case 1085:<br>
> + opts.local_cache_path = optarg;<br>
> + break;<br>
> + case 1086:<br>
> + opts.local_proxy_path = optarg;<br>
> + break;<br>
> case 'V':<br>
> pr_msg("Version: %s\n", CRIU_VERSION);<br>
> if (strcmp(CRIU_GITID, "0"))<br>
> @@ -727,6 +744,12 @@ int main(int argc, char *argv[], char *envp[])<br>
> if (!strcmp(argv[optind], "page-server"))<br>
> return cr_page_server(opts.daemon_mode, -1) > 0 ? 0 : 1;<br>
><br>
> + if (!strcmp(argv[optind], "image-cache"))<br>
> + return image_cache(opts.local_cache_path, opts.port);<br>
> +<br>
> + if (!strcmp(argv[optind], "image-proxy"))<br>
> + return image_proxy(opts.local_proxy_path, opts.addr, opts.port);<br>
> +<br>
> if (!strcmp(argv[optind], "service"))<br>
> return cr_service(opts.daemon_mode);<br>
><br>
> @@ -753,6 +776,8 @@ usage:<br>
> " criu page-server\n"<br>
> " criu service [<options>]\n"<br>
> " criu dedup\n"<br>
> +" criu image-cache [<options>]\n"<br>
> +" criu image-proxy [<options>]\n"<br>
> "\n"<br>
> "Commands:\n"<br>
> " dump checkpoint a process/tree identified by pid\n"<br>
> @@ -765,6 +790,8 @@ usage:<br>
> " dedup remove duplicates in memory dump\n"<br>
> " cpuinfo dump writes cpu information into image file\n"<br>
> " cpuinfo check validates cpu information read from image file\n"<br>
> +" image-cache launch destination-side cache for images sent from the source-side\n"<br>
> +" image-proxy launch source-side proxy to sent images to the destination-side\n"<br>
> );<br>
><br>
> if (usage_error) {<br>
> @@ -791,6 +818,7 @@ usage:<br>
> " restore making it the parent of the restored process\n"<br>
> " --freeze-cgroup\n"<br>
> " use cgroup freezer to collect processes\n"<br>
> +" --remote dump/restore images directly to/from remote node using image-proxy/image-cache\n"<br>
> "\n"<br>
> "* Special resources support:\n"<br>
> " -x|--" USK_EXT_PARAM "inode,.." " allow external unix connections (optionally can be assign socket's inode that allows one-sided dump)\n"<br>
> @@ -900,7 +928,7 @@ usage:<br>
> "\n"<br>
> "Page/Service server options:\n"<br>
> " --address ADDR address of server or service\n"<br>
> -" --port PORT port of page server\n"<br>
> +" --port PORT port of page serve or service\n"<br>
> " -d|--daemon run in the background after creating socket\n"<br>
> "\n"<br>
> "Other options:\n"<br>
> diff --git a/criu/image-desc.c b/criu/image-desc.c<br>
> index 2b31354..e146ef8 100644<br>
> --- a/criu/image-desc.c<br>
> +++ b/criu/image-desc.c<br>
> @@ -102,13 +102,13 @@ struct cr_fd_desc_tmpl imgset_template[CR_FD_MAX] = {<br>
> [CR_FD_STATS] = {<br>
> .fmt = "stats-%s",<br>
> .magic = STATS_MAGIC,<br>
> - .oflags = O_SERVICE,<br>
> + .oflags = O_SERVICE | O_FORCE_LOCAL,<br>
> },<br>
><br>
> [CR_FD_IRMAP_CACHE] = {<br>
> .fmt = "irmap-cache",<br>
> .magic = IRMAP_CACHE_MAGIC,<br>
> - .oflags = O_SERVICE,<br>
> + .oflags = O_SERVICE | O_FORCE_LOCAL,<br>
> },<br>
><br>
> [CR_FD_FILE_LOCKS_PID] = {<br>
> diff --git a/criu/image.c b/criu/image.c<br>
> index a3bb285..d5ecea2 100644<br>
> --- a/criu/image.c<br>
> +++ b/criu/image.c<br>
> @@ -13,6 +13,7 @@<br>
> #include "protobuf.h"<br>
> #include "images/inventory.pb-c.h"<br>
> #include "images/pagemap.pb-c.h"<br>
> +#include "img-remote.h"<br>
><br>
> bool ns_per_id = false;<br>
> bool img_common_magic = true;<br>
> @@ -309,9 +310,26 @@ static int do_open_image(struct cr_img *img, int dfd, int type, unsigned long of<br>
> {<br>
> int ret, flags;<br>
><br>
> - flags = oflags & ~(O_NOBUF | O_SERVICE);<br>
> + flags = oflags & ~(O_NOBUF | O_SERVICE | O_FORCE_LOCAL);<br>
><br>
> - ret = openat(dfd, path, flags, CR_FD_PERM);<br>
> + if (opts.remote && !(oflags & O_FORCE_LOCAL)) {<br>
> + char *snapshot_id = NULL;<br>
> +<br>
> + snapshot_id = get_snapshot_id_from_idx(dfd);<br>
> +<br>
> + if (snapshot_id == NULL)<br>
> + ret = -1;<br>
> + else if (flags == O_RDONLY) {<br>
> + pr_info("do_open_remote_image RDONLY path=%s snapshot_id=%s\n",<br>
> + path, snapshot_id);<br>
<br>
</div></div>IMHO, pr_debug will suffice here<br>
<span><br>
> + ret = read_remote_image_connection(snapshot_id, path);<br>
> + } else {<br>
> + pr_info("do_open_remote_image WDONLY path=%s snapshot_id=%s\n",<br>
> + path, snapshot_id);<br>
<br>
</span>and here<br></blockquote><div>Right, changed these. <br></div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex">
<div><div><br>
> + ret = write_remote_image_connection(snapshot_id, path, O_WRONLY);<br>
> + }<br>
> + } else<br>
> + ret = openat(dfd, path, flags, CR_FD_PERM);<br>
> if (ret < 0) {<br>
> if (!(flags & O_CREAT) && (errno == ENOENT)) {<br>
> pr_info("No %s image\n", path);<br>
> @@ -413,7 +431,9 @@ int open_image_dir(char *dir)<br>
> close(fd);<br>
> fd = ret;<br>
><br>
> - if (opts.img_parent) {<br>
> + if (opts.remote) {<br>
> + init_snapshot_id(dir);<br>
> + } else if (opts.img_parent) {<br>
> ret = symlinkat(opts.img_parent, fd, CR_PARENT_LINK);<br>
> if (ret < 0 && errno != EEXIST) {<br>
> pr_perror("Can't link parent snapshot");<br>
> diff --git a/criu/img-remote.c b/criu/img-remote.c<br>
> new file mode 100644<br>
> index 0000000..05f3666<br>
> --- /dev/null<br>
> +++ b/criu/img-remote.c<br>
> @@ -0,0 +1,272 @@<br>
> +#include <unistd.h><br>
> +#include <stdlib.h><br>
> +#include <sys/types.h><br>
> +#include <sys/socket.h><br>
> +#include <netinet/in.h><br>
> +#include <netdb.h><br>
> +#include "xmalloc.h"<br>
> +#include "criu-log.h"<br>
> +#include "img-remote.h"<br>
> +#include "img-remote-proto.h"<br>
> +#include "images/remote-image.pb-c.h"<br>
> +#include "protobuf-desc.h"<br>
> +#include <fcntl.h><br>
> +#include "servicefd.h"<br>
> +#include "compiler.h"<br>
> +#include "cr_options.h"<br>
> +<br>
> +#define PB_LOCAL_IMAGE_SIZE PATHLEN<br>
> +<br>
> +static char *snapshot_id;<br>
> +<br>
> +LIST_HEAD(snapshot_head);<br>
> +<br>
> +/* A snapshot is a dump or pre-dump operation. Each snapshot is identified by an<br>
> + * ID which corresponds to the working directory specefied by the user.<br>
> + */<br>
> +struct snapshot {<br>
> + char snapshot_id[PATHLEN];<br>
> + struct list_head l;<br>
> +};<br>
> +<br>
> +struct snapshot *new_snapshot(char *snapshot_id)<br>
> +{<br>
> + struct snapshot *s = malloc(sizeof(struct snapshot));<br>
> +<br>
> + if (!s)<br>
> + pr_perror("Failed to allocate snapshot structure");<br>
<br>
</div></div>Shouldn't we propagate allocation error?<br>
<span><br>
> + strncpy(s->snapshot_id, snapshot_id, PATHLEN);<br>
<br>
</span>because this will SIGSEGV otherwise ;-)<span></span></blockquote><div> <br>Fixed that. <br></div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex"><span>
> + return s;<br>
> +}<br>
> +<br>
> +void add_snapshot(struct snapshot *snapshot)<br>
> +{<br>
> + list_add_tail(&(snapshot->l), &snapshot_head);<br>
> +}<br>
> +<br>
> +static char *get_local_img_path(void)<br>
> +{<br>
> + static char *local_img_path = NULL;<br>
> +<br>
> + if (local_img_path != NULL)<br>
> + return local_img_path;<br>
> +<br>
> + if (strcmp(opts.local_cache_path, DEFAULT_IMG_PATH))<br>
> + local_img_path = opts.local_cache_path;<br>
> + else if (strcmp(opts.local_proxy_path, DEFAULT_IMG_PATH))<br>
> + local_img_path = opts.local_proxy_path;<br>
> +<br>
<br>
</span>It seems that if the user passes DEFAULT_IMG_PATH in opts.local_cache_path<br>
and in opts.local_proxy_path, the local_img_path will remain NULL...<br>
<span><br></span></blockquote><div>Fixed that. <br></div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex"><span>
> + return local_img_path;<br>
> +}<br>
> +<br>
> +int read_remote_image_connection(char *snapshot_id, char *path)<br>
> +{<br>
> + int error;<br>
> + int sockfd = setup_UNIX_client_socket(get_local_img_path());<br>
> +<br>
> + if (sockfd < 0) {<br>
> + pr_perror("Error opening local connection for %s:%s", path, snapshot_id);<br>
> + return -1;<br>
> + }<br>
> +<br>
> + if (write_header(sockfd, snapshot_id, path, O_RDONLY) < 0) {<br>
> + pr_perror("Error writing header for %s:%s", path, snapshot_id);<br>
> + return -1;<br>
> + }<br>
> +<br>
> + if (read_reply_header(sockfd, &error) < 0) {<br>
> + pr_perror("Error reading reply header for %s:%s", path, snapshot_id);<br>
> + return -1;<br>
> + }<br>
> + errno = error;<br>
<br>
</span>Can you please explain why you assign the errno value?<br>
<div><div><br></div></div></blockquote><div>Yes, the code is not mine; I just rebased it. But I can tell you what I understand.<br></div><div>In criu/image.c when an image does not exist read_remote_image_connection<br></div><div>returns -1. In image.c we want to handle this case separately when <br>read_remote_image_connection returns -1 but the reason is error code ENOENT.<br>Then because read_reply_header does not assign errno but error variable,<br></div><div>we do the "errno = error" assignment by ourselves.<br></div><div><br></div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex"><div><div>
> + if (!error)<br>
> + return sockfd;<br>
> + else if (error == ENOENT) {<br>
> + pr_info("Image does not exist (%s:%s)\n", path, snapshot_id);<br>
> + close(sockfd);<br>
> + return -1;<br>
> + }<br>
> + pr_perror("Unexpected error returned: %d (%s:%s)\n", error, path, snapshot_id);<br>
> + close(sockfd);<br>
> + return -1;<br>
> +}<br>
> +<br>
> +int write_remote_image_connection(char *snapshot_id, char *path, int flags)<br>
> +{<br>
> + int sockfd = setup_UNIX_client_socket(get_local_img_path());<br>
> +<br>
> + if (sockfd < 0)<br>
> + return -1;<br>
> +<br>
> + if (write_header(sockfd, snapshot_id, path, flags) < 0) {<br>
> + pr_perror("Error writing header for %s:%s", path, snapshot_id);<br>
> + return -1;<br>
> + }<br>
> + return sockfd;<br>
> +}<br>
> +<br>
> +int finish_remote_dump(void)<br>
> +{<br>
> + pr_info("Dump side is calling finish\n");<br>
> + int fd = write_remote_image_connection(NULL_SNAPSHOT_ID, DUMP_FINISH, O_WRONLY);<br>
> +<br>
> + if (fd == -1) {<br>
> + pr_perror("Unable to open finish dump connection");<br>
> + return -1;<br>
> + }<br>
> +<br>
> + close(fd);<br>
> + return 0;<br>
> +}<br>
> +<br>
> +int skip_remote_bytes(int fd, unsigned long len)<br>
> +{<br>
> + static char buf[4096];<br>
> + int n = 0;<br>
> + unsigned long curr = 0;<br>
> +<br>
> + for (; curr < len; ) {<br>
> + n = read(fd, buf, min(len - curr, (unsigned long)4096));<br>
> + if (n == 0) {<br>
> + pr_perror("Unexpected end of stream (skipping %lx/%lx bytes)",<br>
> + curr, len);<br>
> + return -1;<br>
> + } else if (n > 0) {<br>
> + curr += n;<br>
> + } else {<br>
> + pr_perror("Error while skipping bytes from stream (%lx/%lx)",<br>
> + curr, len);<br>
> + return -1;<br>
> + }<br>
> + }<br>
> +<br>
> + if (curr != len) {<br>
> + pr_perror("Unable to skip the current number of bytes: %lx instead of %lx",<br>
> + curr, len);<br>
> + return -1;<br>
> + }<br>
> + return 0;<br>
> +}<br>
> +<br>
> +static int pull_snapshot_ids(void)<br>
> +{<br>
> + int n, sockfd;<br>
> + SnapshotIdEntry *ls;<br>
> + struct snapshot *s = NULL;<br>
> +<br>
> + sockfd = read_remote_image_connection(NULL_SNAPSHOT_ID, PARENT_IMG);<br>
> +<br>
> + /* The connection was successful but there is not file. */<br>
> + if (sockfd < 0 && errno == ENOENT)<br>
> + return 0;<br>
> + else if (sockfd < 0) {<br>
> + pr_perror("Unable to open snapshot id read connection");<br>
> + return -1;<br>
> + }<br>
> +<br>
> + while (1) {<br>
> + n = pb_read_obj(sockfd, (void **)&ls, PB_SNAPSHOT_ID);<br>
> + if (!n) {<br>
> + close(sockfd);<br>
> + return n;<br>
> + } else if (n < 0) {<br>
> + pr_perror("Unable to read remote snapshot ids");<br>
> + close(sockfd);<br>
> + return n;<br>
> + }<br>
> +<br>
> + s = new_snapshot(ls->snapshot_id);<br>
> + if (!s) {<br>
> + pr_perror("Unable create new snapshot structure");<br>
> + close(sockfd);<br>
> + return -1;<br>
> + }<br>
> + add_snapshot(s);<br>
> + pr_info("[read_snapshot ids] parent = %s\n", ls->snapshot_id);<br>
> + }<br>
> + free(ls);<br>
> + close(sockfd);<br>
> + return n;<br>
> +}<br>
> +<br>
> +int push_snapshot_id(void)<br>
> +{<br>
> + int n;<br>
> + SnapshotIdEntry rn = SNAPSHOT_ID_ENTRY__INIT;<br>
> + int sockfd = write_remote_image_connection(NULL_SNAPSHOT_ID, PARENT_IMG, O_APPEND);<br>
> +<br>
> + if (sockfd < 0) {<br>
> + pr_perror("Unable to open snapshot id push connection");<br>
> + return -1;<br>
> + }<br>
> +<br>
> + rn.snapshot_id = xmalloc(sizeof(char) * PATHLEN);<br>
> + if (!rn.snapshot_id) {<br>
> + pr_perror("Unable to allocate snapshot id buffer");<br>
> + close(sockfd);<br>
> + return -1;<br>
> + }<br>
> + strncpy(rn.snapshot_id, snapshot_id, PATHLEN);<br>
> +<br>
> + n = pb_write_obj(sockfd, &rn, PB_SNAPSHOT_ID);<br>
> +<br>
> + xfree(rn.snapshot_id);<br>
> + close(sockfd);<br>
> + return n;<br>
> +}<br>
> +<br>
> +void init_snapshot_id(char *si)<br>
> +{<br>
> + snapshot_id = si;<br>
> +}<br>
> +<br>
> +char *get_curr_snapshot_id(void)<br>
> +{<br>
> + return snapshot_id;<br>
> +}<br>
> +<br>
> +int get_curr_snapshot_id_idx(void)<br>
> +{<br>
> + struct snapshot *si;<br>
> + int idx = 0;<br>
> +<br>
> + if (list_empty(&snapshot_head))<br>
> + pull_snapshot_ids();<br>
> +<br>
> + list_for_each_entry(si, &snapshot_head, l) {<br>
> + if (!strncmp(si->snapshot_id, snapshot_id, PATHLEN))<br>
> + return idx;<br>
> + idx++;<br>
> + }<br>
> +<br>
> + pr_perror("Error, could not find current snapshot id (%s) fd", snapshot_id);<br>
> + return -1;<br>
> +}<br>
> +<br>
> +char *get_snapshot_id_from_idx(int idx)<br>
> +{<br>
> + struct snapshot *si;<br>
> +<br>
> + if (list_empty(&snapshot_head))<br>
> + pull_snapshot_ids();<br>
> +<br>
> + /* Note: if idx is the service fd then we need the current<br>
> + * snapshot_id idx. Else we need a parent snapshot_id idx.<br>
> + */<br>
> + if (idx == get_service_fd(IMG_FD_OFF))<br>
> + idx = get_curr_snapshot_id_idx();<br>
> +<br>
> + list_for_each_entry(si, &snapshot_head, l) {<br>
> + if (!idx)<br>
> + return si->snapshot_id;<br>
> + idx--;<br>
> + }<br>
> +<br>
> + pr_perror("Error, could not find snapshot id for idx %d", idx);<br>
> + return NULL;<br>
> +}<br>
> +<br>
> +int get_curr_parent_snapshot_id_idx(void)<br>
> +{<br>
> + return get_curr_snapshot_id_idx() - 1;<br>
> +}<br>
> diff --git a/criu/include/cr_options.h b/criu/include/cr_options.h<br>
> index 8eeadc9..ada8369 100644<br>
> --- a/criu/include/cr_options.h<br>
> +++ b/criu/include/cr_options.h<br>
> @@ -111,6 +111,9 @@ struct cr_options {<br>
> unsigned int empty_ns;<br>
> bool tcp_skip_in_flight;<br>
> char *work_dir;<br>
> + bool remote;<br>
> + char *local_cache_path;<br>
> + char *local_proxy_path;<br>
> };<br>
><br>
> extern struct cr_options opts;<br>
> diff --git a/criu/include/image.h b/criu/include/image.h<br>
> index 65b7b0a..9ba6ab8 100644<br>
> --- a/criu/include/image.h<br>
> +++ b/criu/include/image.h<br>
> @@ -104,6 +104,7 @@ extern bool img_common_magic;<br>
> #define O_DUMP (O_WRONLY | O_CREAT | O_TRUNC)<br>
> #define O_SHOW (O_RDONLY | O_NOBUF)<br>
> #define O_RSTR (O_RDONLY)<br>
> +#define O_FORCE_LOCAL (O_SYNC)<br>
><br>
> struct cr_img {<br>
> union {<br>
> diff --git a/criu/include/protobuf-desc.h b/criu/include/protobuf-desc.h<br>
> index 6c76b49..43ac534 100644<br>
> --- a/criu/include/protobuf-desc.h<br>
> +++ b/criu/include/protobuf-desc.h<br>
> @@ -59,6 +59,10 @@ enum {<br>
> PB_BINFMT_MISC, /* 50 */<br>
> PB_TTY_DATA,<br>
> PB_AUTOFS,<br>
> + PB_REMOTE_IMAGE, /* Header for images sent from proxy to cache.*/<br>
> + PB_LOCAL_IMAGE, /* Header for reading/writing images from/to proxy or cache. */<br>
> + PB_LOCAL_IMAGE_REPLY, /* Header for reading/writing images reply. */<br>
> + PB_SNAPSHOT_ID, /* Contains a single id. Used for reading/writing ids from proxy or cache. */<br>
><br>
> /* PB_AUTOGEN_STOP */<br>
><br>
> diff --git a/criu/include/util.h b/criu/include/util.h<br>
> index 5b7cad1..63be0ea 100644<br>
> --- a/criu/include/util.h<br>
> +++ b/criu/include/util.h<br>
> @@ -259,6 +259,7 @@ FILE *fopenat(int dirfd, char *path, char *cflags);<br>
> void split(char *str, char token, char ***out, int *n);<br>
><br>
> int fd_has_data(int lfd);<br>
> +size_t read_into_buffer(int fd, char *buff, size_t size);<br>
><br>
> int make_yard(char *path);<br>
><br>
> diff --git a/criu/page-xfer.c b/criu/page-xfer.c<br>
> index 7978227..3c668d7 100644<br>
> --- a/criu/page-xfer.c<br>
> +++ b/criu/page-xfer.c<br>
> @@ -17,6 +17,8 @@<br>
> #include "protobuf.h"<br>
> #include "images/pagemap.pb-c.h"<br>
><br>
> +#include "img-remote.h"<br>
> +<br>
> static int page_server_sk = -1;<br>
><br>
> struct page_server_iov {<br>
> @@ -288,7 +290,9 @@ static int open_page_local_xfer(struct page_xfer *xfer, int fd_type, long id)<br>
> goto out;<br>
><br>
> xfer->parent = xmalloc(sizeof(*xfer->parent));<br>
> - if (!xfer->parent) {<br>
> + if (!xfer->parent && opts.remote) {<br>
> + return -1;<br>
> + } else if (!xfer->parent) {<br>
<br>
</div></div>I think using<br>
<br>
if (!xfer->parent) {<br>
if (!opts.remote)<br>
close(pfd);<br>
return -1;<br>
}<br>
<br>
is a little bit less obscure.<br>
<div><div><br></div></div></blockquote><div>You are right. Changed this. <br></div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex"><div><div>
> close(pfd);<br>
> return -1;<br>
> }<br>
> @@ -298,10 +302,12 @@ static int open_page_local_xfer(struct page_xfer *xfer, int fd_type, long id)<br>
> pr_perror("No parent image found, though parent directory is set");<br>
> xfree(xfer->parent);<br>
> xfer->parent = NULL;<br>
> - close(pfd);<br>
> + if (!opts.remote)<br>
> + close(pfd);<br>
> goto out;<br>
> }<br>
> - close(pfd);<br>
> + if (!opts.remote)<br>
> + close(pfd);<br>
> }<br>
><br>
> out:<br>
> @@ -406,9 +412,16 @@ int check_parent_local_xfer(int fd_type, int id)<br>
> struct stat st;<br>
> int ret, pfd;<br>
><br>
> - pfd = openat(get_service_fd(IMG_FD_OFF), CR_PARENT_LINK, O_RDONLY);<br>
> - if (pfd < 0 && errno == ENOENT)<br>
> - return 0;<br>
> + if (opts.remote) {<br>
> + pfd = get_curr_parent_snapshot_id_idx();<br>
> + pr_err("Unable to get parent snapsgot id");<br>
> + if (pfd == -1)<br>
> + return -1;<br>
> + } else {<br>
> + pfd = openat(get_service_fd(IMG_FD_OFF), CR_PARENT_LINK, O_RDONLY);<br>
> + if (pfd < 0 && errno == ENOENT)<br>
> + return 0;<br>
> + }<br>
><br>
> snprintf(path, sizeof(path), imgset_template[fd_type].fmt, id);<br>
> ret = fstatat(pfd, path, &st, 0);<br>
> @@ -470,6 +483,8 @@ int check_parent_page_xfer(int fd_type, long id)<br>
> {<br>
> if (opts.use_page_server)<br>
> return check_parent_server_xfer(fd_type, id);<br>
> + else if (opts.remote)<br>
> + return get_curr_parent_snapshot_id_idx() == -1 ? 0 : 1;<br>
> else<br>
> return check_parent_local_xfer(fd_type, id);<br>
> }<br>
> diff --git a/criu/pagemap.c b/criu/pagemap.c<br>
> index dbe9cc4..693f967 100644<br>
> --- a/criu/pagemap.c<br>
> +++ b/criu/pagemap.c<br>
> @@ -11,6 +11,8 @@<br>
> #include "protobuf.h"<br>
> #include "images/pagemap.pb-c.h"<br>
><br>
> +#include "img-remote.h"<br>
> +<br>
> #ifndef SEEK_DATA<br>
> #define SEEK_DATA 3<br>
> #define SEEK_HOLE 4<br>
> @@ -124,11 +126,18 @@ int dedup_one_iovec(struct page_read *pr, struct iovec *iov)<br>
> static int get_pagemap(struct page_read *pr, struct iovec *iov)<br>
> {<br>
> PagemapEntry *pe;<br>
> + int ret;<br>
><br>
> - if (pr->curr_pme >= pr->nr_pmes)<br>
> - return 0;<br>
> + if (opts.remote) {<br>
> + ret = pb_read_one_eof(pr->pmi, &pe, PB_PAGEMAP);<br>
> + if (ret <= 0)<br>
> + return ret;<br>
> + } else {<br>
> + if (pr->curr_pme >= pr->nr_pmes)<br>
> + return 0;<br>
><br>
> - pe = pr->pmes[pr->curr_pme];<br>
> + pe = pr->pmes[pr->curr_pme];<br>
> + }<br>
><br>
> pagemap2iovec(pe, iov);<br>
><br>
> @@ -261,7 +270,7 @@ static int read_pagemap_page(struct page_read *pr, unsigned long vaddr, int nr,<br>
> off_t current_vaddr = lseek(fd, pr->pi_off, SEEK_SET);<br>
><br>
> pr_debug("\tpr%u Read page from self %lx/%"PRIx64"\n", pr->id, pr->cvaddr, current_vaddr);<br>
> - ret = read(fd, buf, len);<br>
> + ret = read_into_buffer(fd, buf, len);<br>
> if (ret != len) {<br>
> pr_perror("Can't read mapping page %d", ret);<br>
> return -1;<br>
> @@ -314,7 +323,7 @@ static void close_page_read(struct page_read *pr)<br>
> if (pr->pi)<br>
> close_image(pr->pi);<br>
><br>
> - if (pr->pmes)<br>
> + if (!opts.remote && pr->pmes)<br>
> free_pagemaps(pr);<br>
> }<br>
><br>
> @@ -323,9 +332,24 @@ static int try_open_parent(int dfd, int pid, struct page_read *pr, int pr_flags)<br>
> int pfd, ret;<br>
> struct page_read *parent = NULL;<br>
><br>
> - pfd = openat(dfd, CR_PARENT_LINK, O_RDONLY);<br>
> - if (pfd < 0 && errno == ENOENT)<br>
> - goto out;<br>
> + if (opts.remote) {<br>
> + /* Note: we are replacing a real directory FD for a snapshot_id<br>
> + * index. Since we need the parent of the current snapshot_id,<br>
> + * we want the current snapshot_id index minus one. It is<br>
> + * possible that dfd is already a snapshot_id index. We test it<br>
> + * by comparing it to the service FD. When opening an image (see<br>
> + * do_open_image) we convert the snapshot_id index into a real<br>
> + * snapshot_id.<br>
> + */<br>
> + pfd = dfd == get_service_fd(IMG_FD_OFF) ?<br>
> + get_curr_snapshot_id_idx() - 1 : dfd - 1;<br>
> + if (pfd < 0)<br>
> + goto out;<br>
> + } else {<br>
> + pfd = openat(dfd, CR_PARENT_LINK, O_RDONLY);<br>
> + if (pfd < 0 && errno == ENOENT)<br>
> + goto out;<br>
> + }<br>
><br>
> parent = xmalloc(sizeof(*parent));<br>
> if (!parent)<br>
> @@ -348,7 +372,8 @@ out:<br>
> err_free:<br>
> xfree(parent);<br>
> err_cl:<br>
> - close(pfd);<br>
> + if (!opts.remote)<br>
> + close(pfd);<br>
> return -1;<br>
> }<br>
><br>
> @@ -458,7 +483,7 @@ int open_page_read_at(int dfd, int pid, struct page_read *pr, int pr_flags)<br>
> return -1;<br>
> }<br>
><br>
> - if (init_pagemaps(pr)) {<br>
> + if (!opts.remote && init_pagemaps(pr)) {<br>
<br>
</div></div>Is there anything that prevents using init_pagemaps with opts.remote?<br>
Why cannot we just read the entire pagemap from the socket as we read it<br>
from local file?<br></blockquote><div> </div><div>I hadn't commented it out at start, but got an error. I am not sure what the exact problem is.<br></div><div>The thing is that img_raw_size(pr->pmi) inside init_pagemaps returns 0; <br></div><div>Tell me if you understand why that happens.<br></div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex">
<div><div><br>
> close_page_read(pr);<br>
> return -1;<br>
> }<br>
> @@ -467,7 +492,8 @@ int open_page_read_at(int dfd, int pid, struct page_read *pr, int pr_flags)<br>
> pr->put_pagemap = put_pagemap;<br>
> pr->read_pages = read_pagemap_page;<br>
> pr->close = close_page_read;<br>
> - pr->seek_page = seek_pagemap_page;<br>
> + if (!opts.remote)<br>
> + pr->seek_page = seek_pagemap_page;<br>
> pr->id = ids++;<br>
><br>
> pr_debug("Opened page read %u (parent %u)\n",<br>
> diff --git a/criu/protobuf-desc.c b/criu/protobuf-desc.c<br>
> index 9352a76..c1850f9 100644<br>
> --- a/criu/protobuf-desc.c<br>
> +++ b/criu/protobuf-desc.c<br>
> @@ -64,6 +64,7 @@<br>
> #include "images/seccomp.pb-c.h"<br>
> #include "images/binfmt-misc.pb-c.h"<br>
> #include "images/autofs.pb-c.h"<br>
> +#include "images/remote-image.pb-c.h"<br>
><br>
> struct cr_pb_message_desc cr_pb_descs[PB_MAX];<br>
><br>
> diff --git a/criu/util.c b/criu/util.c<br>
> index c44d900..5e2f400 100644<br>
> --- a/criu/util.c<br>
> +++ b/criu/util.c<br>
> @@ -1184,3 +1184,18 @@ int setup_tcp_client(char *addr)<br>
><br>
> return sk;<br>
> }<br>
> +<br>
> +size_t read_into_buffer(int fd, char *buff, size_t size)<br>
> +{<br>
<br>
</div></div>Can you please explain why this wrapper is required?<br></blockquote><div>Actually it's not. It's only used one time. I moved the code.<br></div><div>So, thanks for the comments.<br></div><div>I'll resend the patches with the above fixes, and rebased on<br></div><div>criu-dev, not master. <br></div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex">
<div><div><br>
> + size_t n = 0;<br>
> + size_t curr = 0;<br>
> +<br>
> + while (1) {<br>
> + n = read(fd, buff + curr, size - curr);<br>
> + if (n < 1)<br>
> + return n;<br>
> + curr += n;<br>
> + if (curr == size)<br>
> + return size;<br>
> + }<br>
> +}<br>
> diff --git a/images/Makefile b/images/Makefile<br>
> index cf50794..3753d62 100644<br>
> --- a/images/Makefile<br>
> +++ b/images/Makefile<br>
> @@ -60,6 +60,7 @@ proto-obj-y += binfmt-misc.o<br>
> proto-obj-y += time.o<br>
> proto-obj-y += sysctl.o<br>
> proto-obj-y += autofs.o<br>
> +proto-obj-y += remote-image.o<br>
><br>
> CFLAGS += -iquote $(obj)/<br>
><br>
> diff --git a/images/remote-image.proto b/images/remote-image.proto<br>
> new file mode 100644<br>
> index 0000000..1212627<br>
> --- /dev/null<br>
> +++ b/images/remote-image.proto<br>
> @@ -0,0 +1,20 @@<br>
> +message local_image_entry {<br>
> + required string name = 1;<br>
> + required string snapshot_id = 2;<br>
> + required uint32 open_mode = 3;<br>
> +}<br>
> +<br>
> +message remote_image_entry {<br>
> + required string name = 1;<br>
> + required string snapshot_id = 2;<br>
> + required uint32 open_mode = 3;<br>
> + required uint64 size = 4;<br>
> +}<br>
> +<br>
> +message local_image_reply_entry {<br>
> + required uint32 error = 1;<br>
> +}<br>
> +<br>
> +message snapshot_id_entry {<br>
> + required string snapshot_id = 1;<br>
> +}<br>
> --<br>
> 2.7.3<br>
><br>
</div></div>> _______________________________________________<br>
> CRIU mailing list<br>
> <a href="mailto:CRIU@openvz.org" target="_blank">CRIU@openvz.org</a><br>
> <a href="https://lists.openvz.org/mailman/listinfo/criu" rel="noreferrer" target="_blank">https://lists.openvz.org/mailman/listinfo/criu</a><br>
><br>
<br>
</blockquote></div><br></div></div>