[CRIU] [PATCH 1/4] mem: Introduce image-proxy/image-cache & remote option
Katerina Koukiou
k.koukiou at googlemail.com
Wed Aug 3 14:50:18 PDT 2016
On Wed, Aug 3, 2016 at 12:42 PM, Mike Rapoport <rppt at linux.vnet.ibm.com>
wrote:
> On Tue, Aug 02, 2016 at 04:39:37PM +0000, Katerina Koukiou wrote:
> > This patch introduces --remote option and image-proxy/image-cache
> processes.
> > This leaves user the option to decide if the checkpoint data are to be
> stored
> > on disk or sent through socket to the image-proxy.
> > The latter forwards the data to the destination node where image-cache
> receives
> > them.
> >
> > The overall communication is performed as follows:
> > rc_node CRIU dump -> (sends images using a local socket) ->
> image-proxy
> >
> |
> >
> V
> > dst_node: CRIU restore <- (receives images from a local socket) <-
> image-cache
> >
> > Running criu with --remote option is like this:
> >
> > dst_node# criu image-cache --port <port> -o /tmp/image-cache.log
> > --local-cache-path <local_cache_path> ...
> > dst_node# criu restore --remote -o /tmp/image-cache.log
> > --local-cache-path <local_cache_path> ...
> > src_node# criu image-proxy --port <port> --address <dst_node> -o
> /tmp/image-proxy.log
> > --local-proxy-path <local_proxy_path> ...
> > src_node# criu dump -t <pid> --remote -o /tmp/dump.log
> > --local-proxy-path <local_proxy_path> ...
> >
> > Signed-off-by: Rodrigo Bruno <rbruno at gsd.inesc-id.pt>
> > Signed-off-by: Katerina Koukiou <k.koukiou at gmail.com>
> > ---
> > criu/Makefile.crtools | 4 +
> > criu/cr-dump.c | 15 +++
> > criu/crtools.c | 30 ++++-
> > criu/image-desc.c | 4 +-
> > criu/image.c | 26 ++++-
> > criu/img-remote.c | 272
> +++++++++++++++++++++++++++++++++++++++++++
> > criu/include/cr_options.h | 3 +
> > criu/include/image.h | 1 +
> > criu/include/protobuf-desc.h | 4 +
> > criu/include/util.h | 1 +
> > criu/page-xfer.c | 27 ++++-
> > criu/pagemap.c | 48 ++++++--
> > criu/protobuf-desc.c | 1 +
> > criu/util.c | 15 +++
> > images/Makefile | 1 +
> > images/remote-image.proto | 20 ++++
> > 16 files changed, 449 insertions(+), 23 deletions(-)
> > create mode 100644 criu/img-remote.c
> > create mode 100644 images/remote-image.proto
> >
> > diff --git a/criu/Makefile.crtools b/criu/Makefile.crtools
> > index 2665f92..8189960 100644
> > --- a/criu/Makefile.crtools
> > +++ b/criu/Makefile.crtools
> > @@ -26,6 +26,10 @@ obj-y += files-reg.o
> > obj-y += fsnotify.o
> > obj-y += image-desc.o
> > obj-y += image.o
> > +obj-y += img-remote.o
> > +obj-y += img-proxy.o
> > +obj-y += img-cache.o
> > +obj-y += img-remote-proto.o
> > obj-y += ipc_ns.o
> > obj-y += irmap.o
> > obj-y += kcmp-ids.o
> > diff --git a/criu/cr-dump.c b/criu/cr-dump.c
> > index be83bec..a8b5ab6 100644
> > --- a/criu/cr-dump.c
> > +++ b/criu/cr-dump.c
> > @@ -84,6 +84,8 @@
> >
> > #include "asm/dump.h"
> >
> > +#include "img-remote.h"
> > +
> > static char loc_buf[PAGE_SIZE];
> >
> > void free_mappings(struct vm_area_list *vma_area_list)
> > @@ -1483,6 +1485,11 @@ int cr_pre_dump_tasks(pid_t pid)
> > struct pstree_item *item;
> > int ret = -1;
> >
> > + if (opts.remote && push_snapshot_id() < 0) {
> > + pr_err("Failed to push image namespace.\n");
> > + goto err;
> > + }
> > +
> > root_item = alloc_pstree_item();
> > if (!root_item)
> > goto err;
> > @@ -1613,6 +1620,9 @@ static int cr_dump_finish(int ret)
> >
> > close_service_fd(CR_PROC_FD_OFF);
> >
> > + if (opts.remote)
> > + finish_remote_dump();
> > +
>
> Please take care of possible errors.
> Would it be correct to call finish_remote_dump even is ret != 0?
> And, what happens if finish_remote_dump fails?
>
Fixed that.
>
> > if (ret) {
> > pr_err("Dumping FAILED.\n");
> > } else {
> > @@ -1633,6 +1643,11 @@ int cr_dump_tasks(pid_t pid)
> > pr_info("Dumping processes (pid: %d)\n", pid);
> > pr_info("========================================\n");
> >
> > + if (opts.remote && push_snapshot_id() < 0) {
> > + pr_err("Failed to push image namespace.\n");
> > + goto err;
> > + }
> > +
> > root_item = alloc_pstree_item();
> > if (!root_item)
> > goto err;
> > diff --git a/criu/crtools.c b/criu/crtools.c
> > index a0b2fc9..4efca16 100644
> > --- a/criu/crtools.c
> > +++ b/criu/crtools.c
> > @@ -48,6 +48,7 @@
> >
> > #include "setproctitle.h"
> > #include "sysctl.h"
> > +#include "img-remote.h"
> >
> > struct cr_options opts;
> >
> > @@ -72,6 +73,10 @@ void init_opts(void)
> > opts.ghost_limit = DEFAULT_GHOST_LIMIT;
> > opts.timeout = DEFAULT_TIMEOUT;
> > opts.empty_ns = 0;
> > + opts.addr = DEFAULT_CACHE_HOST;
> > + opts.port = DEFAULT_CACHE_PORT;
> > + opts.local_cache_path = DEFAULT_IMG_PATH;
> > + opts.local_proxy_path = DEFAULT_IMG_PATH;
> > }
> >
> > static int parse_join_ns(const char *ptr)
> > @@ -279,6 +284,9 @@ int main(int argc, char *argv[], char *envp[])
> > { "cgroup-props-file", required_argument, 0,
> 1081 },
> > { "cgroup-dump-controller", required_argument, 0,
> 1082 },
> > { SK_INFLIGHT_PARAM, no_argument, 0,
> 1083 },
> > + { "remote", no_argument, 0,
> 1084 },
> > + { "local-cache-path", required_argument, 0,
> 1085 },
> > + { "local-proxy-path", required_argument, 0,
> 1086 },
> > { },
> > };
> >
> > @@ -587,6 +595,15 @@ int main(int argc, char *argv[], char *envp[])
> > pr_msg("Will skip in-flight TCP connections\n");
> > opts.tcp_skip_in_flight = true;
> > break;
> > + case 1084:
> > + opts.remote = true;
> > + break;
> > + case 1085:
> > + opts.local_cache_path = optarg;
> > + break;
> > + case 1086:
> > + opts.local_proxy_path = optarg;
> > + break;
> > case 'V':
> > pr_msg("Version: %s\n", CRIU_VERSION);
> > if (strcmp(CRIU_GITID, "0"))
> > @@ -727,6 +744,12 @@ int main(int argc, char *argv[], char *envp[])
> > if (!strcmp(argv[optind], "page-server"))
> > return cr_page_server(opts.daemon_mode, -1) > 0 ? 0 : 1;
> >
> > + if (!strcmp(argv[optind], "image-cache"))
> > + return image_cache(opts.local_cache_path, opts.port);
> > +
> > + if (!strcmp(argv[optind], "image-proxy"))
> > + return image_proxy(opts.local_proxy_path, opts.addr,
> opts.port);
> > +
> > if (!strcmp(argv[optind], "service"))
> > return cr_service(opts.daemon_mode);
> >
> > @@ -753,6 +776,8 @@ usage:
> > " criu page-server\n"
> > " criu service [<options>]\n"
> > " criu dedup\n"
> > +" criu image-cache [<options>]\n"
> > +" criu image-proxy [<options>]\n"
> > "\n"
> > "Commands:\n"
> > " dump checkpoint a process/tree identified by pid\n"
> > @@ -765,6 +790,8 @@ usage:
> > " dedup remove duplicates in memory dump\n"
> > " cpuinfo dump writes cpu information into image file\n"
> > " cpuinfo check validates cpu information read from image file\n"
> > +" image-cache launch destination-side cache for images sent from
> the source-side\n"
> > +" image-proxy launch source-side proxy to sent images to the
> destination-side\n"
> > );
> >
> > if (usage_error) {
> > @@ -791,6 +818,7 @@ usage:
> > " restore making it the parent of the restored
> process\n"
> > " --freeze-cgroup\n"
> > " use cgroup freezer to collect processes\n"
> > +" --remote dump/restore images directly to/from remote
> node using image-proxy/image-cache\n"
> > "\n"
> > "* Special resources support:\n"
> > " -x|--" USK_EXT_PARAM "inode,.." " allow external unix
> connections (optionally can be assign socket's inode that allows one-sided
> dump)\n"
> > @@ -900,7 +928,7 @@ usage:
> > "\n"
> > "Page/Service server options:\n"
> > " --address ADDR address of server or service\n"
> > -" --port PORT port of page server\n"
> > +" --port PORT port of page serve or service\n"
> > " -d|--daemon run in the background after creating socket\n"
> > "\n"
> > "Other options:\n"
> > diff --git a/criu/image-desc.c b/criu/image-desc.c
> > index 2b31354..e146ef8 100644
> > --- a/criu/image-desc.c
> > +++ b/criu/image-desc.c
> > @@ -102,13 +102,13 @@ struct cr_fd_desc_tmpl imgset_template[CR_FD_MAX]
> = {
> > [CR_FD_STATS] = {
> > .fmt = "stats-%s",
> > .magic = STATS_MAGIC,
> > - .oflags = O_SERVICE,
> > + .oflags = O_SERVICE | O_FORCE_LOCAL,
> > },
> >
> > [CR_FD_IRMAP_CACHE] = {
> > .fmt = "irmap-cache",
> > .magic = IRMAP_CACHE_MAGIC,
> > - .oflags = O_SERVICE,
> > + .oflags = O_SERVICE | O_FORCE_LOCAL,
> > },
> >
> > [CR_FD_FILE_LOCKS_PID] = {
> > diff --git a/criu/image.c b/criu/image.c
> > index a3bb285..d5ecea2 100644
> > --- a/criu/image.c
> > +++ b/criu/image.c
> > @@ -13,6 +13,7 @@
> > #include "protobuf.h"
> > #include "images/inventory.pb-c.h"
> > #include "images/pagemap.pb-c.h"
> > +#include "img-remote.h"
> >
> > bool ns_per_id = false;
> > bool img_common_magic = true;
> > @@ -309,9 +310,26 @@ static int do_open_image(struct cr_img *img, int
> dfd, int type, unsigned long of
> > {
> > int ret, flags;
> >
> > - flags = oflags & ~(O_NOBUF | O_SERVICE);
> > + flags = oflags & ~(O_NOBUF | O_SERVICE | O_FORCE_LOCAL);
> >
> > - ret = openat(dfd, path, flags, CR_FD_PERM);
> > + if (opts.remote && !(oflags & O_FORCE_LOCAL)) {
> > + char *snapshot_id = NULL;
> > +
> > + snapshot_id = get_snapshot_id_from_idx(dfd);
> > +
> > + if (snapshot_id == NULL)
> > + ret = -1;
> > + else if (flags == O_RDONLY) {
> > + pr_info("do_open_remote_image RDONLY path=%s
> snapshot_id=%s\n",
> > + path, snapshot_id);
>
> IMHO, pr_debug will suffice here
>
> > + ret = read_remote_image_connection(snapshot_id,
> path);
> > + } else {
> > + pr_info("do_open_remote_image WDONLY path=%s
> snapshot_id=%s\n",
> > + path, snapshot_id);
>
> and here
>
Right, changed these.
>
> > + ret = write_remote_image_connection(snapshot_id,
> path, O_WRONLY);
> > + }
> > + } else
> > + ret = openat(dfd, path, flags, CR_FD_PERM);
> > if (ret < 0) {
> > if (!(flags & O_CREAT) && (errno == ENOENT)) {
> > pr_info("No %s image\n", path);
> > @@ -413,7 +431,9 @@ int open_image_dir(char *dir)
> > close(fd);
> > fd = ret;
> >
> > - if (opts.img_parent) {
> > + if (opts.remote) {
> > + init_snapshot_id(dir);
> > + } else if (opts.img_parent) {
> > ret = symlinkat(opts.img_parent, fd, CR_PARENT_LINK);
> > if (ret < 0 && errno != EEXIST) {
> > pr_perror("Can't link parent snapshot");
> > diff --git a/criu/img-remote.c b/criu/img-remote.c
> > new file mode 100644
> > index 0000000..05f3666
> > --- /dev/null
> > +++ b/criu/img-remote.c
> > @@ -0,0 +1,272 @@
> > +#include <unistd.h>
> > +#include <stdlib.h>
> > +#include <sys/types.h>
> > +#include <sys/socket.h>
> > +#include <netinet/in.h>
> > +#include <netdb.h>
> > +#include "xmalloc.h"
> > +#include "criu-log.h"
> > +#include "img-remote.h"
> > +#include "img-remote-proto.h"
> > +#include "images/remote-image.pb-c.h"
> > +#include "protobuf-desc.h"
> > +#include <fcntl.h>
> > +#include "servicefd.h"
> > +#include "compiler.h"
> > +#include "cr_options.h"
> > +
> > +#define PB_LOCAL_IMAGE_SIZE PATHLEN
> > +
> > +static char *snapshot_id;
> > +
> > +LIST_HEAD(snapshot_head);
> > +
> > +/* A snapshot is a dump or pre-dump operation. Each snapshot is
> identified by an
> > + * ID which corresponds to the working directory specefied by the user.
> > + */
> > +struct snapshot {
> > + char snapshot_id[PATHLEN];
> > + struct list_head l;
> > +};
> > +
> > +struct snapshot *new_snapshot(char *snapshot_id)
> > +{
> > + struct snapshot *s = malloc(sizeof(struct snapshot));
> > +
> > + if (!s)
> > + pr_perror("Failed to allocate snapshot structure");
>
> Shouldn't we propagate allocation error?
>
> > + strncpy(s->snapshot_id, snapshot_id, PATHLEN);
>
> because this fill SIGSEGV otherwise ;-)
Fixed that.
> > + return s;
> > +}
> > +
> > +void add_snapshot(struct snapshot *snapshot)
> > +{
> > + list_add_tail(&(snapshot->l), &snapshot_head);
> > +}
> > +
> > +static char *get_local_img_path(void)
> > +{
> > + static char *local_img_path = NULL;
> > +
> > + if (local_img_path != NULL)
> > + return local_img_path;
> > +
> > + if (strcmp(opts.local_cache_path, DEFAULT_IMG_PATH))
> > + local_img_path = opts.local_cache_path;
> > + else if (strcmp(opts.local_proxy_path, DEFAULT_IMG_PATH))
> > + local_img_path = opts.local_proxy_path;
> > +
>
> It seems that if user will pass DEFAULT_IMG_PATH in opts.local_cache_path
> and in opts.local_proxy_path, the local_img_path will remain NULL...
>
> Fixed that.
> > + return local_img_path;
> > +}
> > +
> > +int read_remote_image_connection(char *snapshot_id, char *path)
> > +{
> > + int error;
> > + int sockfd = setup_UNIX_client_socket(get_local_img_path());
> > +
> > + if (sockfd < 0) {
> > + pr_perror("Error opening local connection for %s:%s",
> path, snapshot_id);
> > + return -1;
> > + }
> > +
> > + if (write_header(sockfd, snapshot_id, path, O_RDONLY) < 0) {
> > + pr_perror("Error writing header for %s:%s", path,
> snapshot_id);
> > + return -1;
> > + }
> > +
> > + if (read_reply_header(sockfd, &error) < 0) {
> > + pr_perror("Error reading reply header for %s:%s", path,
> snapshot_id);
> > + return -1;
> > + }
> > + errno = error;
>
> Can you please explain why do you assign errno value?
>
> Yes, the code is not mine; I just rebased it. But I can tell you what I
understand.
In criu/image.c when an image does not exist read_remote_image_connection
returns -1. In image.c we want to handle this case separately when
read_remote_image_connection returns -1 but the reason is error code ENOENT.
Then because read_reply_header does not assign errno but error variable,
we do the "errno = error" assignment by ourselves.
> + if (!error)
> > + return sockfd;
> > + else if (error == ENOENT) {
> > + pr_info("Image does not exist (%s:%s)\n", path,
> snapshot_id);
> > + close(sockfd);
> > + return -1;
> > + }
> > + pr_perror("Unexpected error returned: %d (%s:%s)\n", error, path,
> snapshot_id);
> > + close(sockfd);
> > + return -1;
> > +}
> > +
> > +int write_remote_image_connection(char *snapshot_id, char *path, int
> flags)
> > +{
> > + int sockfd = setup_UNIX_client_socket(get_local_img_path());
> > +
> > + if (sockfd < 0)
> > + return -1;
> > +
> > + if (write_header(sockfd, snapshot_id, path, flags) < 0) {
> > + pr_perror("Error writing header for %s:%s", path,
> snapshot_id);
> > + return -1;
> > + }
> > + return sockfd;
> > +}
> > +
> > +int finish_remote_dump(void)
> > +{
> > + pr_info("Dump side is calling finish\n");
> > + int fd = write_remote_image_connection(NULL_SNAPSHOT_ID,
> DUMP_FINISH, O_WRONLY);
> > +
> > + if (fd == -1) {
> > + pr_perror("Unable to open finish dump connection");
> > + return -1;
> > + }
> > +
> > + close(fd);
> > + return 0;
> > +}
> > +
> > +int skip_remote_bytes(int fd, unsigned long len)
> > +{
> > + static char buf[4096];
> > + int n = 0;
> > + unsigned long curr = 0;
> > +
> > + for (; curr < len; ) {
> > + n = read(fd, buf, min(len - curr, (unsigned long)4096));
> > + if (n == 0) {
> > + pr_perror("Unexpected end of stream (skipping
> %lx/%lx bytes)",
> > + curr, len);
> > + return -1;
> > + } else if (n > 0) {
> > + curr += n;
> > + } else {
> > + pr_perror("Error while skipping bytes from stream
> (%lx/%lx)",
> > + curr, len);
> > + return -1;
> > + }
> > + }
> > +
> > + if (curr != len) {
> > + pr_perror("Unable to skip the current number of bytes: %lx
> instead of %lx",
> > + curr, len);
> > + return -1;
> > + }
> > + return 0;
> > +}
> > +
> > +static int pull_snapshot_ids(void)
> > +{
> > + int n, sockfd;
> > + SnapshotIdEntry *ls;
> > + struct snapshot *s = NULL;
> > +
> > + sockfd = read_remote_image_connection(NULL_SNAPSHOT_ID,
> PARENT_IMG);
> > +
> > + /* The connection was successful but there is not file. */
> > + if (sockfd < 0 && errno == ENOENT)
> > + return 0;
> > + else if (sockfd < 0) {
> > + pr_perror("Unable to open snapshot id read connection");
> > + return -1;
> > + }
> > +
> > + while (1) {
> > + n = pb_read_obj(sockfd, (void **)&ls, PB_SNAPSHOT_ID);
> > + if (!n) {
> > + close(sockfd);
> > + return n;
> > + } else if (n < 0) {
> > + pr_perror("Unable to read remote snapshot ids");
> > + close(sockfd);
> > + return n;
> > + }
> > +
> > + s = new_snapshot(ls->snapshot_id);
> > + if (!s) {
> > + pr_perror("Unable create new snapshot structure");
> > + close(sockfd);
> > + return -1;
> > + }
> > + add_snapshot(s);
> > + pr_info("[read_snapshot ids] parent = %s\n",
> ls->snapshot_id);
> > + }
> > + free(ls);
> > + close(sockfd);
> > + return n;
> > +}
> > +
> > +int push_snapshot_id(void)
> > +{
> > + int n;
> > + SnapshotIdEntry rn = SNAPSHOT_ID_ENTRY__INIT;
> > + int sockfd = write_remote_image_connection(NULL_SNAPSHOT_ID,
> PARENT_IMG, O_APPEND);
> > +
> > + if (sockfd < 0) {
> > + pr_perror("Unable to open snapshot id push connection");
> > + return -1;
> > + }
> > +
> > + rn.snapshot_id = xmalloc(sizeof(char) * PATHLEN);
> > + if (!rn.snapshot_id) {
> > + pr_perror("Unable to allocate snapshot id buffer");
> > + close(sockfd);
> > + return -1;
> > + }
> > + strncpy(rn.snapshot_id, snapshot_id, PATHLEN);
> > +
> > + n = pb_write_obj(sockfd, &rn, PB_SNAPSHOT_ID);
> > +
> > + xfree(rn.snapshot_id);
> > + close(sockfd);
> > + return n;
> > +}
> > +
> > +void init_snapshot_id(char *si)
> > +{
> > + snapshot_id = si;
> > +}
> > +
> > +char *get_curr_snapshot_id(void)
> > +{
> > + return snapshot_id;
> > +}
> > +
> > +int get_curr_snapshot_id_idx(void)
> > +{
> > + struct snapshot *si;
> > + int idx = 0;
> > +
> > + if (list_empty(&snapshot_head))
> > + pull_snapshot_ids();
> > +
> > + list_for_each_entry(si, &snapshot_head, l) {
> > + if (!strncmp(si->snapshot_id, snapshot_id, PATHLEN))
> > + return idx;
> > + idx++;
> > + }
> > +
> > + pr_perror("Error, could not find current snapshot id (%s) fd",
> snapshot_id);
> > + return -1;
> > +}
> > +
> > +char *get_snapshot_id_from_idx(int idx)
> > +{
> > + struct snapshot *si;
> > +
> > + if (list_empty(&snapshot_head))
> > + pull_snapshot_ids();
> > +
> > + /* Note: if idx is the service fd then we need the current
> > + * snapshot_id idx. Else we need a parent snapshot_id idx.
> > + */
> > + if (idx == get_service_fd(IMG_FD_OFF))
> > + idx = get_curr_snapshot_id_idx();
> > +
> > + list_for_each_entry(si, &snapshot_head, l) {
> > + if (!idx)
> > + return si->snapshot_id;
> > + idx--;
> > + }
> > +
> > + pr_perror("Error, could not find snapshot id for idx %d", idx);
> > + return NULL;
> > +}
> > +
> > +int get_curr_parent_snapshot_id_idx(void)
> > +{
> > + return get_curr_snapshot_id_idx() - 1;
> > +}
> > diff --git a/criu/include/cr_options.h b/criu/include/cr_options.h
> > index 8eeadc9..ada8369 100644
> > --- a/criu/include/cr_options.h
> > +++ b/criu/include/cr_options.h
> > @@ -111,6 +111,9 @@ struct cr_options {
> > unsigned int empty_ns;
> > bool tcp_skip_in_flight;
> > char *work_dir;
> > + bool remote;
> > + char *local_cache_path;
> > + char *local_proxy_path;
> > };
> >
> > extern struct cr_options opts;
> > diff --git a/criu/include/image.h b/criu/include/image.h
> > index 65b7b0a..9ba6ab8 100644
> > --- a/criu/include/image.h
> > +++ b/criu/include/image.h
> > @@ -104,6 +104,7 @@ extern bool img_common_magic;
> > #define O_DUMP (O_WRONLY | O_CREAT | O_TRUNC)
> > #define O_SHOW (O_RDONLY | O_NOBUF)
> > #define O_RSTR (O_RDONLY)
> > +#define O_FORCE_LOCAL (O_SYNC)
> >
> > struct cr_img {
> > union {
> > diff --git a/criu/include/protobuf-desc.h b/criu/include/protobuf-desc.h
> > index 6c76b49..43ac534 100644
> > --- a/criu/include/protobuf-desc.h
> > +++ b/criu/include/protobuf-desc.h
> > @@ -59,6 +59,10 @@ enum {
> > PB_BINFMT_MISC, /* 50 */
> > PB_TTY_DATA,
> > PB_AUTOFS,
> > + PB_REMOTE_IMAGE, /* Header for images sent from proxy to
> cache.*/
> > + PB_LOCAL_IMAGE, /* Header for reading/writing images
> from/to proxy or cache. */
> > + PB_LOCAL_IMAGE_REPLY, /* Header for reading/writing images
> reply. */
> > + PB_SNAPSHOT_ID, /* Contains a single id. Used for
> reading/writing ids from proxy or cache. */
> >
> > /* PB_AUTOGEN_STOP */
> >
> > diff --git a/criu/include/util.h b/criu/include/util.h
> > index 5b7cad1..63be0ea 100644
> > --- a/criu/include/util.h
> > +++ b/criu/include/util.h
> > @@ -259,6 +259,7 @@ FILE *fopenat(int dirfd, char *path, char *cflags);
> > void split(char *str, char token, char ***out, int *n);
> >
> > int fd_has_data(int lfd);
> > +size_t read_into_buffer(int fd, char *buff, size_t size);
> >
> > int make_yard(char *path);
> >
> > diff --git a/criu/page-xfer.c b/criu/page-xfer.c
> > index 7978227..3c668d7 100644
> > --- a/criu/page-xfer.c
> > +++ b/criu/page-xfer.c
> > @@ -17,6 +17,8 @@
> > #include "protobuf.h"
> > #include "images/pagemap.pb-c.h"
> >
> > +#include "img-remote.h"
> > +
> > static int page_server_sk = -1;
> >
> > struct page_server_iov {
> > @@ -288,7 +290,9 @@ static int open_page_local_xfer(struct page_xfer
> *xfer, int fd_type, long id)
> > goto out;
> >
> > xfer->parent = xmalloc(sizeof(*xfer->parent));
> > - if (!xfer->parent) {
> > + if (!xfer->parent && opts.remote) {
> > + return -1;
> > + } else if (!xfer->parent) {
>
> I think using
>
> if (!xfer->parent) {
> if (!opts.remote)
> close(pfd);
> return -1;
> }
>
> is little bit less obscure.
>
> You are right. Changed this.
> > close(pfd);
> > return -1;
> > }
> > @@ -298,10 +302,12 @@ static int open_page_local_xfer(struct page_xfer
> *xfer, int fd_type, long id)
> > pr_perror("No parent image found, though parent
> directory is set");
> > xfree(xfer->parent);
> > xfer->parent = NULL;
> > - close(pfd);
> > + if (!opts.remote)
> > + close(pfd);
> > goto out;
> > }
> > - close(pfd);
> > + if (!opts.remote)
> > + close(pfd);
> > }
> >
> > out:
> > @@ -406,9 +412,16 @@ int check_parent_local_xfer(int fd_type, int id)
> > struct stat st;
> > int ret, pfd;
> >
> > - pfd = openat(get_service_fd(IMG_FD_OFF), CR_PARENT_LINK, O_RDONLY);
> > - if (pfd < 0 && errno == ENOENT)
> > - return 0;
> > + if (opts.remote) {
> > + pfd = get_curr_parent_snapshot_id_idx();
> > + pr_err("Unable to get parent snapsgot id");
> > + if (pfd == -1)
> > + return -1;
> > + } else {
> > + pfd = openat(get_service_fd(IMG_FD_OFF), CR_PARENT_LINK,
> O_RDONLY);
> > + if (pfd < 0 && errno == ENOENT)
> > + return 0;
> > + }
> >
> > snprintf(path, sizeof(path), imgset_template[fd_type].fmt, id);
> > ret = fstatat(pfd, path, &st, 0);
> > @@ -470,6 +483,8 @@ int check_parent_page_xfer(int fd_type, long id)
> > {
> > if (opts.use_page_server)
> > return check_parent_server_xfer(fd_type, id);
> > + else if (opts.remote)
> > + return get_curr_parent_snapshot_id_idx() == -1 ? 0 : 1;
> > else
> > return check_parent_local_xfer(fd_type, id);
> > }
> > diff --git a/criu/pagemap.c b/criu/pagemap.c
> > index dbe9cc4..693f967 100644
> > --- a/criu/pagemap.c
> > +++ b/criu/pagemap.c
> > @@ -11,6 +11,8 @@
> > #include "protobuf.h"
> > #include "images/pagemap.pb-c.h"
> >
> > +#include "img-remote.h"
> > +
> > #ifndef SEEK_DATA
> > #define SEEK_DATA 3
> > #define SEEK_HOLE 4
> > @@ -124,11 +126,18 @@ int dedup_one_iovec(struct page_read *pr, struct
> iovec *iov)
> > static int get_pagemap(struct page_read *pr, struct iovec *iov)
> > {
> > PagemapEntry *pe;
> > + int ret;
> >
> > - if (pr->curr_pme >= pr->nr_pmes)
> > - return 0;
> > + if (opts.remote) {
> > + ret = pb_read_one_eof(pr->pmi, &pe, PB_PAGEMAP);
> > + if (ret <= 0)
> > + return ret;
> > + } else {
> > + if (pr->curr_pme >= pr->nr_pmes)
> > + return 0;
> >
> > - pe = pr->pmes[pr->curr_pme];
> > + pe = pr->pmes[pr->curr_pme];
> > + }
> >
> > pagemap2iovec(pe, iov);
> >
> > @@ -261,7 +270,7 @@ static int read_pagemap_page(struct page_read *pr,
> unsigned long vaddr, int nr,
> > off_t current_vaddr = lseek(fd, pr->pi_off, SEEK_SET);
> >
> > pr_debug("\tpr%u Read page from self %lx/%"PRIx64"\n",
> pr->id, pr->cvaddr, current_vaddr);
> > - ret = read(fd, buf, len);
> > + ret = read_into_buffer(fd, buf, len);
> > if (ret != len) {
> > pr_perror("Can't read mapping page %d", ret);
> > return -1;
> > @@ -314,7 +323,7 @@ static void close_page_read(struct page_read *pr)
> > if (pr->pi)
> > close_image(pr->pi);
> >
> > - if (pr->pmes)
> > + if (!opts.remote && pr->pmes)
> > free_pagemaps(pr);
> > }
> >
> > @@ -323,9 +332,24 @@ static int try_open_parent(int dfd, int pid, struct
> page_read *pr, int pr_flags)
> > int pfd, ret;
> > struct page_read *parent = NULL;
> >
> > - pfd = openat(dfd, CR_PARENT_LINK, O_RDONLY);
> > - if (pfd < 0 && errno == ENOENT)
> > - goto out;
> > + if (opts.remote) {
> > + /* Note: we are replacing a real directory FD for a
> snapshot_id
> > + * index. Since we need the parent of the current
> snapshot_id,
> > + * we want the current snapshot_id index minus one. It is
> > + * possible that dfd is already a snapshot_id index. We
> test it
> > + * by comparing it to the service FD. When opening an
> image (see
> > + * do_open_image) we convert the snapshot_id index into a
> real
> > + * snapshot_id.
> > + */
> > + pfd = dfd == get_service_fd(IMG_FD_OFF) ?
> > + get_curr_snapshot_id_idx() - 1 : dfd - 1;
> > + if (pfd < 0)
> > + goto out;
> > + } else {
> > + pfd = openat(dfd, CR_PARENT_LINK, O_RDONLY);
> > + if (pfd < 0 && errno == ENOENT)
> > + goto out;
> > + }
> >
> > parent = xmalloc(sizeof(*parent));
> > if (!parent)
> > @@ -348,7 +372,8 @@ out:
> > err_free:
> > xfree(parent);
> > err_cl:
> > - close(pfd);
> > + if (!opts.remote)
> > + close(pfd);
> > return -1;
> > }
> >
> > @@ -458,7 +483,7 @@ int open_page_read_at(int dfd, int pid, struct
> page_read *pr, int pr_flags)
> > return -1;
> > }
> >
> > - if (init_pagemaps(pr)) {
> > + if (!opts.remote && init_pagemaps(pr)) {
>
> Is there anything that prevents using init_pagemaps with opts.remote?
> Why cannot we just read the entire pagemap from the socket as we read it
> from local file?
>
I hadn't commented it out at start, but got an error. I am not sure what
the exact problem is.
The thing is that img_raw_size(pr->pmi) inside init_pagemaps returns 0;
Tell me if you understand why that happens.
>
> > close_page_read(pr);
> > return -1;
> > }
> > @@ -467,7 +492,8 @@ int open_page_read_at(int dfd, int pid, struct
> page_read *pr, int pr_flags)
> > pr->put_pagemap = put_pagemap;
> > pr->read_pages = read_pagemap_page;
> > pr->close = close_page_read;
> > - pr->seek_page = seek_pagemap_page;
> > + if (!opts.remote)
> > + pr->seek_page = seek_pagemap_page;
> > pr->id = ids++;
> >
> > pr_debug("Opened page read %u (parent %u)\n",
> > diff --git a/criu/protobuf-desc.c b/criu/protobuf-desc.c
> > index 9352a76..c1850f9 100644
> > --- a/criu/protobuf-desc.c
> > +++ b/criu/protobuf-desc.c
> > @@ -64,6 +64,7 @@
> > #include "images/seccomp.pb-c.h"
> > #include "images/binfmt-misc.pb-c.h"
> > #include "images/autofs.pb-c.h"
> > +#include "images/remote-image.pb-c.h"
> >
> > struct cr_pb_message_desc cr_pb_descs[PB_MAX];
> >
> > diff --git a/criu/util.c b/criu/util.c
> > index c44d900..5e2f400 100644
> > --- a/criu/util.c
> > +++ b/criu/util.c
> > @@ -1184,3 +1184,18 @@ int setup_tcp_client(char *addr)
> >
> > return sk;
> > }
> > +
> > +size_t read_into_buffer(int fd, char *buff, size_t size)
> > +{
>
> Can you please explain why this wrapper is required?
>
Actually it's not. It's only used one time. I moved the code.
So, thanks for the comments.
I 'll resend the patches with the above fixes, and rebased on
criu-dev, not master.
>
> > + size_t n = 0;
> > + size_t curr = 0;
> > +
> > + while (1) {
> > + n = read(fd, buff + curr, size - curr);
> > + if (n < 1)
> > + return n;
> > + curr += n;
> > + if (curr == size)
> > + return size;
> > + }
> > +}
> > diff --git a/images/Makefile b/images/Makefile
> > index cf50794..3753d62 100644
> > --- a/images/Makefile
> > +++ b/images/Makefile
> > @@ -60,6 +60,7 @@ proto-obj-y += binfmt-misc.o
> > proto-obj-y += time.o
> > proto-obj-y += sysctl.o
> > proto-obj-y += autofs.o
> > +proto-obj-y += remote-image.o
> >
> > CFLAGS += -iquote $(obj)/
> >
> > diff --git a/images/remote-image.proto b/images/remote-image.proto
> > new file mode 100644
> > index 0000000..1212627
> > --- /dev/null
> > +++ b/images/remote-image.proto
> > @@ -0,0 +1,20 @@
> > +message local_image_entry {
> > + required string name = 1;
> > + required string snapshot_id = 2;
> > + required uint32 open_mode = 3;
> > +}
> > +
> > +message remote_image_entry {
> > + required string name = 1;
> > + required string snapshot_id = 2;
> > + required uint32 open_mode = 3;
> > + required uint64 size = 4;
> > +}
> > +
> > +message local_image_reply_entry {
> > + required uint32 error = 1;
> > +}
> > +
> > +message snapshot_id_entry {
> > + required string snapshot_id = 1;
> > +}
> > --
> > 2.7.3
> >
> > _______________________________________________
> > CRIU mailing list
> > CRIU at openvz.org
> > https://lists.openvz.org/mailman/listinfo/criu
> >
>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.openvz.org/pipermail/criu/attachments/20160803/032dee92/attachment-0001.html>
More information about the CRIU
mailing list