[CRIU] [Process Migration using Sockets v4 - Patch 1/3]
Rodrigo Bruno
rbruno at gsd.inesc-id.pt
Fri Dec 11 14:18:22 PST 2015
Hi,
I think I have addressed all the comments you made on the last version of the patch.
It took me a little longer to send the new version because I found a bug that occurs
when a subprocess that was snapshotted by a pre-dump no longer exists when the dump
takes place. I was not considering that situation (but it is now handled correctly).
I also took some time to run checkpatch.pl from the Linux kernel tree, and I fixed the code accordingly.
Here is the first part of the patch:
Signed-off-by: Rodrigo Bruno <rbruno at gsd.inesc-id.pt>
From 029e179269163fc069fbbcf26cb86f056940dc7d Mon Sep 17 00:00:00 2001
From: rodrigo-bruno <rbruno at gsd.inesc-id.pt>
Date: Fri, 11 Dec 2015 22:07:12 +0000
Subject: [PATCH 1/3] Process Migration using Sockets v4 (1/3)
---
Makefile.crtools | 4 +
cr-dedup.c | 1 +
cr-dump.c | 16 +++
cr-service.c | 6 +-
crtools.c | 37 +++++-
image-desc.c | 4 +-
image.c | 27 ++++-
img-remote.c | 272 ++++++++++++++++++++++++++++++++++++++++++++
include/cr_options.h | 5 +-
include/image.h | 3 +-
include/img-remote.h | 79 +++++++++++++
include/protobuf-desc.h | 8 +-
include/util.h | 1 +
page-read.c | 41 +++++--
page-xfer.c | 40 ++++---
protobuf-desc.c | 1 +
protobuf/Makefile | 1 +
protobuf/remote-image.proto | 20 ++++
util.c | 15 +++
19 files changed, 542 insertions(+), 39 deletions(-)
create mode 100644 img-remote.c
create mode 100644 include/img-remote.h
create mode 100644 protobuf/remote-image.proto
diff --git a/Makefile.crtools b/Makefile.crtools
index 80f704f..3af28b1 100644
--- a/Makefile.crtools
+++ b/Makefile.crtools
@@ -6,6 +6,10 @@ obj-y += crtools.o
obj-y += security.o
obj-y += image.o
obj-y += image-desc.o
+obj-y += img-remote.o
+obj-y += img-proxy.o
+obj-y += img-cache.o
+obj-y += img-remote-proto.o
obj-y += net.o
obj-y += tun.o
obj-y += proc_parse.o
diff --git a/cr-dedup.c b/cr-dedup.c
index b453c3e..77f0b39 100644
--- a/cr-dedup.c
+++ b/cr-dedup.c
@@ -9,6 +9,7 @@
#define MAX_BUNCH_SIZE 256
+/* TODO - patch this for using remote migration using sockets */
static int cr_dedup_one_pagemap(int pid);
int cr_dedup(void)
diff --git a/cr-dump.c b/cr-dump.c
index 3af077b..81ea880 100644
--- a/cr-dump.c
+++ b/cr-dump.c
@@ -83,6 +83,8 @@
#include "asm/dump.h"
+#include "img-remote.h"
+
static char loc_buf[PAGE_SIZE];
static void close_vma_file(struct vma_area *vma)
@@ -1343,6 +1345,11 @@ int cr_pre_dump_tasks(pid_t pid)
LIST_HEAD(ctls);
struct parasite_ctl *ctl, *n;
+ if (opts.remote && push_snapshot_id() < 0) {
+ pr_err("Failed to push image namespace.\n");
+ goto err;
+ }
+
if (!opts.track_mem) {
pr_info("Enforcing memory tracking for pre-dump.\n");
opts.track_mem = true;
@@ -1448,6 +1455,11 @@ int cr_dump_tasks(pid_t pid)
pr_info("Dumping processes (pid: %d)\n", pid);
pr_info("========================================\n");
+ if (opts.remote && push_snapshot_id() < 0) {
+ pr_err("Failed to push image namepsace.\n");
+ goto err;
+ }
+
if (init_stats(DUMP_STATS))
goto err;
@@ -1612,6 +1624,10 @@ err:
close_service_fd(CR_PROC_FD_OFF);
+ if (opts.remote) {
+ finish_remote_dump();
+ }
+
if (ret) {
kill_inventory();
pr_err("Dumping FAILED.\n");
diff --git a/cr-service.c b/cr-service.c
index 938ea9e..02ec0ed 100644
--- a/cr-service.c
+++ b/cr-service.c
@@ -340,7 +340,7 @@ static int setup_opts_from_req(int sk, CriuOpts *req)
if (req->ps) {
opts.use_page_server = true;
opts.addr = req->ps->address;
- opts.ps_port = htons((short)req->ps->port);
+ opts.port = htons((short)req->ps->port);
if (req->ps->has_fd) {
if (!opts.swrk_restore)
@@ -649,7 +649,7 @@ static int start_page_server_req(int sk, CriuOpts *req)
if (setup_opts_from_req(sk, req))
goto out_ch;
- setproctitle("page-server --rpc --address %s --port %hu", opts.addr, opts.ps_port);
+ setproctitle("page-server --rpc --address %s --port %hu", opts.addr, opts.port);
pr_debug("Starting page server\n");
@@ -658,7 +658,7 @@ static int start_page_server_req(int sk, CriuOpts *req)
goto out_ch;
info.pid = pid;
- info.port = opts.ps_port;
+ info.port = opts.port;
count = write(start_pipe[1], &info, sizeof(info));
if (count != sizeof(info))
diff --git a/crtools.c b/crtools.c
index ea8b889..b1f6115 100644
--- a/crtools.c
+++ b/crtools.c
@@ -43,6 +43,8 @@
#include "setproctitle.h"
+#include "img-remote.h"
+
struct cr_options opts;
void init_opts(void)
@@ -62,6 +64,10 @@ void init_opts(void)
opts.cpu_cap = CPU_CAP_DEFAULT;
opts.manage_cgroups = CG_MODE_DEFAULT;
opts.ps_socket = -1;
+ opts.addr = DEFAULT_CACHE_HOST;
+ opts.port = DEFAULT_CACHE_PORT;
+ opts.local_cache_path = DEFAULT_IMG_PATH;
+ opts.local_proxy_path = DEFAULT_IMG_PATH;
opts.ghost_limit = DEFAULT_GHOST_LIMIT;
}
@@ -252,6 +258,9 @@ int main(int argc, char *argv[], char *envp[])
{ "freeze-cgroup", required_argument, 0, 1068 },
{ "ghost-limit", required_argument, 0, 1069 },
{ "irmap-scan-path", required_argument, 0, 1070 },
+ { "remote", no_argument, 0, 1071 },
+ { "local-cache-path", required_argument, 0, 1072 },
+ { "local-proxy-path", required_argument, 0, 1073 },
{ },
};
@@ -405,8 +414,8 @@ int main(int argc, char *argv[], char *envp[])
opts.addr = optarg;
break;
case 1052:
- opts.ps_port = htons(atoi(optarg));
- if (!opts.ps_port)
+ opts.port = htons(atoi(optarg));
+ if (!opts.port)
goto bad_arg;
break;
case 'j':
@@ -494,6 +503,15 @@ int main(int argc, char *argv[], char *envp[])
if (irmap_scan_path_add(optarg))
return -1;
break;
+ case 1071:
+ opts.remote = true;
+ break;
+ case 1072:
+ opts.local_cache_path = optarg;
+ break;
+ case 1073:
+ opts.local_proxy_path = optarg;
+ break;
case 'M':
{
char *aux;
@@ -642,6 +660,12 @@ int main(int argc, char *argv[], char *envp[])
if (!strcmp(argv[optind], "page-server"))
return cr_page_server(opts.daemon_mode, -1) > 0 ? 0 : 1;
+ if (!strcmp(argv[optind], "image-cache"))
+ return image_cache(opts.local_cache_path, opts.port);
+
+ if (!strcmp(argv[optind], "image-proxy"))
+ return image_proxy(opts.local_proxy_path, opts.addr, opts.port);
+
if (!strcmp(argv[optind], "service"))
return cr_service(opts.daemon_mode);
@@ -668,6 +692,8 @@ usage:
" criu page-server\n"
" criu service [<options>]\n"
" criu dedup\n"
+" criu image-cache [<options>]\n"
+" criu image-proxy [<options>]\n"
"\n"
"Commands:\n"
" dump checkpoint a process/tree identified by pid\n"
@@ -680,6 +706,8 @@ usage:
" dedup remove duplicates in memory dump\n"
" cpuinfo dump writes cpu information into image file\n"
" cpuinfo check validates cpu information read from image file\n"
+" image-cache launch destination-side cache for images sent from the source-side\n"
+" image-proxy launch source-side proxy to sent images to the destination-side\n"
);
if (usage_error) {
@@ -706,6 +734,7 @@ usage:
" restore making it the parent of the restored process\n"
" --freeze-cgroup\n"
" use cgroup freezer to collect processes\n"
+" --remote dump/restore images directly to/from remote node using image-proxy/image-cache\n"
"\n"
"* Special resources support:\n"
" -x|--" USK_EXT_PARAM "inode,.." " allow external unix connections (optionally can be assign socket's inode that allows one-sided dump)\n"
@@ -762,9 +791,9 @@ usage:
" when used on restore, as soon as page is restored, it\n"
" will be punched from the image.\n"
"\n"
-"Page/Service server options:\n"
+"Page/Service/image-cache/image-proxy server options:\n"
" --address ADDR address of server or service\n"
-" --port PORT port of page server\n"
+" --port PORT port of server or service\n"
" -d|--daemon run in the background after creating socket\n"
"\n"
"Other options:\n"
diff --git a/image-desc.c b/image-desc.c
index 773f2fa..48e0116 100644
--- a/image-desc.c
+++ b/image-desc.c
@@ -94,13 +94,13 @@ struct cr_fd_desc_tmpl imgset_template[CR_FD_MAX] = {
[CR_FD_STATS] = {
.fmt = "stats-%s",
.magic = STATS_MAGIC,
- .oflags = O_SERVICE,
+ .oflags = O_SERVICE | O_FORCE_LOCAL,
},
[CR_FD_IRMAP_CACHE] = {
.fmt = "irmap-cache",
.magic = IRMAP_CACHE_MAGIC,
- .oflags = O_SERVICE,
+ .oflags = O_SERVICE | O_FORCE_LOCAL,
},
[CR_FD_FILE_LOCKS_PID] = {
diff --git a/image.c b/image.c
index dc9d6a1..4ca740e 100644
--- a/image.c
+++ b/image.c
@@ -12,6 +12,7 @@
#include "protobuf.h"
#include "protobuf/inventory.pb-c.h"
#include "protobuf/pagemap.pb-c.h"
+#include "img-remote.h"
bool fdinfo_per_id = false;
bool ns_per_id = false;
@@ -306,9 +307,26 @@ static int do_open_image(struct cr_img *img, int dfd, int type, unsigned long of
{
int ret, flags;
- flags = oflags & ~(O_NOBUF | O_SERVICE);
+ flags = oflags & ~(O_NOBUF | O_SERVICE | O_FORCE_LOCAL);
- ret = openat(dfd, path, flags, CR_FD_PERM);
+ if (opts.remote && !(oflags & O_FORCE_LOCAL)) {
+ char *snapshot_id = NULL;
+
+ snapshot_id = get_snapshot_id_from_idx(dfd);
+
+ if (snapshot_id == NULL)
+ ret = -1;
+ else if (flags == O_RDONLY) {
+ pr_info("do_open_remote_image RDONLY path=%s snapshot_id=%s\n",
+ path, snapshot_id);
+ ret = read_remote_image_connection(snapshot_id, path);
+ } else {
+ pr_info("do_open_remote_image WDONLY path=%s snapshot_id=%s\n",
+ path, snapshot_id);
+ ret = write_remote_image_connection(snapshot_id, path, O_WRONLY);
+ }
+ } else
+ ret = openat(dfd, path, flags, CR_FD_PERM);
if (ret < 0) {
if (!(flags & O_CREAT) && (errno == ENOENT)) {
pr_info("No %s image\n", path);
@@ -319,7 +337,6 @@ static int do_open_image(struct cr_img *img, int dfd, int type, unsigned long of
pr_perror("Unable to open %s", path);
goto err;
}
-
img->_x.fd = ret;
if (oflags & O_NOBUF)
bfd_setraw(&img->_x);
@@ -410,7 +427,9 @@ int open_image_dir(char *dir)
close(fd);
fd = ret;
- if (opts.img_parent) {
+ if (opts.remote) {
+ init_snapshot_id(dir);
+ } else if (opts.img_parent) {
ret = symlinkat(opts.img_parent, fd, CR_PARENT_LINK);
if (ret < 0 && errno != EEXIST) {
pr_perror("Can't link parent snapshot");
diff --git a/img-remote.c b/img-remote.c
new file mode 100644
index 0000000..7f4f400
--- /dev/null
+++ b/img-remote.c
@@ -0,0 +1,272 @@
+#include <unistd.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <netdb.h>
+#include "xmalloc.h"
+#include "criu-log.h"
+#include "img-remote.h"
+#include "img-remote-proto.h"
+#include "protobuf/remote-image.pb-c.h"
+#include "protobuf-desc.h"
+#include <fcntl.h>
+#include "servicefd.h"
+#include "compiler.h"
+#include "cr_options.h"
+
+#define PB_LOCAL_IMAGE_SIZE PATHLEN
+
+static char *snapshot_id;
+
+LIST_HEAD(snapshot_head);
+
+/* A snapshot is a dump or pre-dump operation. Each snapshot is identified by an
+ * ID which corresponds to the working directory specified by the user.
+ */
+struct snapshot {
+ char snapshot_id[PATHLEN];
+ struct list_head l;
+};
+
+struct snapshot *new_snapshot(char *snapshot_id)
+{
+ struct snapshot *s = malloc(sizeof(struct snapshot));
+
+ if (!s)
+ pr_perror("Failed to allocate snapshot structure");
+ strncpy(s->snapshot_id, snapshot_id, PATHLEN);
+ return s;
+}
+
+void add_snapshot(struct snapshot *snapshot)
+{
+ list_add_tail(&(snapshot->l), &snapshot_head);
+}
+
+static char *get_local_img_path()
+{
+ static char *local_img_path = NULL;
+
+ if (local_img_path != NULL)
+ return local_img_path;
+
+ if (strcmp(opts.local_cache_path, DEFAULT_IMG_PATH))
+ local_img_path = opts.local_cache_path;
+ else if (strcmp(opts.local_proxy_path, DEFAULT_IMG_PATH))
+ local_img_path = opts.local_proxy_path;
+
+ return local_img_path;
+}
+
+int read_remote_image_connection(char *snapshot_id, char *path)
+{
+ int error;
+ int sockfd = setup_UNIX_client_socket(get_local_img_path());
+
+ if (sockfd < 0) {
+ pr_perror("Error opening local connection for %s:%s", path, snapshot_id);
+ return -1;
+ }
+
+ if (write_header(sockfd, snapshot_id, path, O_RDONLY) < 0) {
+ pr_perror("Error writing header for %s:%s", path, snapshot_id);
+ return -1;
+ }
+
+ if (read_reply_header(sockfd, &error) < 0) {
+ pr_perror("Error reading reply header for %s:%s", path, snapshot_id);
+ return -1;
+ }
+ errno = error;
+ if (!error)
+ return sockfd;
+ else if (error == ENOENT) {
+ pr_info("Image does not exist (%s:%s)\n", path, snapshot_id);
+ close(sockfd);
+ return -1;
+ }
+ pr_perror("Unexpected error returned: %d (%s:%s)\n", error, path, snapshot_id);
+ close(sockfd);
+ return -1;
+}
+
+int write_remote_image_connection(char *snapshot_id, char *path, int flags)
+{
+ int sockfd = setup_UNIX_client_socket(get_local_img_path());
+
+ if (sockfd < 0)
+ return -1;
+
+ if (write_header(sockfd, snapshot_id, path, flags) < 0) {
+ pr_perror("Error writing header for %s:%s", path, snapshot_id);
+ return -1;
+ }
+ return sockfd;
+}
+
+int finish_remote_dump()
+{
+ pr_info("Dump side is calling finish\n");
+ int fd = write_remote_image_connection(NULL_SNAPSHOT_ID, DUMP_FINISH, O_WRONLY);
+
+ if (fd == -1) {
+ pr_perror("Unable to open finish dump connection");
+ return -1;
+ }
+
+ close(fd);
+ return 0;
+}
+
+int skip_remote_bytes(int fd, unsigned long len)
+{
+ static char buf[4096];
+ int n = 0;
+ unsigned long curr = 0;
+
+ for (; curr < len; ) {
+ n = read(fd, buf, min(len - curr, (unsigned long)4096));
+ if (n == 0) {
+ pr_perror("Unexpected end of stream (skipping %lx/%lx bytes)",
+ curr, len);
+ return -1;
+ } else if (n > 0) {
+ curr += n;
+ } else {
+ pr_perror("Error while skipping bytes from stream (%lx/%lx)",
+ curr, len);
+ return -1;
+ }
+ }
+
+ if (curr != len) {
+ pr_perror("Unable to skip the current number of bytes: %lx instead of %lx",
+ curr, len);
+ return -1;
+ }
+ return 0;
+}
+
+static int pull_snapshot_ids()
+{
+ int n, sockfd;
+ SnapshotIdEntry *ls;
+ struct snapshot *s = NULL;
+
+ sockfd = read_remote_image_connection(NULL_SNAPSHOT_ID, PARENT_IMG);
+
+ /* The connection was successful but there is no file. */
+ if (sockfd < 0 && errno == ENOENT)
+ return 0;
+ else if (sockfd < 0) {
+ pr_perror("Unable to open snapshot id read connection");
+ return -1;
+ }
+
+ while (1) {
+ n = pb_read_obj(sockfd, (void **)&ls, PB_SNAPSHOT_ID);
+ if (!n) {
+ close(sockfd);
+ return n;
+ } else if (n < 0) {
+ pr_perror("Unable to read remote snapshot ids");
+ close(sockfd);
+ return n;
+ }
+
+ s = new_snapshot(ls->snapshot_id);
+ if (!s) {
+ pr_perror("Unable create new snapshot structure");
+ close(sockfd);
+ return -1;
+ }
+ add_snapshot(s);
+ pr_info("[read_snapshot ids] parent = %s\n", ls->snapshot_id);
+ }
+ free(ls);
+ close(sockfd);
+ return n;
+}
+
+int push_snapshot_id()
+{
+ int n;
+ SnapshotIdEntry rn = SNAPSHOT_ID_ENTRY__INIT;
+ int sockfd = write_remote_image_connection(NULL_SNAPSHOT_ID, PARENT_IMG, O_APPEND);
+
+ if (sockfd < 0) {
+ pr_perror("Unable to open snapshot id push connection");
+ return -1;
+ }
+
+ rn.snapshot_id = xmalloc(sizeof(char) * PATHLEN);
+ if (!rn.snapshot_id) {
+ pr_perror("Unable to allocate snapshot id buffer");
+ close(sockfd);
+ return -1;
+ }
+ strncpy(rn.snapshot_id, snapshot_id, PATHLEN);
+
+ n = pb_write_obj(sockfd, &rn, PB_SNAPSHOT_ID);
+
+ xfree(rn.snapshot_id);
+ close(sockfd);
+ return n;
+}
+
+void init_snapshot_id(char *si)
+{
+ snapshot_id = si;
+}
+
+char *get_curr_snapshot_id()
+{
+ return snapshot_id;
+}
+
+int get_curr_snapshot_id_idx()
+{
+ struct snapshot *si;
+ int idx = 0;
+
+ if (list_empty(&snapshot_head))
+ pull_snapshot_ids();
+
+ list_for_each_entry(si, &snapshot_head, l) {
+ if (!strncmp(si->snapshot_id, snapshot_id, PATHLEN))
+ return idx;
+ idx++;
+ }
+
+ pr_perror("Error, could not find current snapshot id (%s) fd", snapshot_id);
+ return -1;
+}
+
+char *get_snapshot_id_from_idx(int idx)
+{
+ struct snapshot *si;
+
+ if (list_empty(&snapshot_head))
+ pull_snapshot_ids();
+
+ /* Note: if idx is the service fd then we need the current
+ * snapshot_id idx. Else we need a parent snapshot_id idx.
+ */
+ if (idx == get_service_fd(IMG_FD_OFF))
+ idx = get_curr_snapshot_id_idx();
+
+ list_for_each_entry(si, &snapshot_head, l) {
+ if (!idx)
+ return si->snapshot_id;
+ idx--;
+ }
+
+ pr_perror("Error, could not find snapshot id for idx %d", idx);
+ return NULL;
+}
+
+int get_curr_parent_snapshot_id_idx()
+{
+ return get_curr_snapshot_id_idx() - 1;
+}
diff --git a/include/cr_options.h b/include/cr_options.h
index af130dd..22f7e0d 100644
--- a/include/cr_options.h
+++ b/include/cr_options.h
@@ -74,7 +74,7 @@ struct cr_options {
struct list_head inherit_fds;
char *libdir;
bool use_page_server;
- unsigned short ps_port;
+ unsigned short port;
char *addr;
int ps_socket;
bool track_mem;
@@ -91,6 +91,9 @@ struct cr_options {
bool enable_external_masters;
bool aufs; /* auto-deteced, not via cli */
bool overlayfs;
+ bool remote;
+ char *local_cache_path;
+ char *local_proxy_path;
size_t ghost_limit;
struct list_head irmap_scan_paths;
};
diff --git a/include/image.h b/include/image.h
index 305febf..70363f6 100644
--- a/include/image.h
+++ b/include/image.h
@@ -85,7 +85,7 @@
* - unsupported
* stands for any unknown memory areas, usually means
* we don't know how to work with it and should stop
- * processing exiting with error; while the rest of bits
+ * processing exiting with error; wO_WRONLYhile the rest of bits
* are part of image ABI, this particular one must never
* be used in image.
*/
@@ -128,6 +128,7 @@ extern bool img_common_magic;
#define O_DUMP (O_WRONLY | O_CREAT | O_TRUNC)
#define O_SHOW (O_RDONLY | O_NOBUF)
#define O_RSTR (O_RDONLY)
+#define O_FORCE_LOCAL (O_SYNC)
struct cr_img {
union {
diff --git a/include/img-remote.h b/include/img-remote.h
new file mode 100644
index 0000000..706b67f
--- /dev/null
+++ b/include/img-remote.h
@@ -0,0 +1,79 @@
+#include <limits.h>
+
+#ifndef IMAGE_REMOTE_H
+#define IMAGE_REMOTE_H
+
+#define PATHLEN PATH_MAX
+#define DUMP_FINISH "DUMP_FINISH"
+#define PARENT_IMG "parent"
+#define NULL_SNAPSHOT_ID "null"
+#define DEFAULT_IMG_PATH "/tmp/criu-img-path.sock"
+#define DEFAULT_CACHE_PORT 9996
+#define DEFAULT_CACHE_HOST "localhost"
+
+/* Called by restore to get the fd corresponding to a particular path. This call
+ * will block until the connection is received.
+ */
+int read_remote_image_connection(char *snapshot_id, char *path);
+
+/* Called by dump to create a socket connection to the restore side. The socket
+ * fd is returned for further writing operations.
+ */
+int write_remote_image_connection(char *snapshot_id, char *path, int flags);
+
+/* Called by dump when everything is dumped. This function creates a new
+ * connection with a special control name. The recover side uses it to ack that
+ * no more files are coming.
+ */
+int finish_remote_dump();
+
+/* Starts an image proxy daemon (dump side). It receives image files through
+ * socket connections and forwards them to the image cache (restore side).
+ */
+int image_proxy(char *local_proxy_path, char *cache_host, unsigned short cache_port);
+
+/* Starts an image cache daemon (restore side). It receives image files through
+ * socket connections and caches them until they are requested by the restore
+ * process.
+ */
+int image_cache(char *local_cache_path, unsigned short cache_port);
+
+/* Reads (discards) 'len' bytes from fd. This is used to emulate the function
+ * lseek, which is used to advance the file offset.
+ */
+int skip_remote_bytes(int fd, unsigned long len);
+
+/* To support iterative migration, the concept of snapshot_id is introduced
+ * (only when remote migration is enabled). Each image is tagged with one
+ * snapshot_id. The snapshot_id is the image directory used for the operation
+ * that creates the image (either predump or dump). Images stored in memory
+ * (both in Image Proxy and Image Cache) are identified by their name and
+ * snapshot_id. Snapshot_ids are ordered so that we can find parent pagemaps
+ * (that will be used when restoring the process).
+ */
+
+/* Sets the current snapshot_id */
+void init_snapshot_id(char *ns);
+
+/* Returns the current snapshot_id. */
+char *get_curr_snapshot_id();
+
+/* Returns the snapshot_id index representing the current snapshot_id. This
+ * index represents the hierarchy position. For example: images tagged with
+ * the snapshot_id with index 1 are more recent than the images tagged with
+ * the snapshot_id with index 0.
+ */
+int get_curr_snapshot_id_idx();
+
+/* Returns the snapshot_id associated with the snapshot_id index. */
+char *get_snapshot_id_from_idx(int idx);
+
+/* Pushes the current snapshot_id into the snapshot_id hierarchy (into the Image
+ * Proxy and Image Cache).
+ */
+int push_snapshot_id();
+
+/* Returns the snapshot id index that precedes the current snapshot_id. */
+int get_curr_parent_snapshot_id_idx();
+
+#endif
diff --git a/include/protobuf-desc.h b/include/protobuf-desc.h
index ab7e4f2..ad9b37f 100644
--- a/include/protobuf-desc.h
+++ b/include/protobuf-desc.h
@@ -55,16 +55,20 @@ enum {
PB_CPUINFO,
PB_USERNS,
PB_NETNS,
+ PB_REMOTE_IMAGE, /* Header for images sent from proxy to cache.*/
+ PB_LOCAL_IMAGE, /* Header for reading/writing images from/to proxy or cache. */
+ PB_LOCAL_IMAGE_REPLY, /* Header for reading/writing images reply. */
+ PB_SNAPSHOT_ID, /* Contains a single id. Used for reading/writing ids from proxy or cache. */
/* PB_AUTOGEN_STOP */
PB_PAGEMAP_HEAD,
- PB_IDS, /* 50 */
+ PB_IDS,
PB_SIGACT,
PB_NETDEV,
PB_REMAP_FPATH,
PB_SK_QUEUES,
- PB_IPCNS_MSG,
+ PB_IPCNS_MSG, /* 60 */
PB_IPCNS_MSG_ENT,
PB_MAX,
diff --git a/include/util.h b/include/util.h
index f2300a9..a732e3f 100644
--- a/include/util.h
+++ b/include/util.h
@@ -260,5 +260,6 @@ FILE *fopenat(int dirfd, char *path, char *cflags);
void split(char *str, char token, char ***out, int *n);
int fd_has_data(int lfd);
+size_t read_into_buffer(int fd, char *buff, size_t size);
#endif /* __CR_UTIL_H__ */
diff --git a/page-read.c b/page-read.c
index 832c057..6eae937 100644
--- a/page-read.c
+++ b/page-read.c
@@ -6,10 +6,13 @@
#include "cr_options.h"
#include "servicefd.h"
#include "page-read.h"
+#include "util.h"
#include "protobuf.h"
#include "protobuf/pagemap.pb-c.h"
+#include "img-remote.h"
+
#ifndef SEEK_DATA
#define SEEK_DATA 3
#define SEEK_HOLE 4
@@ -90,7 +93,10 @@ static void skip_pagemap_pages(struct page_read *pr, unsigned long len)
return;
pr_debug("\tpr%u Skip %lx bytes from page-dump\n", pr->id, len);
- if (!pr->pe->in_parent)
+ if (!pr->pe->in_parent && opts.remote) {
+ if (skip_remote_bytes(img_raw_fd(pr->pi), len) < 0)
+ pr_perror("Unable to skip remote bytes");
+ } else if (!pr->pe->in_parent)
lseek(img_raw_fd(pr->pi), len, SEEK_CUR);
pr->cvaddr += len;
}
@@ -146,10 +152,11 @@ static int read_pagemap_page(struct page_read *pr, unsigned long vaddr, void *bu
return ret;
} else {
int fd = img_raw_fd(pr->pi);
- off_t current_vaddr = lseek(fd, 0, SEEK_CUR);
+ /* TODO - lseek is not possible on sockets. Need to find a solution. */
+ off_t current_vaddr = opts.remote ? 0 : lseek(fd, 0, SEEK_CUR);
pr_debug("\tpr%u Read page %lx from self %lx/%"PRIx64"\n", pr->id,
vaddr, pr->cvaddr, current_vaddr);
- ret = read(fd, buf, PAGE_SIZE);
+ ret = read_into_buffer(fd, buf, PAGE_SIZE);
if (ret != PAGE_SIZE) {
pr_perror("Can't read mapping page %d", ret);
return -1;
@@ -195,9 +202,24 @@ static int try_open_parent(int dfd, int pid, struct page_read *pr, int pr_flags)
int pfd, ret;
struct page_read *parent = NULL;
- pfd = openat(dfd, CR_PARENT_LINK, O_RDONLY);
- if (pfd < 0 && errno == ENOENT)
- goto out;
+ if (opts.remote) {
+ /* Note: we are replacing a real directory FD with a snapshot_id
+ * index. Since we need the parent of the current snapshot_id,
+ * we want the current snapshot_id index minus one. It is
+ * possible that dfd is already a snapshot_id index. We test it
+ * by comparing it to the service FD. When opening an image (see
+ * do_open_image) we convert the snapshot_id index into a real
+ * snapshot_id.
+ */
+ pfd = dfd == get_service_fd(IMG_FD_OFF) ?
+ get_curr_snapshot_id_idx() - 1 : dfd - 1;
+ if (pfd < 0)
+ goto out;
+ } else {
+ pfd = openat(dfd, CR_PARENT_LINK, O_RDONLY);
+ if (pfd < 0 && errno == ENOENT)
+ goto out;
+ }
parent = xmalloc(sizeof(*parent));
if (!parent)
@@ -211,8 +233,8 @@ static int try_open_parent(int dfd, int pid, struct page_read *pr, int pr_flags)
xfree(parent);
parent = NULL;
}
-
- close(pfd);
+ if (!opts.remote)
+ close(pfd);
out:
pr->parent = parent;
return 0;
@@ -220,7 +242,8 @@ out:
err_free:
xfree(parent);
err_cl:
- close(pfd);
+ if (!opts.remote)
+ close(pfd);
return -1;
}
diff --git a/page-xfer.c b/page-xfer.c
index 7465ed8..2513347 100644
--- a/page-xfer.c
+++ b/page-xfer.c
@@ -17,6 +17,8 @@
#include "protobuf.h"
#include "protobuf/pagemap.pb-c.h"
+#include "img-remote.h"
+
struct page_server_iov {
u32 cmd;
u32 nr_pages;
@@ -290,7 +292,7 @@ static int get_sockaddr_in(struct sockaddr_in *addr)
return -1;
}
- addr->sin_port = opts.ps_port;
+ addr->sin_port = opts.port;
return 0;
}
@@ -310,7 +312,7 @@ int cr_page_server(bool daemon_mode, int cfd)
goto no_server;
}
- pr_info("Starting page server on port %u\n", (int)ntohs(opts.ps_port));
+ pr_info("Starting page server on port %u\n", (int)ntohs(opts.port));
sk = socket(PF_INET, SOCK_STREAM, IPPROTO_TCP);
if (sk < 0) {
@@ -332,14 +334,14 @@ int cr_page_server(bool daemon_mode, int cfd)
}
/* Get socket port in case of autobind */
- if (opts.ps_port == 0) {
+ if (opts.port == 0) {
if (getsockname(sk, (struct sockaddr *)&saddr, &slen)) {
pr_perror("Can't get page server name");
goto out;
}
- opts.ps_port = ntohs(saddr.sin_port);
- pr_info("Using %u port\n", opts.ps_port);
+ opts.port = ntohs(saddr.sin_port);
+ pr_info("Using %u port\n", opts.port);
}
no_server:
@@ -404,7 +406,7 @@ int connect_to_page_server(void)
}
pr_info("Connecting to server %s:%u\n",
- opts.addr, (int)ntohs(opts.ps_port));
+ opts.addr, (int)ntohs(opts.port));
page_server_sk = socket(PF_INET, SOCK_STREAM, IPPROTO_TCP);
if (page_server_sk < 0) {
@@ -436,7 +438,7 @@ int disconnect_from_page_server(void)
return 0;
pr_info("Disconnect from the page server %s:%u\n",
- opts.addr, (int)ntohs(opts.ps_port));
+ opts.addr, (int)ntohs(opts.port));
if (opts.ps_socket != -1)
/*
@@ -742,12 +744,20 @@ static int open_page_local_xfer(struct page_xfer *xfer, int fd_type, long id)
int ret;
int pfd;
- pfd = openat(get_service_fd(IMG_FD_OFF), CR_PARENT_LINK, O_RDONLY);
- if (pfd < 0 && errno == ENOENT)
- goto out;
+ if (opts.remote) {
+ pfd = get_curr_parent_snapshot_id_idx();
+ if (pfd == -1)
+ goto out;
+ } else {
+ pfd = openat(get_service_fd(IMG_FD_OFF), CR_PARENT_LINK, O_RDONLY);
+ if (pfd < 0 && errno == ENOENT)
+ goto out;
+ }
xfer->parent = xmalloc(sizeof(*xfer->parent));
- if (!xfer->parent) {
+ if (!xfer->parent && opts.remote) {
+ return -1;
+ } else if (!xfer->parent) {
close(pfd);
return -1;
}
@@ -757,10 +767,12 @@ static int open_page_local_xfer(struct page_xfer *xfer, int fd_type, long id)
pr_perror("No parent image found, though parent directory is set");
xfree(xfer->parent);
xfer->parent = NULL;
- close(pfd);
+ if (!opts.remote)
+ close(pfd);
goto out;
}
- close(pfd);
+ if (!opts.remote)
+ close(pfd);
}
out:
@@ -852,6 +864,8 @@ int check_parent_page_xfer(int fd_type, long id)
{
if (opts.use_page_server)
return check_parent_server_xfer(fd_type, id);
+ else if (opts.remote)
+ return get_curr_parent_snapshot_id_idx() == -1 ? 0 : 1;
else
return check_parent_local_xfer(fd_type, id);
}
diff --git a/protobuf-desc.c b/protobuf-desc.c
index 873fd3b..2b58aab 100644
--- a/protobuf-desc.c
+++ b/protobuf-desc.c
@@ -61,6 +61,7 @@
#include "protobuf/timerfd.pb-c.h"
#include "protobuf/cpuinfo.pb-c.h"
#include "protobuf/userns.pb-c.h"
+#include "protobuf/remote-image.pb-c.h"
struct cr_pb_message_desc cr_pb_descs[PB_MAX];
diff --git a/protobuf/Makefile b/protobuf/Makefile
index 0b11852..f685fc6 100644
--- a/protobuf/Makefile
+++ b/protobuf/Makefile
@@ -48,6 +48,7 @@ proto-obj-y += tty.o
proto-obj-y += file-lock.o
proto-obj-y += rlimit.o
proto-obj-y += pagemap.o
+proto-obj-y += remote-image.o
proto-obj-y += siginfo.o
proto-obj-y += rpc.o
proto-obj-y += ext-file.o
diff --git a/protobuf/remote-image.proto b/protobuf/remote-image.proto
new file mode 100644
index 0000000..1212627
--- /dev/null
+++ b/protobuf/remote-image.proto
@@ -0,0 +1,20 @@
+message local_image_entry {
+ required string name = 1;
+ required string snapshot_id = 2;
+ required uint32 open_mode = 3;
+}
+
+message remote_image_entry {
+ required string name = 1;
+ required string snapshot_id = 2;
+ required uint32 open_mode = 3;
+ required uint64 size = 4;
+}
+
+message local_image_reply_entry {
+ required uint32 error = 1;
+}
+
+message snapshot_id_entry {
+ required string snapshot_id = 1;
+}
diff --git a/util.c b/util.c
index b916eca..70e7bff 100644
--- a/util.c
+++ b/util.c
@@ -845,3 +845,18 @@ int fd_has_data(int lfd)
return ret;
}
+
+size_t read_into_buffer(int fd, char *buff, size_t size)
+{
+ size_t n = 0;
+ size_t curr = 0;
+
+ while (1) {
+ n = read(fd, buff + curr, size - curr);
+ if (n < 1)
+ return n;
+ curr += n;
+ if (curr == size)
+ return size;
+ }
+}
--
1.9.1
--
Rodrigo Bruno <rbruno at gsd.inesc-id.pt>
More information about the CRIU
mailing list