[CRIU] Process Migration using Sockets - PATCH 1/2
Rodrigo Bruno
rbruno at gsd.inesc-id.pt
Sat Sep 26 15:45:49 PDT 2015
Hi,
sorry about the previous patch, I obviously got it wrong...
I hope this one is right, otherwise I will re-iterate the process.
Signed-off-by: Rodrigo Bruno <rbruno at gsd.inesc-id.pt>
>From feb41dff0e2a573e713101f2f108adc893f157d1 Mon Sep 17 00:00:00 2001
From: rbruno <rbruno at gsd.inesc-id.pt>
Date: Sat, 26 Sep 2015 22:55:01 +0100
Subject: [PATCH 1/2] Criu remote migration using sockets. Patch 1/2.
---
image-remote.c | 399 ++++++++++++++++++++++++++++++++++++++++++++
include/image-remote.h | 84 ++++++++++
protobuf/remote-image.proto | 8 +
3 files changed, 491 insertions(+)
create mode 100644 image-remote.c
create mode 100644 include/image-remote.h
create mode 100644 protobuf/remote-image.proto
diff --git a/image-remote.c b/image-remote.c
new file mode 100644
index 0000000..cce1f13
--- /dev/null
+++ b/image-remote.c
@@ -0,0 +1,399 @@
+#include <unistd.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <netdb.h>
+
+#include "protobuf.h"
+#include "protobuf/remote-image.pb-c.h"
+
+#include "criu-log.h"
+#include "image-remote.h"
+
+#define PB_REMOTE_IMAGE_SIZE PATHLEN
+
+static char** parents = NULL;
+static int parents_occ = 0;
+static char* namespace = NULL;
+static char* parent = NULL;
+
+int setup_local_client_connection(int port)
+{
+ int sockfd;
+ struct sockaddr_in serv_addr;
+ struct hostent *server;
+
+ sockfd = socket(AF_INET, SOCK_STREAM, 0);
+ if (sockfd < 0) {
+ pr_perror("Unable to open remote image socket to img cache");
+ return -1;
+ }
+
+ server = gethostbyname(DEFAULT_HOST);
+ if (server == NULL) {
+ pr_perror("Unable to get host by name (%s)", DEFAULT_HOST);
+ return -1;
+ }
+
+ bzero((char *) &serv_addr, sizeof (serv_addr));
+ serv_addr.sin_family = AF_INET;
+ bcopy((char *) server->h_addr,
+ (char *) &serv_addr.sin_addr.s_addr,
+ server->h_length);
+ serv_addr.sin_port = htons(port);
+
+ if (connect(sockfd, (struct sockaddr *) &serv_addr, sizeof(serv_addr)) < 0) {
+ pr_perror("Unable to connect to remote restore host %s", DEFAULT_HOST);
+ return -1;
+ }
+
+ return sockfd;
+}
+
+/* TODO - merge this with pb_write_one? */
+int pb_write_obj(int fd, void* obj, int type)
+{
+ u8 local[PB_REMOTE_IMAGE_SIZE];
+ void *buf = (void *)&local;
+ size_t size, packed, sent = 0, ret;
+
+ if (!cr_pb_descs[type].pb_desc) {
+ pr_err("Wrong object requested %d\n", type);
+ return -1;
+ }
+
+ size = cr_pb_descs[type].getpksize(obj);
+ if(size > PB_REMOTE_IMAGE_SIZE) {
+ pr_err("PB object too large\n");
+ return -1;
+ }
+
+ packed = cr_pb_descs[type].pack(obj, buf);
+ if (packed != size) {
+ pr_err("Failed packing PB object\n");
+ return -1;
+ }
+
+ if (write(fd, &size, sizeof(size)) != sizeof(size)) {
+ pr_err("Failed to write PB object size\n");
+ return -1;
+ }
+
+ while (sent < size) {
+ ret = write(fd, buf + sent, size - sent);
+ if (ret < 0) {
+ pr_err("Failed to write PB object\n");
+ return -1;
+ }
+ sent += ret;
+ }
+ return sent;
+}
+
+/* TODO - merge this with pb_read_one? */
+int pb_read_obj(int fd, void** pobj, int type)
+{
+ u8 local[PB_REMOTE_IMAGE_SIZE];
+ void* buf = (void*)&local;
+ size_t size , received = 0;
+ int ret;
+
+ if (!cr_pb_descs[type].pb_desc) {
+ pr_err("Wrong object requested %d\n", type);
+ return -1;
+ }
+
+ ret = read(fd, &size, sizeof(size));
+ if (!ret)
+ return received;
+ else if (ret != sizeof(size)) {
+ pr_err("Failed reading remote image PB object size\n");
+ return -1;
+ }
+
+ if (size > PB_REMOTE_IMAGE_SIZE) {
+ pr_err("PB object too large\n");
+ return -1;
+ }
+
+ while (received < size) {
+ ret = read(fd, buf + received, size - received);
+ if(!ret) {
+ pr_err("Failed reading remote image PB object\n");
+ return -1;
+ }
+ received += ret;
+ }
+
+ *pobj = cr_pb_descs[type].unpack(NULL, size, buf);
+ if (!*pobj) {
+ pr_err("Failed unpacking remote image PB object\n");
+ return -1;
+ }
+
+ return received;
+}
+
+int write_header(int fd, char* namespace, char* path)
+{
+ RemoteImageEntry ri = REMOTE_IMAGE_ENTRY__INIT;
+ ri.name = path;
+ ri.namespace_ = namespace;
+ return pb_write_obj(fd, &ri, PB_REMOTE_IMAGE);
+}
+
+int read_header(int fd, char* namespace, char* path)
+{
+ RemoteImageEntry* ri;
+ int ret;
+
+ ret = pb_read_obj(fd, (void**)&ri, PB_REMOTE_IMAGE);
+ if (ret) {
+ strncpy(namespace, ri->namespace_, PATHLEN);
+ strncpy(path, ri->name, PATHLEN);
+ }
+ /* TODO - free unpacked object? */
+ return ret;
+}
+
+int read_remote_image_connection(char* namespace, char* path)
+{
+ int sockfd;
+ char path_buf[PATHLEN], ns_buf[PATHLEN];
+
+ sockfd = setup_local_client_connection(CACHE_GET_PORT);
+ if (sockfd < 0) {
+ return -1;
+ }
+
+ if (write_header(sockfd, namespace, path) < 0) {
+ pr_perror("Error writing header for %s:%s", path, namespace);
+ return -1;
+ }
+
+ if (read_header(sockfd, ns_buf, path_buf) < 0) {
+ pr_perror("Error reading header for %s:%s", path, namespace);
+ return -1;
+ }
+
+ if (!strncmp(path_buf, path, PATHLEN) && !strncmp(ns_buf, namespace, PATHLEN)) {
+ pr_info("Image cache does have %s:%s\n", path, namespace);
+ return sockfd;
+ }
+ else if (!strncmp(path_buf, DUMP_FINISH, PATHLEN)) {
+ pr_info("Image cache does not have %s:%s\n", path, namespace);
+ close(sockfd);
+ return -1;
+ }
+ else {
+ pr_perror("Image cache returned erroneous name %s\n", path);
+ close(sockfd);
+ return -1;
+ }
+}
+
+int write_remote_image_connection(char* namespace, char* path)
+{
+ int sockfd = setup_local_client_connection(PROXY_PUT_PORT);
+ if (sockfd < 0) {
+ return -1;
+ }
+
+ if (write_header(sockfd, namespace, path) < 0) {
+ pr_perror("Error writing header for %s:%s", path, namespace);
+ return -1;
+ }
+
+ pr_info("[write_remote_image_connection] fd=%d\n", sockfd);
+
+ return sockfd;
+}
+
+int finish_remote_dump()
+{
+ pr_info("Dump side is calling finish\n");
+ int fd = write_remote_image_connection(NULL_NAMESPACE, DUMP_FINISH);
+ if (fd == -1) {
+ pr_perror("Unable to open finish dump connection");
+ return -1;
+ }
+
+ close(fd);
+ return 0;
+}
+
+int skip_remote_bytes(int fd, unsigned long len)
+{
+ static char buf[4096];
+ int n = 0;
+ unsigned long curr = 0;
+
+ for(; curr < len; ) {
+ n = read(fd, buf, MIN(len - curr, 4096));
+ if (n == 0) {
+ pr_perror("Unexpected end of stream (skipping %lx/%lx bytes)",
+ curr, len);
+ return -1;
+ }
+ else if (n > 0) {
+ curr += n;
+ }
+ else {
+ pr_perror("Error while skipping bytes from stream (%lx/%lx)",
+ curr, len);
+ return -1;
+ }
+ }
+ if ( curr != len) {
+ pr_perror("Unable to skip the current number of bytes: %lx instead of %lx",
+ curr, len);
+ return -1;
+ }
+ return 0;
+}
+
+static int write_namespaces()
+{
+ int n;
+ RemoteNamespaceEntry rn = REMOTE_NAMESPACE_ENTRY__INIT;
+
+ int sockfd = write_remote_image_connection(NULL_NAMESPACE, PARENT_IMG);
+ if (sockfd < 0) {
+ pr_perror("Unable to open namespace push connection");
+ return -1;
+ }
+
+ rn.n_namespace_ = parents_occ;
+ rn.namespace_ = xmalloc(sizeof(char*) * rn.n_namespace_);
+ if (!rn.namespace_) {
+ pr_perror("Unable to allocate namespace array");
+ close(sockfd);
+ return -1;
+ }
+
+ for (n = 0; n < rn.n_namespace_; n++) {
+ rn.namespace_[n] = xmalloc(sizeof(char) * PATHLEN);
+ if (!rn.namespace_[n]) {
+ pr_perror("Unable to allocate namespace");
+ close(sockfd);
+ return -1;
+ }
+ strncpy(rn.namespace_[n], parents[n], PATHLEN);
+ }
+
+ n = pb_write_obj(sockfd, &rn, PB_REMOTE_NAMESPACE);
+
+ for (n = 0; n < rn.n_namespace_; n++)
+ xfree(rn.namespace_[n]);
+ xfree(rn.namespace_);
+
+ close(sockfd);
+ return n;
+}
+
+static int read_namespaces() {
+ int n, sockfd;
+ RemoteNamespaceEntry* rn;
+ size_t namespaces = 0;
+ parents_occ = 0;
+
+ sockfd = read_remote_image_connection(NULL_NAMESPACE, PARENT_IMG);
+ if (sockfd < 0) {
+ pr_perror("Unable to open namespace get connection");
+ return -1;
+ }
+
+ n = pb_read_obj(sockfd, (void**)&rn, PB_REMOTE_NAMESPACE);
+ if (n)
+ namespaces = rn->n_namespace_;
+ else if (n < 0) {
+ pr_perror("Unable to read remote namepsaces");
+ close(sockfd);
+ return n;
+ }
+
+ /* The extra char pointer is for the current namespace that is added
+ if we are doing a dump or pre-dump operation. */
+ parents = malloc((namespaces + 1) * sizeof(char*));
+ if (!parents) {
+ pr_perror("Unable to allocate parents");
+ close(sockfd);
+ return -1;
+ }
+
+ while (parents_occ < namespaces) {
+ parents[parents_occ] = malloc(sizeof(char) * PATHLEN);
+ if (!parents[parents_occ]) {
+ pr_perror("Unable to allocate parent buffer");
+ close(sockfd);
+ return -1;
+ }
+ strncpy(parents[parents_occ], rn->namespace_[parents_occ], PATHLEN);
+ parents_occ++;
+ }
+
+ /* TODO - free rn memory? */
+ close(sockfd);
+ return parents_occ;
+}
+
+int push_namespace()
+{
+ if (read_namespaces() < 0) {
+ pr_perror("Failed to push namespace");
+ return -1;
+ }
+
+ parents[parents_occ] = malloc(sizeof(char) * PATHLEN);
+ if (!parents[parents_occ]) {
+ pr_perror("Unable to allocate parent buffer");
+ return -1;
+ }
+ strncpy(parents[parents_occ++], namespace, PATHLEN);
+
+ if (write_namespaces() < 0) {
+ pr_perror("Failed to push namespaces");
+ return -1;
+ }
+
+ return parents_occ;
+}
+
+void init_namespace(char* ns, char* p)
+{
+ namespace = ns;
+ parent = p;
+}
+
+char* get_current_namespace()
+{
+ return namespace;
+}
+
+int get_current_namespace_fd()
+{
+ int i = 0;
+
+ if (parents_occ == 0 && read_namespaces() < 0)
+ return -1;
+
+ for (; i < parents_occ; i++) {
+ if(!strncmp(parents[i], namespace, PATHLEN))
+ return i;
+ }
+ pr_perror("Error, could not find current namespace fd");
+ return -1;
+}
+
+char* get_namespace(int dfd)
+{
+ if (parents_occ == 0 && read_namespaces() < 0) {
+ pr_perror("No namespace in parent hierarchy (%s:%s)", namespace, parent);
+ return NULL;
+ }
+ if (dfd >= parents_occ || dfd < 0)
+ return NULL;
+ else
+ return parents[dfd];
+}
diff --git a/include/image-remote.h b/include/image-remote.h
new file mode 100644
index 0000000..7b55653
--- /dev/null
+++ b/include/image-remote.h
@@ -0,0 +1,84 @@
+#include <limits.h>
+
+#ifndef IMAGE_REMOTE_H
+#define IMAGE_REMOTE_H
+
+#define DEFAULT_HOST "localhost"
+#define PATHLEN PATH_MAX
+#define DUMP_FINISH "DUMP_FINISH"
+#define PARENT_IMG "parent"
+#define NULL_NAMESPACE "null"
+
+/* This flag is used to enable local debugging (dump + proxy + cache + restore)
+ on the same machine. This is done by using more ports. The idea is that both
+ dump and restore processes are orthogonal to this. */
+#define LOCAL_DEVEL 1
+
+#define GET_PORT 9998
+#define PUT_PORT 9996
+
+#define PROXY_GET_PORT LOCAL_DEVEL ? 9995 : GET_PORT
+#define PROXY_PUT_PORT PUT_PORT
+
+#define CACHE_GET_PORT GET_PORT
+#define CACHE_PUT_PORT LOCAL_DEVEL ? 9997 : PUT_PORT
+
+#define PROXY_FWD_PORT CACHE_PUT_PORT
+#define PROXY_FWD_HOST "localhost"
+
+/* Warning: This may be problematic because of double evaluation... */
+#define MIN(x, y) (((x) < (y)) ? (x) : (y))
+
+/* Called by restore to get the fd correspondent to a particular path. This call
+ * will block until the connection is received. */
+extern int read_remote_image_connection(char* namespace, char* path);
+
+/* Called by dump to create a socket connection to the restore side. The socket
+ * fd is returned for further writing operations. */
+extern int write_remote_image_connection(char* namespace, char* path );
+
+/* Called by dump when everything is dumped. This function creates a new
+ * connection with a special control name. The recover side uses it to ack that
+ * no more files are coming. */
+extern int finish_remote_dump();
+
+/* Starts an image proxy daemon (dump side). It receives image files through
+ * socket connections and forwards them to the image cache (restore side). */
+extern int image_proxy(char* cache_host, unsigned short cache_port);
+
+/* Starts an image cache daemon (restore side). It receives image files through
+ * socket connections and caches them until they are requested by the restore
+ * process. */
+extern int image_cache(unsigned short cache_port);
+
+/* Reads (discards) 'len' bytes from fd. This is used to emulate the function
+ * lseek, which is used to advance the file needle. */
+int skip_remote_bytes(int fd, unsigned long len);
+
+/* To support iterative migration (multiple pre-dumps before the final dump
+ * and subsequent restore, the concept of namespace is introduced. Each image
+ * is tagged with one namespace and we build a hierarchy of namespaces to
+ * represent the dependency between pagemaps. Currently, the images dir is
+ * used as namespace when the operation is marked as remote. */
+
+/* Sets the current namesapce and parent namespace. */
+void init_namespace(char* namespace, char* parent);
+
+/* Returns a pointer to the char array containing the current namesapce. */
+char* get_current_namespace();
+
+/* Returns an integer (virtual fd) representing the current namespace. */
+int get_current_namespace_fd();
+
+/* Returns the namespace associated with the virtual fd (given as argument). */
+char* get_namespace(int dfd);
+
+/* Pushes the current namespace into the namespace hierarchy. The hierarchy is
+ * read, modified, and written. */
+int push_namespace();
+
+/* Two functions used to read and write remote images' headers.*/
+int write_header(int fd, char* namespace, char* path);
+int read_header(int fd, char* namespace, char* path);
+
+#endif /* IMAGE_REMOTE_H */
diff --git a/protobuf/remote-image.proto b/protobuf/remote-image.proto
new file mode 100644
index 0000000..14763b2
--- /dev/null
+++ b/protobuf/remote-image.proto
@@ -0,0 +1,8 @@
+message remote_image_entry {
+ required string name = 1;
+ required string namespace = 2;
+}
+
+message remote_namespace_entry {
+ repeated string namespace = 1;
+}
--
1.9.1
>From 35ea3cbc9023953db258c82bac1ca7e559ad605b Mon Sep 17 00:00:00 2001
From: rbruno <rbruno at gsd.inesc-id.pt>
Date: Sat, 26 Sep 2015 23:27:30 +0100
Subject: [PATCH 2/2] Criu remote migration using sockets. Patch 1/2.
---
Makefile.crtools | 1 +
cr-dedup.c | 1 +
cr-dump.c | 16 ++++++++
crtools.c | 4 ++
image.c | 103 +++++++++++++++++++++++++++++++++++++++++++-----
include/cr_options.h | 1 +
include/protobuf-desc.h | 2 +
page-read.c | 52 +++++++++++++++++++-----
page-xfer.c | 30 +++++++++++---
protobuf-desc.c | 1 +
protobuf/Makefile | 1 +
11 files changed, 189 insertions(+), 23 deletions(-)
diff --git a/Makefile.crtools b/Makefile.crtools
index 80f704f..3667cf3 100644
--- a/Makefile.crtools
+++ b/Makefile.crtools
@@ -6,6 +6,7 @@ obj-y += crtools.o
obj-y += security.o
obj-y += image.o
obj-y += image-desc.o
+obj-y += image-remote.o
obj-y += net.o
obj-y += tun.o
obj-y += proc_parse.o
diff --git a/cr-dedup.c b/cr-dedup.c
index b453c3e..77f0b39 100644
--- a/cr-dedup.c
+++ b/cr-dedup.c
@@ -9,6 +9,7 @@
#define MAX_BUNCH_SIZE 256
+/* TODO - patch this for using remote migration using sockets */
static int cr_dedup_one_pagemap(int pid);
int cr_dedup(void)
diff --git a/cr-dump.c b/cr-dump.c
index 3af077b..f8d931a 100644
--- a/cr-dump.c
+++ b/cr-dump.c
@@ -83,6 +83,8 @@
#include "asm/dump.h"
+#include "image-remote.h"
+
static char loc_buf[PAGE_SIZE];
static void close_vma_file(struct vma_area *vma)
@@ -1343,6 +1345,11 @@ int cr_pre_dump_tasks(pid_t pid)
LIST_HEAD(ctls);
struct parasite_ctl *ctl, *n;
+ if (opts.remote && push_namespace() < 0) {
+ pr_err("Failed to push image namespace.\n");
+ goto err;
+ }
+
if (!opts.track_mem) {
pr_info("Enforcing memory tracking for pre-dump.\n");
opts.track_mem = true;
@@ -1448,6 +1455,11 @@ int cr_dump_tasks(pid_t pid)
pr_info("Dumping processes (pid: %d)\n", pid);
pr_info("========================================\n");
+ if (opts.remote && push_namespace() < 0) {
+ pr_err("Failed to push image namepsace.\n");
+ goto err;
+ }
+
if (init_stats(DUMP_STATS))
goto err;
@@ -1550,6 +1562,10 @@ err:
if (disconnect_from_page_server())
ret = -1;
+ if (opts.remote) {
+ finish_remote_dump();
+ }
+
close_cr_imgset(&glob_imgset);
if (bfd_flush_images())
diff --git a/crtools.c b/crtools.c
index ea8b889..bb92ff3 100644
--- a/crtools.c
+++ b/crtools.c
@@ -252,6 +252,7 @@ int main(int argc, char *argv[], char *envp[])
{ "freeze-cgroup", required_argument, 0, 1068 },
{ "ghost-limit", required_argument, 0, 1069 },
{ "irmap-scan-path", required_argument, 0, 1070 },
+ { "remote", no_argument, 0, 1071 },
{ },
};
@@ -494,6 +495,9 @@ int main(int argc, char *argv[], char *envp[])
if (irmap_scan_path_add(optarg))
return -1;
break;
+ case 1071:
+ opts.remote = true;
+ break;
case 'M':
{
char *aux;
diff --git a/image.c b/image.c
index dc9d6a1..bae0bdc 100644
--- a/image.c
+++ b/image.c
@@ -12,6 +12,7 @@
#include "protobuf.h"
#include "protobuf/inventory.pb-c.h"
#include "protobuf/pagemap.pb-c.h"
+#include "image-remote.h"
bool fdinfo_per_id = false;
bool ns_per_id = false;
@@ -218,6 +219,7 @@ struct cr_imgset *cr_glob_imgset_open(int mode)
}
static int do_open_image(struct cr_img *img, int dfd, int type, unsigned long flags, char *path);
+static int do_open_remote_image(struct cr_img *img, int dfd, int type, unsigned long flags, char *path);
struct cr_img *open_image_at(int dfd, int type, unsigned long flags, ...)
{
@@ -251,9 +253,17 @@ struct cr_img *open_image_at(int dfd, int type, unsigned long flags, ...)
} else
img->fd = EMPTY_IMG_FD;
- if (do_open_image(img, dfd, type, oflags, path)) {
- close_image(img);
- return NULL;
+ if (opts.remote && strcmp(path, "stats-dump") && strcmp(path, "stats-restore")) {
+ if(do_open_remote_image(img, dfd, type, oflags, path)) {
+ close_image(img);
+ return NULL;
+ }
+ }
+ else {
+ if (do_open_image(img, dfd, type, oflags, path)) {
+ close_image(img);
+ return NULL;
+ }
}
return img;
@@ -350,21 +360,90 @@ err:
return -1;
}
+static int do_open_remote_image(struct cr_img *img, int dfd, int type, unsigned long oflags, char *path)
+{
+ int ret, flags;
+ char* namespace = NULL;
+
+ flags = oflags & ~(O_NOBUF | O_SERVICE);
+
+ if (dfd == get_service_fd(IMG_FD_OFF) || dfd == -1)
+ namespace = get_current_namespace();
+ else
+ namespace = get_namespace(dfd);
+
+ /* TODO - Find out what is the best solution for this file. */
+ if (!strcmp("irmap-cache", path)) {
+ ret = -1;
+ }
+ else if(namespace == NULL) {
+ ret = -1;
+ }
+ else if (flags == O_RDONLY) {
+ pr_info("do_open_remote_image RDONLY path=%s namespace=%s\n",
+ path, namespace);
+ ret = read_remote_image_connection(namespace, path);
+ }
+ else {
+ pr_info("do_open_remote_image WDONLY path=%s namespace=%s\n",
+ path, namespace);
+ ret = write_remote_image_connection(namespace, path);
+ }
+
+ if (ret < 0) {
+ pr_info("No %s (dfd=%d) image\n", path, dfd);
+ img->_x.fd = EMPTY_IMG_FD;
+ goto skip_magic;
+ }
+
+ img->_x.fd = ret;
+ if (oflags & O_NOBUF) {
+ bfd_setraw(&img->_x);
+ }
+ else {
+ if (flags == O_RDONLY)
+ ret = bfdopenr(&img->_x);
+ else
+ ret = bfdopenw(&img->_x);
+
+ if (ret)
+ goto err;
+ }
+ if (imgset_template[type].magic == RAW_IMAGE_MAGIC)
+ goto skip_magic;
+
+ if (flags == O_RDONLY)
+ ret = img_check_magic(img, oflags, type, path);
+ else
+ ret = img_write_magic(img, oflags, type);
+ if (ret)
+ goto err;
+
+skip_magic:
+ return 0;
+
+err:
+ return -1;
+}
+
int open_image_lazy(struct cr_img *img)
{
- int dfd;
+ int dfd, ret;
char *path = img->path;
img->path = NULL;
dfd = get_service_fd(IMG_FD_OFF);
- if (do_open_image(img, dfd, img->type, img->oflags, path)) {
- xfree(path);
- return -1;
+
+ if (opts.remote && strcmp(path, "stats-dump") && strcmp(path, "stats-restore")) {
+ ret = do_open_remote_image(img, dfd, img->type, img->oflags, path);
+ }
+ else {
+ ret = do_open_image(img, dfd, img->type, img->oflags, path);
}
xfree(path);
- return 0;
+ return ret ? -1 : 0;
}
void close_image(struct cr_img *img)
@@ -410,13 +489,19 @@ int open_image_dir(char *dir)
close(fd);
fd = ret;
- if (opts.img_parent) {
+ if (opts.img_parent && opts.remote) {
+ init_namespace(dir, opts.img_parent);
+ }
+ else if (opts.img_parent) {
ret = symlinkat(opts.img_parent, fd, CR_PARENT_LINK);
if (ret < 0 && errno != EEXIST) {
pr_perror("Can't link parent snapshot");
goto err;
}
}
+ else if (opts.remote) {
+ init_namespace(dir, NULL);
+ }
return 0;
diff --git a/include/cr_options.h b/include/cr_options.h
index af130dd..4c611ee 100644
--- a/include/cr_options.h
+++ b/include/cr_options.h
@@ -91,6 +91,7 @@ struct cr_options {
bool enable_external_masters;
bool aufs; /* auto-deteced, not via cli */
bool overlayfs;
+ bool remote;
size_t ghost_limit;
struct list_head irmap_scan_paths;
};
diff --git a/include/protobuf-desc.h b/include/protobuf-desc.h
index ab7e4f2..232b814 100644
--- a/include/protobuf-desc.h
+++ b/include/protobuf-desc.h
@@ -55,6 +55,8 @@ enum {
PB_CPUINFO,
PB_USERNS,
PB_NETNS,
+ PB_REMOTE_IMAGE,
+ PB_REMOTE_NAMESPACE,
/* PB_AUTOGEN_STOP */
diff --git a/page-read.c b/page-read.c
index 832c057..e07fd5e 100644
--- a/page-read.c
+++ b/page-read.c
@@ -10,6 +10,8 @@
#include "protobuf.h"
#include "protobuf/pagemap.pb-c.h"
+#include "image-remote.h"
+
#ifndef SEEK_DATA
#define SEEK_DATA 3
#define SEEK_HOLE 4
@@ -90,7 +92,12 @@ static void skip_pagemap_pages(struct page_read *pr, unsigned long len)
return;
pr_debug("\tpr%u Skip %lx bytes from page-dump\n", pr->id, len);
- if (!pr->pe->in_parent)
+ if (!pr->pe->in_parent && opts.remote) {
+ if (skip_remote_bytes(img_raw_fd(pr->pi), len) < 0) {
+ pr_perror("Unable to skip remote bytes");
+ }
+ }
+ else if (!pr->pe->in_parent)
lseek(img_raw_fd(pr->pi), len, SEEK_CUR);
pr->cvaddr += len;
}
@@ -132,6 +139,22 @@ new_pagemap:
}
}
+/* This read_ is a stronger way to read something from a fd. */
+static size_t read_(int fd, char* buff, size_t size)
+{
+ size_t n = 0;
+ size_t curr = 0;
+ while(1) {
+ n = read(fd, buff + curr, size - curr);
+ if(n < 1) {
+ return n;
+ }
+ curr += n;
+ if(curr == size) {
+ return size;
+ }
+ }
+}
static int read_pagemap_page(struct page_read *pr, unsigned long vaddr, void *buf)
{
int ret;
@@ -146,10 +169,11 @@ static int read_pagemap_page(struct page_read *pr, unsigned long vaddr, void *bu
return ret;
} else {
int fd = img_raw_fd(pr->pi);
- off_t current_vaddr = lseek(fd, 0, SEEK_CUR);
+ /* TODO - lseek is not possible to sockets. Need to find a solution. */
+ off_t current_vaddr = opts.remote ? 0 : lseek(fd, 0, SEEK_CUR);
pr_debug("\tpr%u Read page %lx from self %lx/%"PRIx64"\n", pr->id,
vaddr, pr->cvaddr, current_vaddr);
- ret = read(fd, buf, PAGE_SIZE);
+ ret = read_(fd, buf, PAGE_SIZE);
if (ret != PAGE_SIZE) {
pr_perror("Can't read mapping page %d", ret);
return -1;
@@ -195,9 +219,18 @@ static int try_open_parent(int dfd, int pid, struct page_read *pr, int pr_flags)
int pfd, ret;
struct page_read *parent = NULL;
- pfd = openat(dfd, CR_PARENT_LINK, O_RDONLY);
- if (pfd < 0 && errno == ENOENT)
- goto out;
+ if(opts.remote) {
+ /* dfd is either the service fd or an image namespace */
+ pfd = dfd == get_service_fd(IMG_FD_OFF) ? get_current_namespace_fd() : dfd;
+ pfd -= 1;
+ if(get_namespace(pfd) == NULL)
+ goto out;
+ }
+ else {
+ pfd = openat(dfd, CR_PARENT_LINK, O_RDONLY);
+ if (pfd < 0 && errno == ENOENT)
+ goto out;
+ }
parent = xmalloc(sizeof(*parent));
if (!parent)
@@ -211,8 +244,8 @@ static int try_open_parent(int dfd, int pid, struct page_read *pr, int pr_flags)
xfree(parent);
parent = NULL;
}
-
- close(pfd);
+ if(!opts.remote)
+ close(pfd);
out:
pr->parent = parent;
return 0;
@@ -220,7 +253,8 @@ out:
err_free:
xfree(parent);
err_cl:
- close(pfd);
+ if(!opts.remote)
+ close(pfd);
return -1;
}
diff --git a/page-xfer.c b/page-xfer.c
index 7465ed8..2bfea1b 100644
--- a/page-xfer.c
+++ b/page-xfer.c
@@ -17,6 +17,8 @@
#include "protobuf.h"
#include "protobuf/pagemap.pb-c.h"
+#include "image-remote.h"
+
struct page_server_iov {
u32 cmd;
u32 nr_pages;
@@ -742,12 +744,22 @@ static int open_page_local_xfer(struct page_xfer *xfer, int fd_type, long id)
int ret;
int pfd;
- pfd = openat(get_service_fd(IMG_FD_OFF), CR_PARENT_LINK, O_RDONLY);
- if (pfd < 0 && errno == ENOENT)
- goto out;
+ if (opts.remote) {
+ pfd = get_current_namespace_fd() - 1;
+ if (get_namespace(pfd) == NULL)
+ goto out;
+ }
+ else {
+ pfd = openat(get_service_fd(IMG_FD_OFF), CR_PARENT_LINK, O_RDONLY);
+ if (pfd < 0 && errno == ENOENT)
+ goto out;
+ }
xfer->parent = xmalloc(sizeof(*xfer->parent));
- if (!xfer->parent) {
+ if (!xfer->parent && opts.remote) {
+ return -1;
+ }
+ else if (!xfer->parent) {
close(pfd);
return -1;
}
@@ -757,7 +769,8 @@ static int open_page_local_xfer(struct page_xfer *xfer, int fd_type, long id)
pr_perror("No parent image found, though parent directory is set");
xfree(xfer->parent);
xfer->parent = NULL;
- close(pfd);
+ if(!opts.remote)
+ close(pfd);
goto out;
}
close(pfd);
@@ -807,6 +820,11 @@ int check_parent_local_xfer(int fd_type, int id)
return (ret == 0);
}
+int check_parent_remote_xfer() {
+ int pfd = get_current_namespace_fd() - 1;
+ return get_namespace(pfd) == NULL ? 0 : 1;
+}
+
static int page_server_check_parent(int sk, struct page_server_iov *pi)
{
int type, ret;
@@ -852,6 +870,8 @@ int check_parent_page_xfer(int fd_type, long id)
{
if (opts.use_page_server)
return check_parent_server_xfer(fd_type, id);
+ else if (opts.remote)
+ return check_parent_remote_xfer();
else
return check_parent_local_xfer(fd_type, id);
}
diff --git a/protobuf-desc.c b/protobuf-desc.c
index 873fd3b..2b58aab 100644
--- a/protobuf-desc.c
+++ b/protobuf-desc.c
@@ -61,6 +61,7 @@
#include "protobuf/timerfd.pb-c.h"
#include "protobuf/cpuinfo.pb-c.h"
#include "protobuf/userns.pb-c.h"
+#include "protobuf/remote-image.pb-c.h"
struct cr_pb_message_desc cr_pb_descs[PB_MAX];
diff --git a/protobuf/Makefile b/protobuf/Makefile
index 0b11852..f685fc6 100644
--- a/protobuf/Makefile
+++ b/protobuf/Makefile
@@ -48,6 +48,7 @@ proto-obj-y += tty.o
proto-obj-y += file-lock.o
proto-obj-y += rlimit.o
proto-obj-y += pagemap.o
+proto-obj-y += remote-image.o
proto-obj-y += siginfo.o
proto-obj-y += rpc.o
proto-obj-y += ext-file.o
--
1.9.1
--
Rodrigo Bruno <rbruno at gsd.inesc-id.pt>
More information about the CRIU
mailing list