[CRIU] Another attempt at migration processes

Adrian Reber adrian at lisas.de
Tue Aug 14 10:55:44 EDT 2012


On Tue, Jul 03, 2012 at 12:12:00PM +0400, Pavel Emelyanov wrote:
> > The patch is still very rough and meant as an RFC to see if this is
> > something criu is interested in and if an implementation done the way I
> > did it could be accepted by upstream.
> 
> Yes, we're interested in this type of migration, but the implementation
> I see should be improved.
> 
> Two things should be fixed in the first step: 1. don't use printf-s, use
> crtools logging engine 2. don't push the host/port/socket arguments all
> over the code, try to reuse the open_image/cr_fdset_open engine to work
> with sockets.

See attached patch which tries to do much better than the previous
version. Using the functions in image.c to automatically know that the
data should be read over the network makes the patch much smaller.

This is still only the implementation for the restore part.

		Adrian
-------------- next part --------------
commit 9cb4a25f4150416410cd399ca03cf5a56195e144
Author: Adrian Reber <adrian at lisas.de>
Date:   Tue Aug 14 15:36:42 2012 +0200

    cr-restore: read pages to temporary file if migrating
    
    This is a hack until it is possible to read the pages-*.img directly
    from restorer.c over a network socket. This workaround opens
    a temporary file on /dev/shm which is then used by restorer.c to
    read pages-*.img.

diff --git a/cr-restore.c b/cr-restore.c
index fe07599..f9b8c88 100644
--- a/cr-restore.c
+++ b/cr-restore.c
@@ -1171,7 +1171,6 @@ static int sigreturn_restore(pid_t pid, CoreEntry *core, struct list_head *tgt_v
 	struct thread_restore_args *thread_args;
 
 	LIST_HEAD(self_vma_list);
-	int fd_pages = -1;
 	int i;
 
 	pr_info("Restore via sigreturn\n");
@@ -1195,12 +1194,6 @@ static int sigreturn_restore(pid_t pid, CoreEntry *core, struct list_head *tgt_v
 	BUILD_BUG_ON(SHMEMS_SIZE % PAGE_SIZE);
 	BUILD_BUG_ON(TASK_ENTRIES_SIZE % PAGE_SIZE);
 
-	fd_pages = open_image_ro(CR_FD_PAGES, pid);
-	if (fd_pages < 0) {
-		pr_perror("Can't open pages-%d", pid);
-		goto err;
-	}
-
 	restore_code_len	= sizeof(restorer_blob);
 	restore_code_len	= round_up(restore_code_len, 16);
 
@@ -1315,7 +1308,6 @@ static int sigreturn_restore(pid_t pid, CoreEntry *core, struct list_head *tgt_v
 	task_args->pid		= pid;
 	task_args->logfd	= log_get_fd();
 	task_args->sigchld_act	= sigchld_act;
-	task_args->fd_pages	= fd_pages;
 
 	strncpy(task_args->comm, core->tc->comm, sizeof(task_args->comm));
 
@@ -1339,6 +1331,49 @@ static int sigreturn_restore(pid_t pid, CoreEntry *core, struct list_head *tgt_v
 	if (ret < 0)
 		goto err;
 
+	task_args->fd_pages = open_image_ro(CR_FD_PAGES, pid);
+	if (task_args->fd_pages < 0) {
+		pr_perror("Can't open pages-%d", pid);
+		goto err;
+	}
+
+	/* check if we are running in network migration mode */
+	if (open_listen_socket(NULL, 0) >= 0) {
+		/* yes we are. save the pages to temporary file
+		 * until there is a better way to directly read
+		 * the network socket from restorer.c */
+
+		int tmp_fd;
+		char buffer[PATH_MAX];
+		char tmp[PAGE_SIZE];
+		int n;
+
+		i = 0;
+		strcpy(buffer, "/dev/shm/pages-XXXXXX");
+		tmp_fd = mkstemp(buffer);
+		while (1) {
+			n = read(task_args->fd_pages, tmp, PAGE_SIZE);
+
+			if (n == 0)
+				break;
+
+			if (n == -1) {
+				pr_perror("Pages read failed");
+				goto err;
+			}
+
+			i = write(tmp_fd, tmp, n);
+			if (i!=n) {
+				pr_perror("Oops, writing failed");
+				goto err;
+			}
+
+		}
+		lseek(tmp_fd, 0, SEEK_SET);
+		close(task_args->fd_pages);
+		task_args->fd_pages = tmp_fd;
+	}
+
 	mutex_init(&task_args->rst_lock);
 
 	/*

commit 963cf4da50f322c6f3fae81e78a8460da0e0dc61
Author: Adrian Reber <adrian at lisas.de>
Date:   Tue Aug 14 15:21:56 2012 +0200

    cr-restore: close network socket before restoring file descriptors
    
    To make sure the network socket for migration does not conflict
    with the file descriptors which will be restored, the network
    socket is closed and reopened.

diff --git a/cr-restore.c b/cr-restore.c
index 65f698b..fe07599 100644
--- a/cr-restore.c
+++ b/cr-restore.c
@@ -286,6 +286,7 @@ static int restore_one_alive_task(int pid, CoreEntry *core)
 	if (prepare_fds(me))
 		return -1;
 
+	reopen_listen_socket();
 	if (prepare_fs(pid))
 		return -1;
 
@@ -425,6 +426,9 @@ static int restore_one_task(int pid)
 	int fd, ret;
 	CoreEntry *core;
 
+	/* try to reopen the network listen socket */
+	reopen_listen_socket();
+
 	if (me->state == TASK_HELPER)
 		return restore_one_fake(pid);
 
@@ -442,6 +446,7 @@ static int restore_one_task(int pid)
 		ret = -1;
 		goto out;
 	}
+	close_listen_socket();
 
 	switch ((int)core->tc->task_state) {
 	case TASK_ALIVE:
@@ -902,6 +907,12 @@ int cr_restore_tasks(pid_t pid, struct cr_options *opts)
 	if (prepare_pstree_ids() < 0)
 		return -1;
 
+	/*
+	 * closing network listen socket to be able to restore all previously
+	 * opened file descriptors.
+	 */
+	close_listen_socket();
+
 	futex_set(&task_entries->nr_in_progress, task_entries->nr_tasks + task_entries->nr_helpers);
 
 	return restore_root_task(root_item, opts);

commit 3c8dd228f2ceaf6bb00fdfd7c0244ab7a682fefb
Author: Adrian Reber <adrian at lisas.de>
Date:   Mon Aug 13 17:15:52 2012 +0200

    image: add functions to close and re-open the network listen socket
    
    During the restore of the file descriptors all open network
    sockets have to be closed. The network hostname and port
    are stored during open_listen_socket() and after close_listen_socket()
    they can be re-used by calling reopen_listen_socket().

diff --git a/image.c b/image.c
index 2115412..969017a 100644
--- a/image.c
+++ b/image.c
@@ -218,6 +218,8 @@ struct cr_fdset *cr_glob_fdset_open(int mode)
 
 static int image_dir_fd = -1;
 static int sockfd = -1;
+static char network_host[255] = "";
+static int network_port = -1;
 
 int open_image(int type, unsigned long flags, ...)
 {
@@ -309,6 +311,15 @@ int open_listen_socket(char *host, int port)
 	struct hostent *server;
 	struct sockaddr_in serveraddr;
 
+	if (sockfd != -1)
+		return sockfd;
+
+	/* if host is not specified return early */
+	if (host == NULL)
+		return -1;
+
+	pr_info("Listening on %s:%d\n", host, port);
+
 	sockfd = socket(AF_INET, SOCK_STREAM, 0);
 	if (sockfd < 0) {
 		pr_perror("ERROR opening socket");
@@ -336,5 +347,22 @@ int open_listen_socket(char *host, int port)
 		return -1;
 	}
 
+	strncpy(network_host, host, 255);
+	network_port = port;
 	return sockfd;
 }
+
+void close_listen_socket()
+{
+	if (sockfd == -1)
+		return;
+	close_safe(&sockfd);
+	sockfd = -1;
+}
+
+int reopen_listen_socket()
+{
+	if (network_port != -1)
+		return open_listen_socket(network_host, network_port);
+	return 0;
+}
diff --git a/include/crtools.h b/include/crtools.h
index 3d4dc39..bf697c9 100644
--- a/include/crtools.h
+++ b/include/crtools.h
@@ -135,6 +135,8 @@ extern int open_image_dir(void);
 extern void close_image_dir(void);
 
 extern int open_listen_socket(char *host, int port);
+extern void close_listen_socket();
+extern int reopen_listen_socket();
 
 int open_image(int type, unsigned long flags, ...);
 #define open_image_ro(type, ...) open_image(type, O_RDONLY, ##__VA_ARGS__)

commit 8f4be01ac8a5eba1cc945320eb0986c642d1c233
Author: Adrian Reber <adrian at lisas.de>
Date:   Thu Aug 2 15:36:39 2012 +0200

    image: use network sockets to read restore image (if listening)

diff --git a/crtools.c b/crtools.c
index e3159c2..1f9f680 100644
--- a/crtools.c
+++ b/crtools.c
@@ -213,6 +213,7 @@ int main(int argc, char *argv[])
 	case 'r':
 		if (opts.host && (opts.port > 0)) {
 			ret = open_listen_socket(opts.host, opts.port);
+			ret = cr_restore_tasks(pid, &opts);
 			break;
 		}
 		if (!pid)
diff --git a/image.c b/image.c
index 580c69e..2115412 100644
--- a/image.c
+++ b/image.c
@@ -229,6 +229,15 @@ int open_image(int type, unsigned long flags, ...)
 	vsnprintf(path, PATH_MAX, fdset_template[type].fmt, args);
 	va_end(args);
 
+	if (sockfd != -1) {
+		ret = accept(sockfd, NULL, NULL);
+		if (ret < 0) {
+			pr_perror("Unable to accept()");
+			goto err;
+		}
+		goto net_only;
+	}
+
 	if (flags & O_EXCL) {
 		ret = unlinkat(image_dir_fd, path, 0);
 		if (ret && errno != ENOENT) {
@@ -243,6 +252,7 @@ int open_image(int type, unsigned long flags, ...)
 		goto err;
 	}
 
+net_only:
 	if (flags == O_RDONLY) {
 		u32 magic;
 
@@ -253,12 +263,17 @@ int open_image(int type, unsigned long flags, ...)
 			goto err;
 		}
 	} else {
+		if (sockfd != -1)
+			goto err;
 		if (write_img(ret, &fdset_template[type].magic))
 			goto err;
 	}
 
 	return ret;
 err:
+	close(ret);
+	if (sockfd !=-1)
+		close(sockfd);
 	return -1;
 }
 
@@ -316,5 +331,10 @@ int open_listen_socket(char *host, int port)
 		return -1;
 	}
 
+	if (listen(sockfd, 5) < 0) {
+		pr_perror("listen() failed");
+		return -1;
+	}
+
 	return sockfd;
 }

commit 930ad5fafe1472aa8bae959e81167e519d5f69a2
Author: Adrian Reber <adrian at lisas.de>
Date:   Thu Aug 2 13:35:43 2012 +0200

    crtools: added command-line options for reading images over network socket

diff --git a/crtools.c b/crtools.c
index d59934f..e3159c2 100644
--- a/crtools.c
+++ b/crtools.c
@@ -57,7 +57,7 @@ int main(int argc, char *argv[])
 	int log_inited = 0;
 	int log_level = 0;
 
-	static const char short_opts[] = "dsf:t:hcD:o:n:vxV";
+	static const char short_opts[] = "dsf:t:hcD:o:n:l:vxV";
 
 	BUILD_BUG_ON(PAGE_SIZE != PAGE_IMAGE_SIZE);
 
@@ -66,6 +66,8 @@ int main(int argc, char *argv[])
 
 	/* Default options */
 	opts.final_state = TASK_DEAD;
+	opts.host = NULL;
+	opts.port = -1;
 
 	while (1) {
 		static struct option long_opts[] = {
@@ -76,6 +78,7 @@ int main(int argc, char *argv[])
 			{ "file", required_argument, 0, 'f' },
 			{ "images-dir", required_argument, 0, 'D' },
 			{ "log-file", required_argument, 0, 'o' },
+			{ "listen", required_argument, 0, 'l' },
 			{ "namespaces", required_argument, 0, 'n' },
 			{ "ext-unix-sk", no_argument, 0, 'x' },
 			{ "help", no_argument, 0, 'h' },
@@ -122,6 +125,17 @@ int main(int argc, char *argv[])
 				return -1;
 			log_inited = 1;
 			break;
+		case 'l':
+			opts.host = strchr(optarg, ':');
+			if (!opts.host)
+				goto usage;
+			/* port needs to be at least one digit long after ':' */
+			if (strlen(optarg) < strchr(optarg, ':') - optarg + 2)
+				goto usage;
+			opts.port = atoi(strchr(optarg, ':') + 1);
+			optarg[strchr(optarg, ':') - optarg] = 0;
+			opts.host = strdup(optarg);
+			break;
 		case 'n':
 			if (parse_ns_string(optarg))
 				return -1;
@@ -197,6 +211,10 @@ int main(int argc, char *argv[])
 		ret = cr_dump_tasks(pid, &opts);
 		break;
 	case 'r':
+		if (opts.host && (opts.port > 0)) {
+			ret = open_listen_socket(opts.host, opts.port);
+			break;
+		}
 		if (!pid)
 			goto opt_pid_missing;
 		ret = cr_restore_tasks(pid, &opts);
@@ -261,6 +279,9 @@ usage:
 	pr_msg("  -h|--help             show this text\n");
 	pr_msg("  -V|--version          show version\n");
 
+	pr_msg("\nRestore options:\n");
+	pr_msg("  -l|--listen <ip:port>  listen on ip:port for incoming restore objects\n");
+
 	return -1;
 
 opt_pid_missing:
diff --git a/image.c b/image.c
index bc63427..580c69e 100644
--- a/image.c
+++ b/image.c
@@ -1,5 +1,7 @@
 #include <unistd.h>
 #include <stdarg.h>
+#include <string.h>
+#include <netdb.h>
 #include "crtools.h"
 #include "image.h"
 #include "eventpoll.h"
@@ -215,6 +217,7 @@ struct cr_fdset *cr_glob_fdset_open(int mode)
 }
 
 static int image_dir_fd = -1;
+static int sockfd = -1;
 
 int open_image(int type, unsigned long flags, ...)
 {
@@ -285,3 +288,33 @@ void close_image_dir(void)
 	close(image_dir_fd);
 	image_dir_fd = -1;
 }
+
+int open_listen_socket(char *host, int port)
+{
+	struct hostent *server;
+	struct sockaddr_in serveraddr;
+
+	sockfd = socket(AF_INET, SOCK_STREAM, 0);
+	if (sockfd < 0) {
+		pr_perror("ERROR opening socket");
+		return -1;
+	}
+
+	server = gethostbyname(host);
+	if (server == NULL) {
+		pr_perror("ERROR, no such host as %s\n", host);
+		return -1;
+	}
+
+	memset((char *) &serveraddr, 0, sizeof(serveraddr));
+	serveraddr.sin_family = AF_INET;
+	memcpy((char *)&serveraddr.sin_addr.s_addr,(char *)server->h_addr, server->h_length);
+	serveraddr.sin_port = htons(port);
+
+	if (bind(sockfd, (struct sockaddr *)&serveraddr, sizeof(serveraddr)) < 0) {
+		pr_perror("ERROR connecting\n");
+		return -1;
+	}
+
+	return sockfd;
+}
diff --git a/include/crtools.h b/include/crtools.h
index ded767d..3d4dc39 100644
--- a/include/crtools.h
+++ b/include/crtools.h
@@ -83,6 +83,8 @@ struct cr_options {
 	unsigned int		namespaces_flags;
 	bool			log_file_per_pid;
 	char			*output;
+	char			*host;
+	int			port;
 };
 
 extern struct cr_options opts;
@@ -132,6 +134,8 @@ extern struct cr_fd_desc_tmpl fdset_template[CR_FD_MAX];
 extern int open_image_dir(void);
 extern void close_image_dir(void);
 
+extern int open_listen_socket(char *host, int port);
+
 int open_image(int type, unsigned long flags, ...);
 #define open_image_ro(type, ...) open_image(type, O_RDONLY, ##__VA_ARGS__)
 


More information about the CRIU mailing list