[CRIU] Patch for unnamed unix sockets

artem.kuzmitskiy at lge.com artem.kuzmitskiy at lge.com
Tue Jul 21 02:56:57 PDT 2015


Hi all,

This patch adds support for dumping and restoring unnamed unix sockets. Details about the feature can be found at http://criu.org/External_UNIX_socket#What_to_do_with_socketpair.28.29-s.3F.
Please review.

>From a41fde482f5cb0a08191b4fcc05f04a0a67f77f6 Mon Sep 17 00:00:00 2001
From: Artem Kuzmitskiy <artem.kuzmitskiy at lge.com>
Date: Tue, 21 Jul 2015 12:15:21 +0300
Subject: [PATCH] Added functionality for dumping\restoring unnamed unix
 sockets.

    When CRIU is called with the dump option, the inode of each unnamed
    socket should be passed to --ext-unix-sk in the following format:
    socket:[<inode_value>]. When CRIU is called with the restore option, the
    inherit-fd functionality is used and the socket's inode is passed in the
    same format (with "socket" in place of "pipe").
    The problem is described in detail at
    http://criu.org/External_UNIX_socket#What_to_do_with_socketpair.28.29-s.3F.
    A simple example is available in the test/socketpairs directory.
    Usage example:
    For dump, criu dump -D images -o dump.log -v4 -x socket:[4529709] -t 13506
    For restore, criu restore -d -D images -o restore.log --pidfile restore.pid \
                 -v4 -x --inherit-fd fd[3]:socket:[4529709]

Signed-off-by: Artem Kuzmitskiy <artem.kuzmitskiy at lge.com>
---
 crtools.c                     |   6 +-
 files.c                       |  19 +-
 include/cr_options.h          |   1 +
 include/files.h               |   2 +-
 include/sockets.h             |   3 +
 lib/criu.h                    |   9 +
 sk-unix.c                     | 144 ++++++++--
 test/libcriu/run.sh           |   1 +
 test/socketpairs/Makefile     |  12 +
 test/socketpairs/socketpair.c | 595 ++++++++++++++++++++++++++++++++++++++++++
 10 files changed, 768 insertions(+), 24 deletions(-)
 create mode 100644 test/socketpairs/Makefile
 create mode 100644 test/socketpairs/socketpair.c

diff --git a/crtools.c b/crtools.c
index b085d33..1012426 100644
--- a/crtools.c
+++ b/crtools.c
@@ -50,6 +50,7 @@ void init_opts(void)

        /* Default options */
        opts.final_state = TASK_DEAD;
+       INIT_LIST_HEAD(&opts.ext_unixsk_ids);
        INIT_LIST_HEAD(&opts.veth_pairs);
        INIT_LIST_HEAD(&opts.scripts);
        INIT_LIST_HEAD(&opts.ext_mounts);
@@ -184,7 +185,7 @@ int main(int argc, char *argv[], char *envp[])
        int log_level = LOG_UNSET;
        char *imgs_dir = ".";
        char *work_dir = NULL;
-       static const char short_opts[] = "dSsRf:F:t:p:hcD:o:n:v::xVr:jlW:L:M:";
+       static const char short_opts[] = "dSsRf:F:t:p:hcD:o:n:v::x::Vr:jlW:L:M:";
        static struct option long_opts[] = {
                { "tree",                       required_argument,      0, 't'  },
                { "pid",                        required_argument,      0, 'p'  },
@@ -201,7 +202,7 @@ int main(int argc, char *argv[], char *envp[])
                { "log-file",                   required_argument,      0, 'o'  },
                { "namespaces",                 required_argument,      0, 'n'  },
                { "root",                       required_argument,      0, 'r'  },
-               { USK_EXT_PARAM,                no_argument,            0, 'x'  },
+               { USK_EXT_PARAM,                optional_argument,      0, 'x'  },
                { "help",                       no_argument,            0, 'h'  },
                { SK_EST_PARAM,                 no_argument,            0, 1042 },
                { "close",                      required_argument,      0, 1043 },
@@ -278,6 +279,7 @@ int main(int argc, char *argv[], char *envp[])
                        opts.final_state = TASK_ALIVE;
                        break;
                case 'x':
+                       if (optarg && unix_sk_ids_parse(optarg) < 0) return 1;
                        opts.ext_unix_sk = true;
                        break;
                case 'p':
diff --git a/files.c b/files.c
index 3e69be7..6c3472c 100644
--- a/files.c
+++ b/files.c
@@ -1381,6 +1381,19 @@ static int inherit_fd_lookup_id(char *id)
        return ret;
 }

+/*
+ * Check whether the file described by @d matches one of the registered
+ * inherit fds, without duplicating the descriptor (unlike inherited_fd()).
+ *
+ * Returns true only if @d has a ->name callback, the callback produced an
+ * id string and inherit_fd_lookup_id() did not report an error for it.
+ */
+bool inherit_fd_lookup_desc(struct file_desc *d)
+{
+       char buf[32], *id_str;
+
+       /* No ->name callback: the descriptor cannot be matched by id. */
+       if (!d->ops->name)
+               return false;
+
+       /* ->name() may fail, e.g. socket_d_name() returns NULL on truncation */
+       id_str = d->ops->name(d, buf, sizeof(buf));
+       if (!id_str)
+               return false;
+
+       return inherit_fd_lookup_id(id_str) >= 0;
+}
+
 bool inherited_fd(struct file_desc *d, int *fd_p)
 {
        char buf[32], *id_str;
@@ -1398,10 +1411,12 @@ bool inherited_fd(struct file_desc *d, int *fd_p)
                return true;

        *fd_p = dup(i_fd);
-       if (*fd_p < 0)
+       if (*fd_p < 0) {
                pr_perror("Inherit fd DUP failed");
+               return false;
+       }
        else
-               pr_info("File %s will be restored from fd %d duped "
+               pr_info("File %s will be restored from fd %d dumped "
                                "from inherit fd %d\n", id_str, *fd_p, i_fd);
        return true;
 }
diff --git a/include/cr_options.h b/include/cr_options.h
index 9ab8bba..62233c3 100644
--- a/include/cr_options.h
+++ b/include/cr_options.h
@@ -45,6 +45,7 @@ struct cr_options {
        };
        bool                    restore_sibling;
        bool                    ext_unix_sk;
+       struct list_head        ext_unixsk_ids;
        bool                    shell_job;
        bool                    handle_file_locks;
        bool                    tcp_established_ok;
diff --git a/include/files.h b/include/files.h
index db7e108..25d69ad 100644
--- a/include/files.h
+++ b/include/files.h
@@ -174,7 +174,7 @@ extern int inherit_fd_add(int fd, char *key);
 extern void inherit_fd_log(void);
 extern int inherit_fd_resolve_clash(int fd);
 extern int inherit_fd_fini(void);
-
+extern bool inherit_fd_lookup_desc(struct file_desc *);
 extern bool inherited_fd(struct file_desc *, int *fdp);

 #endif /* __CR_FILES_H__ */
diff --git a/include/sockets.h b/include/sockets.h
index a3010e1..deb00a3 100644
--- a/include/sockets.h
+++ b/include/sockets.h
@@ -60,6 +60,9 @@ extern int inet_collect_one(struct nlmsghdr *h, int family, int type);
 extern int unix_receive_one(struct nlmsghdr *h, void *);
 extern int netlink_receive_one(struct nlmsghdr *hdr, void *arg);

+extern int unix_sk_ids_parse(char *optarg);
+extern int unix_sk_id_add(ino_t ino);
+
 extern int do_dump_opt(int sk, int level, int name, void *val, int len);
 #define dump_opt(s, l, n, f)   do_dump_opt(s, l, n, f, sizeof(*f))
 extern int do_restore_opt(int sk, int level, int name, void *val, int len);
diff --git a/lib/criu.h b/lib/criu.h
index 1655c02..0711313 100644
--- a/lib/criu.h
+++ b/lib/criu.h
@@ -22,11 +22,16 @@
 #include <stdbool.h>
 #include "rpc.pb-c.h"

+#ifdef __GNUG__
+extern "C" {
+#endif
+
 enum criu_service_comm {
        CRIU_COMM_SK,
        CRIU_COMM_FD
 };

+
 void criu_set_service_address(char *path);
 void criu_set_service_fd(int fd);

@@ -188,4 +193,8 @@ int criu_local_restore(criu_opts *opts);
 int criu_local_restore_child(criu_opts *opts);
 int criu_local_dump_iters(criu_opts *opts, int (*more)(criu_predump_info pi));

+#ifdef __GNUG__
+}
+#endif
+
 #endif /* __CRIU_LIB_H__ */
diff --git a/sk-unix.c b/sk-unix.c
index 6c9ec25..203a6f4 100644
--- a/sk-unix.c
+++ b/sk-unix.c
@@ -65,6 +65,11 @@ struct unix_sk_listen_icon {
        struct unix_sk_listen_icon      *next;
 };

+/* One inode given to -x/--ext-unix-sk; entries live on opts.ext_unixsk_ids. */
+struct  unix_sk_exception {
+       struct list_head unix_sk_list;  /* link in opts.ext_unixsk_ids */
+       ino_t unix_sk_ino;              /* inode number of the socket */
+};
+
 #define SK_HASH_SIZE           32

 static struct unix_sk_listen_icon *unix_listen_icons[SK_HASH_SIZE];
@@ -129,6 +134,22 @@ static int can_dump_unix_sk(const struct unix_sk_desc *sk)
        return 1;
 }

+/*
+ * Tell whether @ino is one of the inodes stored in opts.ext_unixsk_ids.
+ * A debug message is logged for a match.
+ */
+static bool unix_sk_exception_lookup_id(ino_t ino)
+{
+       struct unix_sk_exception *entry;
+
+       list_for_each_entry(entry, &opts.ext_unixsk_ids, unix_sk_list) {
+               if (entry->unix_sk_ino != ino)
+                       continue;
+
+               pr_debug("Found ino %u in exception unix sk list\n", (unsigned int)ino);
+               return true;
+       }
+
+       /* walked the whole list without a match */
+       return false;
+}
+
 static int write_unix_entry(struct unix_sk_desc *sk)
 {
        int ret;
@@ -559,16 +580,22 @@ static int dump_external_sockets(struct unix_sk_desc *peer)
                                return -1;
                        }

-                       if (peer->type != SOCK_DGRAM) {
-                               show_one_unix("Ext stream not supported", peer);
-                               pr_err("Can't dump half of stream unix connection.\n");
-                               return -1;
+                       if (!peer->name && unix_sk_exception_lookup_id(sk->sd.ino)) {
+                               pr_debug("found exception for unix name-less external socket.\n");
                        }
+                       else {
+                               if (peer->type != SOCK_DGRAM) {
+                                       show_one_unix("Ext stream not supported", peer);
+                                       pr_err("Can't dump half of stream unix connection.\n");
+                                       return -1;
+                               }

-                       if (!peer->name) {
-                               show_one_unix("Ext dgram w/o name", peer);
-                               pr_err("Can't dump name-less external socket.\n");
-                               return -1;
+                               if (!peer->name) {
+                                       show_one_unix("Ext dgram w/o name", peer);
+                                       pr_err("Can't dump name-less external socket.\n");
+                                       pr_err("%d\n", sk->fd);
+                                       return -1;
+                               }
                        }
                } else if (ret < 0)
                        return -1;
@@ -691,21 +718,24 @@ static int post_open_unix_sk(struct file_desc *d, int fd)
        if (ui->ue->uflags & USK_CALLBACK)
                return 0;

-       pr_info("\tConnect %#x to %#x\n", ui->ue->ino, peer->ue->ino);
-
        /* Skip external sockets */
        if (!list_empty(&peer->d.fd_info_head))
                futex_wait_while(&peer->prepared, 0);

-       memset(&addr, 0, sizeof(addr));
-       addr.sun_family = AF_UNIX;
-       memcpy(&addr.sun_path, peer->name, peer->ue->name.len);
+       if (!inherit_fd_lookup_desc(d)) {

-       if (connect(fd, (struct sockaddr *)&addr,
-                               sizeof(addr.sun_family) +
-                               peer->ue->name.len) < 0) {
-               pr_perror("Can't connect %#x socket", ui->ue->ino);
-               return -1;
+               memset(&addr, 0, sizeof(addr));
+               addr.sun_family = AF_UNIX;
+               memcpy(&addr.sun_path, peer->name, peer->ue->name.len);
+
+               pr_info("\tConnect %#x to %#x\n", ui->ue->ino, peer->ue->ino);
+
+               if (connect(fd, (struct sockaddr *)&addr,
+                                       sizeof(addr.sun_family) +
+                                       peer->ue->name.len) < 0) {
+                       pr_perror("Can't connect %#x socket", ui->ue->ino);
+                       return -1;
+               }
        }

        if (restore_sk_queue(fd, peer->ue->id))
@@ -981,7 +1011,15 @@ static int open_unix_sk(struct file_desc *d)
        struct unix_sk_info *ui;

        ui = container_of(d, struct unix_sk_info, d);
-       if (ui->flags & USK_PAIR_MASTER)
+
+       if (inherit_fd_lookup_desc(d)) {
+               int sk = -1;
+               if (inherited_fd(d, &sk))
+                       return sk;
+               else
+                       return -1;
+       }
+       else if (ui->flags & USK_PAIR_MASTER)
                return open_unixsk_pair_master(ui);
        else if (ui->flags & USK_PAIR_SLAVE)
                return open_unixsk_pair_slave(ui);
@@ -989,11 +1027,27 @@ static int open_unix_sk(struct file_desc *d)
                return open_unixsk_standalone(ui);
 }

+/*
+ * ->name callback of unix sockets: format the id "socket:[<ino>]" of @d
+ * into @buf, which is @s bytes long.
+ *
+ * Returns @buf, or NULL (after logging) if the id does not fit.
+ */
+static char *socket_d_name(struct file_desc *d, char *buf, size_t s)
+{
+       struct unix_sk_info *ui;
+       int n;
+
+       ui = container_of(d, struct unix_sk_info, d);
+
+       /*
+        * The inode is unsigned (it is printed with %#x elsewhere in this
+        * file); with %d a large inode would come out negative and never
+        * match the "socket:[N]" form of the id.
+        */
+       n = snprintf(buf, s, "socket:[%u]", ui->ue->ino);
+       if (n < 0 || (size_t)n >= s) {
+               pr_err("Not enough room for unixsk %u identifier string\n",
+                               ui->ue->ino);
+               return NULL;
+       }
+
+       return buf;
+}
+
 static struct file_desc_ops unix_desc_ops = {
        .type = FD_TYPES__UNIXSK,
        .open = open_unix_sk,
        .post_open = post_open_unix_sk,
        .want_transport = unixsk_should_open_transport,
+       /* yields the "socket:[<ino>]" id that inherit_fd_lookup_desc() looks up */
+       .name = socket_d_name,
 };

 static int collect_one_unixsk(void *o, ProtobufCMessage *base)
@@ -1105,3 +1159,55 @@ int resolve_unix_peers(void)
        return 0;
 }

+/*
+ * Parse the optional argument of -x/--ext-unix-sk: one or more tokens of
+ * the form socket:[<inode>] (e.g. "socket:[123],socket:[456]").  Every
+ * inode is registered with unix_sk_id_add().
+ *
+ * Returns 0 when at least one token was found and every token was valid,
+ * -1 (after logging the argument) otherwise.
+ */
+int unix_sk_ids_parse(char *optarg)
+{
+       static const char keyword[] = "socket:[";
+       char *iter = optarg;
+       char *token;
+       int success = 0;
+
+       while ((token = strstr(iter, keyword)) != NULL) {
+               char *begin = token + strlen(keyword);
+               char *num_end;
+               ino_t ino = (ino_t)strtoul(begin, &num_end, 10);
+
+               /*
+                * The number must start with a digit and run right up to
+                * the closing bracket, so that "socket:[]", "socket:[12ab]"
+                * or an unterminated token are rejected instead of being
+                * silently read as some inode.
+                */
+               if (*begin < '0' || *begin > '9' || *num_end != ']' ||
+                   ino == 0 || unix_sk_id_add(ino) < 0) {
+                       success = 0;
+                       break;
+               }
+
+               success++;
+               /* continue the search behind the token just consumed */
+               iter = num_end;
+       }
+
+       if (!success) {
+               pr_err("Can't parse unix socket inode from optarg: %s\n", optarg);
+               return -1;
+       }
+
+       return 0;
+}
+
+/*
+ * Append @ino to the opts.ext_unixsk_ids list.
+ *
+ * Returns 0 on success, -1 if the list entry cannot be allocated.
+ */
+int unix_sk_id_add(ino_t ino)
+{
+       struct unix_sk_exception *entry = xmalloc(sizeof(*entry));
+
+       /* TODO: may validate inode here, but how?*/
+       if (!entry)
+               return -1;
+
+       entry->unix_sk_ino = ino;
+       list_add_tail(&entry->unix_sk_list, &opts.ext_unixsk_ids);
+       return 0;
+}
diff --git a/test/libcriu/run.sh b/test/libcriu/run.sh
index e38c76f..d97c518 100755
--- a/test/libcriu/run.sh
+++ b/test/libcriu/run.sh
@@ -43,5 +43,6 @@ run_test test_errno

 echo "== Stopping service"
 kill -TERM $(cat wdir/s/pidfile)
+unlink libcriu.so.1
 [ $RESULT -eq 0 ] && echo "Success" || echo "FAIL"
 exit $RESULT
diff --git a/test/socketpairs/Makefile b/test/socketpairs/Makefile
new file mode 100644
index 0000000..19330cf
--- /dev/null
+++ b/test/socketpairs/Makefile
@@ -0,0 +1,12 @@
+CFLAGS += -Wall
+.PHONY: clean run
+socketpair: socketpair.c
+clean:
+       rm -f socketpair
+# Only options that socketpair itself accepts (cli_flags is "hm:nv") are
+# used here; an unknown option makes the program exit with status 1.
+run: socketpair
+       ./socketpair &&                 \
+       ./socketpair -v &&              \
+       ./socketpair -m 4 &&            \
+       true
diff --git a/test/socketpairs/socketpair.c b/test/socketpairs/socketpair.c
new file mode 100644
index 0000000..c87f197
--- /dev/null
+++ b/test/socketpairs/socketpair.c
@@ -0,0 +1,595 @@
+/*
+ * A simple demo/test program using criu's --inherit-fd command line
+ * option to restore a process with an external unix socket.
+ * Extending inherit's logic to unix sockets created by socketpair(..) syscall.
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdarg.h>
+#include <unistd.h>
+#include <errno.h>
+#include <signal.h>
+#include <time.h>
+#include <string.h>
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/time.h>
+#include <sys/resource.h>
+#include <sys/wait.h>
+#include <sys/prctl.h>
+#include <sys/socket.h>
+
+
+
+
+typedef void (*sighandler_t)(int);
+typedef unsigned long ulong;
+
+/*
+ * ANSI escape sequences.  who() prefixes the tag of the calling process
+ * with the CS_* sequence of its role; CE is the "reset attributes"
+ * sequence.
+ */
+#define CS_PARENT              "\033[00;32m"
+#define CS_CHILD               "\033[00;33m"
+#define CS_DUMP                "\033[00;34m"
+#define CS_RESTORE             "\033[00;35m"
+#define CE                     "\033[0m"
+
+/*
+ * Print "<formatted message>: <errno text>" to stderr and terminate.
+ *
+ * fmt must be a string literal (it is concatenated with ": %m\n") and at
+ * least one argument has to follow it.  In the process whose pid equals
+ * parent_pid, signal 9 is sent to the caller's process group (kill(0, 9))
+ * and exit(1) is called; every other process leaves through _exit(1).
+ */
+#define die(fmt, ...) do { \
+       fprintf(stderr, fmt ": %m\n", __VA_ARGS__); \
+       if (getpid() == parent_pid) { \
+               (void)kill(0, 9); \
+               exit(1); \
+       } \
+       _exit(1); \
+} while (0)
+
+#define READ_FD                0       /* socketpair end the parent reads from */
+#define WRITE_FD       1       /* socketpair end the child writes to */
+#define CLASH_FD       3       /* force inherit fd clash */
+
+#define MAX_FORKS      3       /* child, checkpoint, restore */
+
+/* criu binary, its image directory/log/pid files, and the child's log */
+#define CRIU_BINARY            "../../criu"
+#define IMG_DIR                        "images"
+#define DUMP_LOG_FILE          "dump.log"
+#define RESTORE_LOG_FILE       "restore.log"
+#define RESTORE_PID_FILE       "restore.pid"
+#define INHERIT_FD_OPTION      "--inherit-fd"
+#define OLD_LOG_FILE           "/tmp/oldlog"   /* child's stdout and stderr */
+#define NEW_LOG_FILE           "/tmp/newlog"
+
+/*
+ * Command line options (see usage()).
+ */
+
+char *cli_flags = "hm:nv";     /* getopt() option string */
+int max_msgs = 10;             /* -m: number of messages the child sends */
+int vflag;                     /* -v: list /proc/<pid>/fd contents */
+int nflag;                     /* -n: restore without --inherit-fd */
+
+/* argument buffers that dump_argv/restore_argv point to */
+char pid_number[8];            /* "<pid>", follows -t */
+char inh_unixsk_opt[16];       /* "--inherit-fd" */
+char inh_unixsk_arg[64];       /* "fd[<n>]:<old socket name>" */
+char external_sk_ino[32];      /* "-x <socket name>", a single argument */
+
+/*
+ * criu dump command line; external_sk_ino and pid_number are filled in
+ * by main() and checkpoint_child() before the exec.
+ */
+char *dump_argv[] = {
+       "criu", "dump",
+       "-D", IMG_DIR, "-o", DUMP_LOG_FILE,
+       "-v4",
+        external_sk_ino,
+       "-t", pid_number,
+       NULL
+};
+
+/*
+ * criu restore command line.  restore_child() depends on the positions of
+ * the last entries: [11] is inh_unixsk_opt, [12] is inh_unixsk_arg and
+ * [13] is the terminating NULL.
+ */
+char *restore_argv[] = {
+       "criu", "restore", "-d",
+       "-D", IMG_DIR, "-o", RESTORE_LOG_FILE,
+       "--pidfile", RESTORE_PID_FILE,
+       "-v4", "-x",
+       inh_unixsk_opt, inh_unixsk_arg,
+       NULL
+};
+
+int max_forks;                 /* incremented by fork_safe() after each fork() */
+int parent_pid;                /* set by main(), in the parent process only */
+int child_pid;                 /* the process that gets checkpointed */
+int criu_dump_pid;             /* process that execs "criu dump" */
+int criu_restore_pid;          /* process that execs "criu restore" */
+
+/* prototypes */
+void chld_handler(int signum);
+int parent(int *socketfd, const char* ino_child_sk);
+int child(int *socketfd, int dupfd, int newfd);
+void checkpoint_child(int child_pid, int *old_socket_namefd);
+void restore_child(int *new_socketfd, const char *old_socket_name);
+void write_to_fd(int fd, char *name, int i, int newline);
+void ls_proc_fd(int fd);
+char *socket_name(int fd);
+ino_t socket_inode(int fd);
+char *who(pid_t pid);
+void socketpair_safe(int socketfd[2]);
+pid_t fork_safe(void);
+void signal_safe(int signum, sighandler_t handler);
+int open_safe(char *pathname, int flags);
+void close_safe(int fd);
+void write_safe(int fd, char *buf, int count);
+int read_safe(int fd, char *buf, int count);
+int dup_safe(int oldfd);
+void move_fd(int oldfd, int newfd);
+void mkdir_safe(char *dirname, int mode);
+void unlink_safe(char *pathname);
+void execv_safe(char *path, char *argv[], int ls);
+pid_t waitpid_safe(pid_t pid, int *status, int options, int id);
+void prctl_safe(int option, ulong arg2, ulong arg3, ulong arg4, ulong arg5);
+int dup2_safe(int oldfd, int newfd);
+
+/* Print the option summary to stdout; @cmd is the program name to show. */
+void usage(char *cmd)
+{
+       printf("Usage: %s [%s]\n", cmd, cli_flags);
+       printf("-h\tprint this help and exit\n");
+       printf("-m\tcount of send messages (by default 10 will send from child) \n");
+       printf("-n\tdo not use the %s option\n", INHERIT_FD_OPTION);
+       printf("-v\tverbose mode (list contents of /proc/<pid>/fd)\n");
+}
+
+/*
+ * Parse the options, create a socketpair and fork.  The parent keeps the
+ * READ_FD end and runs parent(); the child keeps the WRITE_FD end, sends
+ * its stdout/stderr to OLD_LOG_FILE and runs child().
+ *
+ * Returns 1 for a bad option, otherwise whatever parent()/child() return.
+ */
+int main(int argc, char *argv[])
+{
+       int ret;
+        int opt;
+       int socketfd[2];
+
+       while ((opt = getopt(argc, argv, cli_flags)) != -1) {
+               switch (opt) {
+               case 'h': usage(argv[0]); return 0;
+               case 'm':
+                       max_msgs = atoi(optarg);
+                       break;
+               case 'n': nflag++; break;
+               case 'v': vflag++; break;
+               case '?':
+                       if ('m' == optopt)
+                               fprintf (stderr, "Option -%c requires an argument.\n", optopt);
+                       else
+                       fprintf (
+                               stderr,
+                               "Unknown option character `\\x%x'.\n",
+                               optopt);
+                       return 1;
+               default: usage(argv[0]); return 1;
+               }
+       }
+
+       /* make stdout and stderr unbuffered */
+       setbuf(stdout, NULL);
+       setbuf(stderr, NULL);
+       mkdir_safe(IMG_DIR, 0700);
+
+       socketpair_safe(socketfd);
+       child_pid = fork_safe();
+       if (child_pid > 0) {
+               parent_pid = getpid();
+
+               signal_safe(SIGCHLD, chld_handler);
+               /*
+                * NOTE(review): presumably needed so that the restored
+                * child (criu restore runs with -d) is reparented to us
+                * and chld_handler() sees its exit -- confirm.
+                */
+               prctl_safe(PR_SET_CHILD_SUBREAPER, 1, 0, 0, 0);
+
+               /*
+                * The name of the child's end, e.g. "socket:[<ino>]", goes
+                * to criu dump together with -x as ONE argv element.
+                */
+               snprintf(external_sk_ino, sizeof(external_sk_ino), "-x %s",
+                       socket_name(socketfd[WRITE_FD]));
+
+               /*
+                * socket_name() returns a static buffer: keep a private
+                * copy of the name for restore_child().
+                */
+               char unix_sk_ino[32] = {0};
+               strcpy(unix_sk_ino, socket_name(socketfd[WRITE_FD]));
+               close_safe(socketfd[WRITE_FD]);
+               ret = parent(socketfd, unix_sk_ino);
+       } else {
+               /* child */
+               /* no extra descriptors: child() writes to the socket only */
+               int dupfd = -1;
+               int openfd = -1;
+               int logfd;
+
+               child_pid = getpid();
+
+               close_safe(socketfd[READ_FD]);
+               setsid();
+               /* stdout and stderr go to the log file, stdin is closed */
+               logfd = open_safe(OLD_LOG_FILE, O_WRONLY | O_APPEND | O_CREAT);
+               dup2_safe(logfd, 1);
+               dup2_safe(logfd, 2);
+               close(logfd);
+               close(0);
+
+               ret = child(socketfd, dupfd, openfd);
+       }
+
+       return ret;
+}
+
+/*
+ * Parent side: print every message received from the child over the
+ * socketpair.  Once half of max_msgs messages have arrived, checkpoint
+ * the child, replace the socketpair with a fresh one (unless -n was
+ * given) and restore the child.
+ *
+ * @socketfd:     socketpair; the parent reads from socketfd[READ_FD].
+ * @ino_child_sk: name of the child's original socket end, handed on to
+ *                restore_child().
+ *
+ * Returns 0 when the loop ends.
+ */
+int parent(int *socketfd, const char* ino_child_sk)
+{
+       char msg[32];
+       int received = 0;
+
+       while (max_forks <= MAX_FORKS) {
+               /* nothing read: try again */
+               if (read_safe(socketfd[READ_FD], msg, sizeof msg) == 0)
+                       continue;
+
+               received++;
+               if (vflag && received == 1)
+                       ls_proc_fd(-1);
+
+               printf("%s read %s from %s\n", who(0), msg,
+                       socket_name(socketfd[READ_FD]));
+
+               if (received != (max_msgs / 2))
+                       continue;
+
+               /* half-way through: dump the child, then bring it back */
+               checkpoint_child(child_pid, socketfd);
+               if (!nflag) {
+                       close_safe(socketfd[READ_FD]);
+
+                       /* create a new one */
+                       printf("%s creating a new socket\n", who(0));
+                       socketpair_safe(socketfd);
+               }
+               restore_child(socketfd, ino_child_sk);
+       }
+
+       return 0;
+}
+
+/*
+ * Child sends a total of max_msgs messages to its parent, sleeping a
+ * second after each one; half are meant to go out before the checkpoint
+ * and half after restore.
+ *
+ * @socketfd: socketpair; only socketfd[WRITE_FD] is used here.
+ * @dupfd:    optional second descriptor to write to, -1 if unused.
+ * @openfd:   optional third descriptor to write to, -1 if unused.
+ *            (main() passes -1 for both.)
+ *
+ * Always returns 0.
+ */
+int child(int *socketfd, int dupfd, int openfd)
+{
+       int i;
+       int fd;
+       int num_wfds;
+       struct timespec req = { 1, 0 };
+
+       /*
+        * Count the number of descriptors we'll be
+        * writing to.  At least 1 (for socketfd[WRITE_FD])
+        * and at most 3.
+        */
+       num_wfds = 1;
+       if (dupfd >= 0)
+               num_wfds++;
+       if (openfd >= 0)
+               num_wfds++;
+
+       for (i = 0; i < max_msgs; i++) {
+               /* print first time and after checkpoint */
+               if (vflag && (i == 0 || i == (max_msgs / 2)))
+                       ls_proc_fd(-1);
+
+               switch (i % num_wfds) {
+               case 1:
+                       /*
+                        * Second slot: dupfd when it was given, else
+                        * openfd.  dupfd is counted in num_wfds above, so
+                        * it has to get its turn here.
+                        */
+                       fd = dupfd >= 0 ? dupfd : openfd;
+                       break;
+               case 2:
+                       fd = openfd;
+                       break;
+               default:
+                       /* slot 0; also keeps fd initialized on every path */
+                       fd = socketfd[WRITE_FD];
+                       break;
+               }
+
+               write_to_fd(fd, socket_name(socketfd[WRITE_FD]), i+1, 0);
+               /*
+                * Since sleep will be interrupted by C/R, make sure
+                * to sleep an entire second to minimize the chance of
+                * writing before criu restore has exited.  If criu is
+                * still around and we write to a broken socket, we'll be
+                * killed but SIGCHLD will be delivered to criu instead
+                * of parent.
+                */
+               while (nanosleep(&req, NULL))
+                       ;
+               printf("\n");
+       }
+
+       return 0;
+}
+
+/*
+ * SIGCHLD handler: reap one terminated child without blocking.
+ *
+ * If the reaped process is child_pid, the program exits with the child's
+ * status, except for status 9, which is what the checkpoint leaves behind
+ * when it kills the child.
+ */
+void chld_handler(int signum)
+{
+       int status = 0;
+       pid_t pid;
+
+       pid = waitpid_safe(-1, &status, WNOHANG, 1);
+       /*
+        * 0 means no child has changed state yet, -1 is an error: in both
+        * cases waitpid() has not stored a status, so there is nothing to
+        * decode.
+        */
+       if (pid <= 0)
+               return;
+
+       if (WIFEXITED(status))
+               status = WEXITSTATUS(status);
+       if (pid == child_pid) {
+               printf("%s %s exited with status %d\n", who(0),
+                       who(pid), status);
+               /* if child exited successfully, we're done */
+               if (status == 0)
+                       exit(0);
+               /* checkpoint kills the child */
+               if (status != 9)
+                       exit(status);
+       }
+}
+
+/*
+ * Run "criu dump" (dump_argv) on @child_pid in a forked process and wait
+ * for it.  The program exits with criu's status if that status is
+ * non-zero.  @socketfd[READ_FD] is closed in the forked process.
+ */
+void checkpoint_child(int child_pid, int *socketfd)
+{
+       int wstatus;
+       pid_t done;
+
+       /* prepare -t <pid> */
+       snprintf(pid_number, sizeof pid_number, "%d", child_pid);
+
+       criu_dump_pid = fork_safe();
+       if (criu_dump_pid <= 0) {
+               /* forked process: turns into criu dump */
+               close(socketfd[READ_FD]);
+               criu_dump_pid = getpid();
+               execv_safe(CRIU_BINARY, dump_argv, 0);
+               return;
+       }
+
+       /* caller: wait for criu dump and report how it ended */
+       done = waitpid_safe(criu_dump_pid, &wstatus, 0, 2);
+       if (WIFEXITED(wstatus))
+               wstatus = WEXITSTATUS(wstatus);
+       printf("%s %s exited with status %d\n", who(0),
+               who(done), wstatus);
+       if (wstatus)
+               exit(wstatus);
+}
+
+/*
+ * Run "criu restore" (restore_argv) in a forked process and wait for it.
+ *
+ * Unless -n was given, the forked process keeps only
+ * @new_socketfd[WRITE_FD], moves it to CLASH_FD and passes
+ * "--inherit-fd fd[CLASH_FD]:<old_sock_name>" to criu, while the caller
+ * closes its own copy of that end.  The program exits with criu's status
+ * if that status is non-zero.
+ */
+void restore_child(int *new_socketfd, const char *old_sock_name)
+{
+       char pidfile[64];
+       int wstatus;
+       pid_t done;
+
+       criu_restore_pid = fork_safe();
+       if (criu_restore_pid <= 0) {
+               /* forked process: turns into criu restore */
+               criu_restore_pid = getpid();
+
+               if (nflag) {
+                       /* end the argument list right after "-x" */
+                       restore_argv[11] = NULL;
+               } else {
+                       close_safe(new_socketfd[READ_FD]);
+                       move_fd(new_socketfd[WRITE_FD], CLASH_FD);
+
+                       /* --inherit-fd fd[CLASH_FD]:socket:[xxxxxx] */
+                       snprintf(inh_unixsk_opt, sizeof inh_unixsk_opt,
+                               "%s", INHERIT_FD_OPTION);
+                       snprintf(inh_unixsk_arg, sizeof inh_unixsk_arg, "fd[%d]:%s",
+                               CLASH_FD, old_sock_name);
+
+                       restore_argv[11] = inh_unixsk_opt;
+                       restore_argv[13] = NULL;
+               }
+
+               /* remove the pid file of an earlier run, if there is one */
+               snprintf(pidfile, sizeof pidfile, "%s/%s", IMG_DIR, RESTORE_PID_FILE);
+               unlink_safe(pidfile);
+               execv_safe(CRIU_BINARY, restore_argv, 1);
+               return;
+       }
+
+       /* caller: the write end now belongs to the restored child */
+       if (!nflag)
+               close_safe(new_socketfd[WRITE_FD]);
+
+       done = waitpid_safe(criu_restore_pid, &wstatus, 0, 3);
+       if (WIFEXITED(wstatus))
+               wstatus = WEXITSTATUS(wstatus);
+
+       printf("%s %s exited with status %d\n", who(0),
+               who(done), wstatus);
+
+       if (wstatus)
+               exit(wstatus);
+}
+
+/*
+ * Write the message "hello <i>" (followed by '\n' if @newline is set) to
+ * @fd and log the write on stdout.  @name only appears in the log line.
+ */
+void write_to_fd(int fd, char *name, int i, int newline)
+{
+       int n;
+       /*
+        * "hello " (6) + any int (at most 11 characters) + '\n' + NUL need
+        * 19 bytes; 16 were too few once i reached 9 digits.
+        */
+       char buf[32];
+
+       n = snprintf(buf, sizeof buf, "hello %d", i);
+
+       printf("%s writing %s to %s via fd %d\n", who(0), buf, name, fd);
+
+       if (newline) {
+               buf[n++] = '\n';
+               buf[n] = '\0';
+       }
+       write_safe(fd, buf, strlen(buf));
+}
+
+/*
+ * Echo and run "ls -l" on the /proc/<pid>/fd directory of the calling
+ * process, or on the single entry @fd when @fd is not -1.
+ */
+void ls_proc_fd(int fd)
+{
+       char cmd[128];
+       int self = getpid();
+
+       if (fd != -1)
+               snprintf(cmd, sizeof cmd, "ls -l /proc/%d/fd/%d", self, fd);
+       else
+               snprintf(cmd, sizeof cmd, "ls -l /proc/%d/fd", self);
+
+       printf("%s %s\n", who(0), cmd);
+       system(cmd);
+}
+
+/*
+ * Return the target of the /proc/self/fd/<fd> symlink, e.g.
+ * "socket:[4529709]" for a socket.
+ *
+ * The result lives in a static buffer that the next call overwrites; copy
+ * it if it has to survive.  Dies if the link cannot be read.
+ */
+char *socket_name(int fd)
+{
+       static char sock_name[64];
+       char path[64];
+       ssize_t len;
+
+       snprintf(path, sizeof path, "/proc/self/fd/%d", fd);
+       /* leave one byte for the terminator added below */
+       len = readlink(path, sock_name, sizeof sock_name - 1);
+       if (len == -1)
+               die("readlink: path=%s", path);
+
+       /*
+        * readlink() does not NUL-terminate.  Terminate explicitly so that
+        * a shorter name does not keep the tail of a longer one stored by
+        * an earlier call.
+        */
+       sock_name[len] = '\0';
+       return sock_name;
+}
+
+/* Return the inode number fstat() reports for @fd; dies if fstat() fails. */
+ino_t socket_inode(int fd)
+{
+       struct stat st;
+       int rc = fstat(fd, &st);
+
+       if (rc == -1)
+               die("fstat: fd=%i", fd);
+
+       return st.st_ino;
+}
+
+/*
+ * Return a printable tag "[<role> <pid>]" for process @pid, where the role
+ * is parent, child, dump, restore or ???.  @pid == 0 means the calling
+ * process; its tag is additionally prefixed with the color escape of its
+ * role.
+ *
+ * Use two buffers to support two calls to
+ * this function in a printf argument list.
+ */
+char *who(pid_t pid)
+{
+       static char pidstr1[64];
+       static char pidstr2[64];
+       static char *cp;
+       char *np;
+       /* stays empty for an unknown process, which has no color */
+       char *ep = "";
+       int p;
+
+       p = pid ? pid : getpid();
+       if (p == parent_pid) {
+               np = "parent";
+               ep = CS_PARENT;
+       } else if (p == child_pid) {
+               np = "child";
+               ep = CS_CHILD;
+       } else if (p == criu_dump_pid) {
+               np = "dump";
+               ep = CS_DUMP;
+       } else if (p == criu_restore_pid) {
+               np = "restore";
+               ep = CS_RESTORE;
+       } else
+               np = "???";
+
+       /* alternate between the two buffers on every call */
+       cp = (cp == pidstr1) ? pidstr2 : pidstr1;
+       snprintf(cp, sizeof pidstr1, "%s[%s %d]", pid ? "" : ep, np, p);
+       return cp;
+}
+
+/* Create an AF_UNIX/SOCK_STREAM socketpair in @socketfd; dies on failure. */
+void socketpair_safe(int socketfd[2])
+{
+       if (socketpair(AF_UNIX, SOCK_STREAM, 0, socketfd) == -1)
+               die("socketpair %p", socketfd);
+}
+
+/*
+ * fork() that dies on failure.  After a successful fork max_forks is
+ * incremented (in both processes) and fork()'s return value is passed on.
+ */
+pid_t fork_safe(void)
+{
+       pid_t pid = fork();
+
+       if (pid == -1)
+               die("fork: pid=%d", pid);
+
+       max_forks++;
+       return pid;
+}
+
+/* Install @handler for @signum with signal(); dies if that fails. */
+void signal_safe(int signum, sighandler_t handler)
+{
+       if (signal(signum, handler) == SIG_ERR)
+               die("signal: signum=%d", signum);
+}
+
+/*
+ * open() @pathname with @flags and creation mode 0777.
+ * Returns the new descriptor; dies if the file cannot be opened.
+ */
+int open_safe(char *pathname, int flags)
+{
+       int fd = open(pathname, flags, 0777);
+
+       if (fd == -1)
+               die("open: pathname=%s", pathname);
+
+       return fd;
+}
+
+/* close() @fd; dies if close() fails. */
+void close_safe(int fd)
+{
+       if (close(fd) == -1)
+               die("close: fd=%d", fd);
+}
+
+/*
+ * write() @count bytes of @buf to @fd.  Anything but a complete write
+ * (an error or a short write) is fatal.
+ */
+void write_safe(int fd, char *buf, int count)
+{
+       ssize_t written = write(fd, buf, count);
+
+       if (written == count)
+               return;
+
+       die("write: fd=%d buf=\"%s\" count=%d errno=%d",
+               fd, buf, count, errno);
+}
+
+/*
+ * read() from @fd into @buf, a buffer of @count bytes, and NUL-terminate
+ * what was read.
+ *
+ * Returns the number of bytes read (0 at end of file); dies on a read
+ * error.
+ */
+int read_safe(int fd, char *buf, int count)
+{
+       int n;
+
+       /*
+        * Ask for at most count - 1 bytes: the terminator written below
+        * must still fit into the buffer.  With a full read of @count
+        * bytes it would land on buf[count], one past the end.
+        */
+       if ((n = read(fd, buf, count - 1)) < 0)
+               die("read: fd=%d count=%d", fd, count);
+       buf[n] = '\0';
+       return n;
+}
+
+/* dup() @oldfd and return the duplicate; dies if dup() fails. */
+int dup_safe(int oldfd)
+{
+       int copy = dup(oldfd);
+
+       if (copy == -1)
+               die("dup: oldfd=%d", oldfd);
+
+       return copy;
+}
+
+/*
+ * dup2() @oldfd onto @newfd and return @newfd; dies unless dup2() itself
+ * returned @newfd.
+ */
+int dup2_safe(int oldfd, int newfd)
+{
+       int got = dup2(oldfd, newfd);
+
+       if (got != newfd)
+               die("dup2: oldfd=%d newfd=%d", oldfd, newfd);
+
+       return newfd;
+}
+
+/*
+ * Make @newfd refer to what @oldfd refers to and close @oldfd.
+ * Nothing happens when both numbers are equal.
+ */
+void move_fd(int oldfd, int newfd)
+{
+       if (oldfd == newfd)
+               return;
+
+       dup2_safe(oldfd, newfd);
+       close_safe(oldfd);
+}
+
+/*
+ * mkdir() @dirname with @mode.  An already existing directory (EEXIST) is
+ * accepted; any other failure is fatal.
+ */
+void mkdir_safe(char *dirname, int mode)
+{
+       /* no "\n" in the format: die() appends ": <errno text>\n" itself */
+       if (mkdir(dirname, mode) == -1 && errno != EEXIST)
+               die("mkdir dirname=%s mode=0x%x", dirname, mode);
+}
+
+/*
+ * unlink() @pathname.  A file that does not exist (ENOENT) is accepted;
+ * any other failure is fatal.
+ */
+void unlink_safe(char *pathname)
+{
+       /* no "\n" in the format: die() appends ": <errno text>\n" itself */
+       if (unlink(pathname) == -1 && errno != ENOENT) {
+               die("unlink: pathname=%s", pathname);
+       }
+}
+
+/*
+ * Log the command line @argv, sleep for a millisecond, list the open
+ * descriptors if both -v and @ls are set, then execv() @path.
+ * A failed execv() ends in die().
+ */
+void execv_safe(char *path, char *argv[], int ls)
+{
+       struct timespec delay = { 0, 1000000 };
+       char **arg;
+
+       printf("\n%s ", who(0));
+       for (arg = argv; *arg != NULL; arg++)
+               printf("%s ", *arg);
+       printf("\n");
+
+       /* give parent a chance to wait for us */
+       while (nanosleep(&delay, NULL) != 0)
+               continue;
+
+       if (ls && vflag)
+               ls_proc_fd(-1);
+
+       execv(path, argv);
+       die("execv: path=%s", path);
+}
+
+/*
+ * waitpid() wrapper that reports a failure on stderr instead of dying.
+ * @id only tags the error message.  Returns what waitpid() returned.
+ */
+pid_t waitpid_safe(pid_t pid, int *status, int options, int id)
+{
+       pid_t reaped = waitpid(pid, status, options);
+
+       if (reaped != -1)
+               return reaped;
+
+       fprintf(stderr, "waitpid pid=%d id=%d %m\n", pid, id);
+       return reaped;
+}
+
+/* prctl() with the given arguments; dies if the call returns -1. */
+void prctl_safe(int option, ulong arg2, ulong arg3, ulong arg4, ulong arg5)
+{
+       if (prctl(option, arg2, arg3, arg4, arg5) == -1)
+               die("prctl: option=0x%x", option);
+}
--
2.1.4



More information about the CRIU mailing list