[CRIU] [PATCH] Make restored processes inherit file descriptors from criu
Andrew Vagin
avagin at parallels.com
Mon Nov 24 23:12:27 PST 2014
On Mon, Nov 24, 2014 at 02:18:23PM -0800, Saied Kazemi wrote:
> There are cases where a process's file descriptor cannot be restored
> from the checkpointed image. For example, a pipe file descriptor with
> one end in the checkpointed process and the other end in a separate
> process (that was not part of the checkpointed process tree) cannot be
> restored because after checkpoint the pipe would be broken and removed.
> In these cases, criu's caller should set up a new file descriptor to be
> inherited by the restored process.
>
> When checkpointing, the argument of --inherit-fd is a string that
> identifies the file. It can be obtained from the target of the symbolic
> link in /proc/<pid>/fd (for example, /path/to/log/file or pipe:[234956]).
>
> When restoring, the argument of --inherit-fd has the form fd[%d]:%s,
> where %d tells criu which file descriptor to use for restoring file
> identified by %s.
How do you handle conflicts between inherit-fd sets and restored file
descriptors.
For example I have a process with the following set of file descriptors:
0 -> tty
1 -> tty
2 -> tty
3 -> /test
4 -> pipe:[234956]
On restore I create a pipe and use --inherit-fd fd3:pipe:[234956]. In
this case the 3 fd will be restored before the 4 fd. Why does this
operation not overwrite the inherit-fd descriptor?
Why does criu not return an error?
fd 3 already in use (called at files.c:867
>
> Note that while it's OK to omit --inherit-fd for checkpoint and specify
> it only for restore, it's better to always specify it because checkpoint
> code will not attempt to dump an inherit fd file.
>
> Signed-off-by: Saied Kazemi <saied at google.com>
> ---
> crtools.c | 15 +++++
> files-reg.c | 8 +++
> files.c | 183 +++++++++++++++++++++++++++++++++++++++++++++++++++
> include/cr_options.h | 1 +
> include/files.h | 8 +++
> pipes.c | 33 +++++++++-
Does it work for only pipes now?
> util.c | 9 +++
I would like to have tests for this functionality.
> 7 files changed, 255 insertions(+), 2 deletions(-)
>
> diff --git a/crtools.c b/crtools.c
> index 0ac667c..d7bb6c8 100644
> --- a/crtools.c
> +++ b/crtools.c
> @@ -52,6 +52,7 @@ void init_opts(void)
> INIT_LIST_HEAD(&opts.veth_pairs);
> INIT_LIST_HEAD(&opts.scripts);
> INIT_LIST_HEAD(&opts.ext_mounts);
> + INIT_LIST_HEAD(&opts.inherit_fds);
> INIT_LIST_HEAD(&opts.new_cgroup_roots);
>
> opts.cpu_cap = CPU_CAP_DEFAULT;
> @@ -187,6 +188,7 @@ int main(int argc, char *argv[], char *envp[])
> { "exec-cmd", no_argument, 0, 1059},
> { "manage-cgroups", no_argument, 0, 1060},
> { "cgroup-root", required_argument, 0, 1061},
> + { "inherit-fd", required_argument, 0, 1062},
> { },
> };
>
> @@ -392,6 +394,10 @@ int main(int argc, char *argv[], char *envp[])
> return -1;
> }
> break;
> + case 1062:
> + if (inherit_fd_add(optarg) < 0)
> + return 1;
> + break;
> case 'M':
> {
> char *aux;
> @@ -469,6 +475,9 @@ int main(int argc, char *argv[], char *envp[])
> if (log_init(opts.output))
> return 1;
>
> + /* log the inherit fd list now that log is ready */
> + inherit_fd_log();
> +
> if (opts.img_parent)
> pr_info("Will do snapshot from %s\n", opts.img_parent);
>
> @@ -491,6 +500,12 @@ int main(int argc, char *argv[], char *envp[])
> if (tree_id)
> pr_warn("Using -t with criu restore is obsoleted\n");
>
> + if (inherit_fd_validate_fds() < 0) {
> + pr_msg("Some inherit fd items are invalid\n");
> + pr_msg("See log file for details\n");
> + return 1;
> + }
> +
> ret = cr_restore_tasks();
> if (ret == 0 && opts.exec_cmd) {
> close_pid_proc();
> diff --git a/files-reg.c b/files-reg.c
> index 20b50a0..bfd1896 100644
> --- a/files-reg.c
> +++ b/files-reg.c
> @@ -962,6 +962,14 @@ int open_path(struct file_desc *d,
> }
>
> mntns_root = mntns_get_root_by_mnt_id(rfi->rfe->mnt_id);
> +
> +pr_debug(">>> open_path(): rfi->path=(%s)\n", rfi->path);
^^^^ I think you forgot to remove this message
> + tmp = inherit_fd_lookup_id(rfi->path, true);
> + if (tmp >= 0) {
> + pr_info("File %s will be restored from fd %d\n", rfi->path, tmp);
> + return tmp;
> + }
> +
> tmp = open_cb(mntns_root, rfi, arg);
> if (tmp < 0) {
> pr_perror("Can't open file %s", rfi->path);
> diff --git a/files.c b/files.c
> index f009e7d..7bace3a 100644
> --- a/files.c
> +++ b/files.c
> @@ -37,6 +37,7 @@
> #include "imgset.h"
> #include "fs-magic.h"
> #include "proc_parse.h"
> +#include "cr_options.h"
>
> #include "parasite.h"
> #include "parasite-syscall.h"
> @@ -1176,3 +1177,185 @@ int shared_fdt_prepare(struct pstree_item *item)
>
> return 0;
> }
> +
> +/*
> + * Inherit fd support.
> + *
> + * There are cases where a process's file descriptor cannot be restored
> + * from the checkpointed image. For example, a pipe file descriptor with
> + * one end in the checkpointed process and the other end in a separate
> + * process (that was not part of the checkpointed process tree) cannot be
> + * restored because after checkpoint the pipe would be broken and removed.
> + * In these cases, criu's caller should set up a new file descriptor to be
> + * inherited by the restored process.
> + *
> + * When checkpointing, the argument of --inherit-fd is a string that
> + * identifies the file. It can be obtained from the target of the
> + * symbolic link in /proc/<pid>/fd (for example, pipe:[234956]).
> + *
> + * When restoring, the argument of --inherit-fd has the form fd[%d]:%s,
> + * where %d tells criu which file descriptor to use for restoring %s.
> + *
> + * Note that while it's OK to omit --inherit-fd for checkpoint and
> + * specify it only for restore, it's better to always specify it because
> + * checkpoint code will not attempt to dump an inherit fd file.
> + *
> + * In the following example from http://criu.org/Simple_loop, we first
> + * redirect the output of test.sh to /tmp/old and then use criu's file
> + * descriptor 7 to change /tmp/old with /tmp/new:
> + *
> + * $ ./test.sh > /tmp/old
> + * $ sudo criu dump -j -t <pid>
> + * $ sudo criu restore -j --inherit-fd 'fd[7]:tmp/old' 7> /tmp/new
> + *
> + * Notice that the path in the argument of --inherit-fd is relative to the
> + * root of the process (i.e., tmp/old).
> + *
> + * Below is an example of how criu will be called from Docker to checkpoint
> + * and restore a container running in detached mode:
> + *
> + * $ docker run -d ...
> + * $ docker checkpoint <container-id>
> + * |
> + * +---> criu dump ... --inherit-fd pipe:[686787] pipe:[686788]
> + * $ docker restore <container-id>
> + * |
> + * +---> criu restore ... --inherit-fd fd[1]:pipe:[686787] fd[2]:pipe:[686788]
> + */
> +
> +struct inherit_fd {
> + char *inh_id; /* identifier */
> + int inh_fd; /* file descriptor to restore from */
> + struct list_head l;
> +};
> +
> +/*
> + * We can't print diagnostics messages in this function because the
> + * log file isn't initialized yet.
> + */
> +int inherit_fd_add(char *optarg)
> +{
> + char *cp;
> + int n;
> + int fd;
> + struct inherit_fd *inh;
> +
> + /* beding debug support */
> + fd = -1;
> + n = sscanf(optarg, "debug[%d]:", &fd);
I say nothing about debug[%d]:fd in the commit message.
> + if (n == 1) {
> + /*
> + * If the argument has the format debug[%d]:%s, it means
> + * just write out the string after colon to the file
> + * descriptor %d. This can be used to leave a restore
> + * marker in the output stream of the process.
> + */
> + if (fd < 0) {
> + pr_msg("Invalid fd number %d in argument %s\n",
pr_err
> + fd, optarg);
> + return -1;
> + }
> + cp = strchr(optarg, ':') + 1;
> + n = strlen(cp);
> + if (write(fd, cp, n) != n) {
> + pr_msg("Cannot write debug message %s to fd %d\n",
> + cp, fd);
> + return -1;
> + }
> + return 0;
> + }
> + /* end debug support */
> +
> + fd = -1;
> + n = sscanf(optarg, "fd[%d]:", &fd);
> + if (n == 1) {
> + /* argument is in fd[%d]:%s format */
> + if (fd < 0) {
> + pr_msg("Invalid fd number %d in argument %s\n", fd, optarg);
> + return -1;
> + }
> + cp = strchr(optarg, ':');
> + *cp++ = '\0';
> + } else {
> + /* argument is in %s format */
> + cp = optarg;
> + }
> +
> + if ((inh = xmalloc(sizeof *inh)) == NULL)
> + return -1;
> + inh->inh_id = cp;
> + inh->inh_fd = fd;
> + list_add_tail(&inh->l, &opts.inherit_fds);
> + return 0;
> +}
> +
> +/*
> + * Log the inherit fd list. Called for diagnostics purposes
> + * after the log file is initialized.
> + */
> +void inherit_fd_log(void)
> +{
> + struct inherit_fd *inh;
> +
> + list_for_each_entry(inh, &opts.inherit_fds, l) {
> + if (inh->inh_fd < 0) {
> + pr_info("File %s will be marked as inherit fd\n",
> + inh->inh_id);
> + } else {
> + pr_info("File %s will be restored from criu's fd %d\n",
> + inh->inh_id, inh->inh_fd);
> + }
> + }
> +}
> +
> +/*
> + * Validate that all entries in the inherit fd list have a valid
> + * file descriptor.
> + */
> +int inherit_fd_validate_fds(void)
> +{
> + struct inherit_fd *inh;
> +
> + list_for_each_entry(inh, &opts.inherit_fds, l) {
> + if (inh->inh_fd < 0)
> + return -1;
> + }
> + return 0;
> +}
> +
> +/*
> + * Look up the inherit fd list by a file identifier.
> + *
> + * If need_fd is true, the caller wants the corresponding mapping fd.
> + * Otherwise, the caller just wants to know if the file is in the list.
> + */
> +int inherit_fd_lookup_id(char *id, bool need_fd)
> +{
> + struct inherit_fd *inh;
> +
> + list_for_each_entry(inh, &opts.inherit_fds, l) {
> + if (!strcmp(inh->inh_id, id)) {
> + pr_info("File identifier %s in inherit fd list\n", id);
> + return need_fd ? inh->inh_fd : 0;
> + }
> + }
> +
> + return -1;
> +}
> +
> +/*
> + * Look up the inherit fd list by a file descriptor.
> + */
> +bool inherit_fd_lookup_fd(int fd)
> +{
> + struct inherit_fd *inh;
> +
> + list_for_each_entry(inh, &opts.inherit_fds, l) {
> + if (inh->inh_fd == fd) {
> + pr_info("File descriptor %d in inherit fd list\n", fd);
> + return true;
> + }
> + }
> +
> + return false;
> +}
> diff --git a/include/cr_options.h b/include/cr_options.h
> index a9f9e92..3735b09 100644
> --- a/include/cr_options.h
> +++ b/include/cr_options.h
> @@ -43,6 +43,7 @@ struct cr_options {
> struct list_head veth_pairs;
> struct list_head scripts;
> struct list_head ext_mounts;
> + struct list_head inherit_fds;
> char *libdir;
> bool use_page_server;
> unsigned short ps_port;
> diff --git a/include/files.h b/include/files.h
> index 4c9300d..e4e304f 100644
> --- a/include/files.h
> +++ b/include/files.h
> @@ -168,4 +168,12 @@ extern struct collect_image_info ext_file_cinfo;
> extern int dump_unsupp_fd(struct fd_parms *p, int lfd,
> struct cr_img *, char *more, char *info);
>
> +#define INHERIT_FD 0xcafebabe /* marker */
> +
> +extern int inherit_fd_add(char *optarg);
> +extern void inherit_fd_log(void);
> +extern int inherit_fd_validate_fds(void);
> +extern int inherit_fd_lookup_id(char *id, bool need_fd);
> +extern bool inherit_fd_lookup_fd(int fd);
> +
> #endif /* __CR_FILES_H__ */
> diff --git a/pipes.c b/pipes.c
> index ddcc5cf..73bc790 100644
> --- a/pipes.c
> +++ b/pipes.c
> @@ -293,8 +293,20 @@ static int recv_pipe_fd(struct pipe_info *pi)
> return fd;
> }
>
> +static char *pipe_id_string(int pipe_id)
> +{
> + static char idstr[16];
> +
> + if (snprintf(idstr, sizeof idstr, "pipe:[%d]", pipe_id) >= sizeof idstr) {
> + pr_err("Not enough room for pipe %d identifier string\n", pipe_id);
> + return NULL;
> + }
> + return idstr;
> +}
> +
> static int open_pipe(struct file_desc *d)
> {
> + char *pipe_name;
> struct pipe_info *pi, *p;
> int ret, tmp;
> int pfd[2];
> @@ -304,6 +316,18 @@ static int open_pipe(struct file_desc *d)
>
> pr_info("\t\tCreating pipe pipe_id=%#x id=%#x\n", pi->pe->pipe_id, pi->pe->id);
>
> + pipe_name = pipe_id_string(pi->pe->pipe_id);
> + tmp = inherit_fd_lookup_id(pipe_name, true);
> + if (tmp >= 0) {
> + pr_info("Pipe %s will be restored from inherit fd %d\n",
> + pipe_name, tmp);
> + return tmp;
> + }
> + if (pi->pe->flags == INHERIT_FD) {
> + pr_err("No inherit fd for pipe %s\n", pipe_name);
> + return -1;
> + }
> +
> if (!pi->create)
> return recv_pipe_fd(pi);
>
> @@ -497,13 +521,18 @@ static int dump_one_pipe(int lfd, u32 id, const struct fd_parms *p)
>
> pe.id = id;
> pe.pipe_id = pipe_id(p);
> - pe.flags = p->flags;
> pe.fown = (FownEntry *)&p->fown;
> + if (inherit_fd_lookup_id(pipe_id_string(pe.pipe_id), false) < 0)
> + pe.flags = p->flags;
> + else {
> + pe.flags = INHERIT_FD;
> + pr_info("Pipe %d marked as inherit fd\n", pe.pipe_id);
> + }
>
> if (pb_write_one(img_from_set(glob_imgset, CR_FD_PIPES), &pe, PB_PIPE))
> return -1;
>
> - return dump_one_pipe_data(&pd_pipes, lfd, p);
> + return pe.flags == INHERIT_FD ? 0 : dump_one_pipe_data(&pd_pipes, lfd, p);
> }
>
> const struct fdtype_ops pipe_dump_ops = {
> diff --git a/util.c b/util.c
> index dd76863..3b468f2 100644
> --- a/util.c
> +++ b/util.c
> @@ -41,6 +41,7 @@
> #include "cr_options.h"
> #include "servicefd.h"
> #include "cr-service.h"
> +#include "files.h"
>
> #define VMA_OPT_LEN 128
>
> @@ -93,6 +94,14 @@ void pr_vma(unsigned int loglevel, const struct vma_area *vma_area)
> int close_safe(int *fd)
> {
> int ret = 0;
> +
> + /*
> + * Don't close files descriptors that criu's caller
> + * set up to be inherited by the restored process.
> + */
> + if (inherit_fd_lookup_fd(*fd))
> + return 0;
I think it's a bad place for this check. close_safe() is a generic
function for closing file descriptors.
For example one of patterns how it's used
int fd = -1;
if (smth)
goto err;
fd = open();
if (fd < 0)
goto err;
...
err:
close_safe(&fd)
return -1;
> +
> if (*fd > -1) {
> ret = close(*fd);
> if (!ret)
> --
> 2.1.0.rc2.206.gedb03e5
>
> _______________________________________________
> CRIU mailing list
> CRIU at openvz.org
> https://lists.openvz.org/mailman/listinfo/criu
More information about the CRIU
mailing list