[CRIU] [PATCH] Make restored processes inherit file descriptors from criu

Pavel Emelyanov xemul at parallels.com
Thu Nov 27 03:13:01 PST 2014


On 11/25/2014 01:18 AM, Saied Kazemi wrote:
> There are cases where a process's file descriptor cannot be restored
> from the checkpointed image.  For example, a pipe file descriptor with
> one end in the checkpointed process and the other end in a separate
> process (that was not part of the checkpointed process tree) cannot be
> restored because after checkpoint the pipe would be broken and removed.
> In these cases, criu's caller should set up a new file descriptor to be
> inherited by the restored process.
> 
> When checkpointing, the argument of --inherit-fd is a string that
> identifies the file.  It can be obtained from the target of the symbolic
> link in /proc/<pid>/fd (for example, /path/to/log/file or pipe:[234956]).
> 
> When restoring, the argument of --inherit-fd has the form fd[%d]:%s,
> where %d tells criu which file descriptor to use for restoring file
> identified by %s.
> 
> Note that while it's OK to omit --inherit-fd for checkpoint and specify
> it only for restore, it's better to always specify it because checkpoint
> code will not attempt to dump an inherit fd file.

By the way, I've just realized that we have an issue with such option
definition. And with "inherited" mark stored in the images.

Let's imagine we have 3 tasks, 1st and 2nd own one file under their
descriptors, and the 3rd has opened the same path again. So the picture
looks like this

A -[ fd 1 ]--+
             +---> [ struct file 1 ]--+
B -[ fd 3 ]--+                        +--> /foo/bar
                                      |
C -[ fd 5 ]------> [ struct file 2 ]--+

If we specify the --inherit-fd /foo/bar on dump then we would
say that A's fd 1 , B's fd 3 and C's fd 5 are "inherited". On
restore we would have criu's fd 7 be /foo/bar2 and will say that
--inherit-fd fd[7]:/foo/bar. Like this

CRIU -[ fd 7 ]-> [ struct file ]--> /foo/bar2

So what we _should_ expect after restore is like this

CRIU -[ fd 7 ]-> [ struct file ]------+--> /foo/bar2
                                      |
A -[ fd 1 ]--+                        |
             +---> [ struct file 1 ]--+
B -[ fd 3 ]--+                        |
                                      |
C -[ fd 5 ]------> [ struct file 2 ]--+

I.e. -- struct file's layout is restored as it used to be, but
the actual file-on-disk they point to is inherited.

While in reality we'll have this

CRIU -[ fd 7 ]--+-> [ struct file ]-------> /foo/bar2
                |
A -[ fd 1 ]-----+
                |
B -[ fd 3 ]-----+
                |
C -[ fd 5 ]-----+

I.e. -- all tasks will just get the CRIU's struct file and use it.


> Signed-off-by: Saied Kazemi <saied at google.com>
> ---
>  crtools.c            |  15 +++++
>  files-reg.c          |   8 +++
>  files.c              | 183 +++++++++++++++++++++++++++++++++++++++++++++++++++
>  include/cr_options.h |   1 +
>  include/files.h      |   8 +++
>  pipes.c              |  33 +++++++++-
>  util.c               |   9 +++
>  7 files changed, 255 insertions(+), 2 deletions(-)
> 
> diff --git a/crtools.c b/crtools.c
> index 0ac667c..d7bb6c8 100644
> --- a/crtools.c
> +++ b/crtools.c
> @@ -52,6 +52,7 @@ void init_opts(void)
>  	INIT_LIST_HEAD(&opts.veth_pairs);
>  	INIT_LIST_HEAD(&opts.scripts);
>  	INIT_LIST_HEAD(&opts.ext_mounts);
> +	INIT_LIST_HEAD(&opts.inherit_fds);
>  	INIT_LIST_HEAD(&opts.new_cgroup_roots);
>  
>  	opts.cpu_cap = CPU_CAP_DEFAULT;
> @@ -187,6 +188,7 @@ int main(int argc, char *argv[], char *envp[])
>  		{ "exec-cmd", no_argument, 0, 1059},
>  		{ "manage-cgroups", no_argument, 0, 1060},
>  		{ "cgroup-root", required_argument, 0, 1061},
> +		{ "inherit-fd", required_argument, 0, 1062},
>  		{ },
>  	};
>  
> @@ -392,6 +394,10 @@ int main(int argc, char *argv[], char *envp[])
>  					return -1;
>  			}
>  			break;
> +		case 1062:
> +			if (inherit_fd_add(optarg) < 0)
> +				return 1;
> +			break;
>  		case 'M':
>  			{
>  				char *aux;
> @@ -469,6 +475,9 @@ int main(int argc, char *argv[], char *envp[])
>  	if (log_init(opts.output))
>  		return 1;
>  
> +	/* log the inherit fd list now that log is ready */
> +	inherit_fd_log();
> +
>  	if (opts.img_parent)
>  		pr_info("Will do snapshot from %s\n", opts.img_parent);
>  
> @@ -491,6 +500,12 @@ int main(int argc, char *argv[], char *envp[])
>  		if (tree_id)
>  			pr_warn("Using -t with criu restore is obsoleted\n");
>  
> +		if (inherit_fd_validate_fds() < 0) {
> +			pr_msg("Some inherit fd items are invalid\n");
> +			pr_msg("See log file for details\n");
> +			return 1;
> +		}
> +
>  		ret = cr_restore_tasks();
>  		if (ret == 0 && opts.exec_cmd) {
>  			close_pid_proc();
> diff --git a/files-reg.c b/files-reg.c
> index 20b50a0..bfd1896 100644
> --- a/files-reg.c
> +++ b/files-reg.c
> @@ -962,6 +962,14 @@ int open_path(struct file_desc *d,
>  	}
>  
>  	mntns_root = mntns_get_root_by_mnt_id(rfi->rfe->mnt_id);
> +
> +pr_debug(">>> open_path(): rfi->path=(%s)\n", rfi->path);
> +	tmp = inherit_fd_lookup_id(rfi->path, true);
> +	if (tmp >= 0) {
> +		pr_info("File %s will be restored from fd %d\n", rfi->path, tmp);
> +		return tmp;
> +	}
> +
>  	tmp = open_cb(mntns_root, rfi, arg);
>  	if (tmp < 0) {
>  		pr_perror("Can't open file %s", rfi->path);
> diff --git a/files.c b/files.c
> index f009e7d..7bace3a 100644
> --- a/files.c
> +++ b/files.c
> @@ -37,6 +37,7 @@
>  #include "imgset.h"
>  #include "fs-magic.h"
>  #include "proc_parse.h"
> +#include "cr_options.h"
>  
>  #include "parasite.h"
>  #include "parasite-syscall.h"
> @@ -1176,3 +1177,185 @@ int shared_fdt_prepare(struct pstree_item *item)
>  
>  	return 0;
>  }
> +
> +/*
> + * Inherit fd support.
> + *
> + * There are cases where a process's file descriptor cannot be restored
> + * from the checkpointed image.  For example, a pipe file descriptor with
> + * one end in the checkpointed process and the other end in a separate
> + * process (that was not part of the checkpointed process tree) cannot be
> + * restored because after checkpoint the pipe would be broken and removed.
> + * In these cases, criu's caller should set up a new file descriptor to be
> + * inherited by the restored process.
> + *
> + * When checkpointing, the argument of --inherit-fd is a string that
> + * identifies the file.  It can be obtained from the target of the
> + * symbolic link in /proc/<pid>/fd (for example, pipe:[234956]).
> + *
> + * When restoring, the argument of --inherit-fd has the form fd[%d]:%s,
> + * where %d tells criu which file descriptor to use for restoring %s.
> + *
> + * Note that while it's OK to omit --inherit-fd for checkpoint and
> + * specify it only for restore, it's better to always specify it because
> + * checkpoint code will not attempt to dump an inherit fd file.
> + *
> + * In the following example from http://criu.org/Simple_loop, we first
> + * redirect the output of test.sh to /tmp/old and then use criu's file
> + * descriptor 7 to change /tmp/old with /tmp/new:
> + *
> + *     $ ./test.sh > /tmp/old
> + *     $ sudo criu dump -j -t <pid>
> + *     $ sudo criu restore -j --inherit-fd 'fd[7]:tmp/old' 7> /tmp/new
> + *
> + * Notice that the path in the argument of --inherit-fd is relative to the
> + * root of the process (i.e., tmp/old).
> + *
> + * Below is an example of how criu will be called from Docker to checkpoint
> + * and restore a container running in detached mode:
> + *
> + *     $ docker run -d ...
> + *     $ docker checkpoint <container-id>
> + *       |
> + *       +---> criu dump ... --inherit-fd pipe:[686787] pipe:[686788]
> + *     $ docker restore <container-id>
> + *       |
> + *       +---> criu restore ... --inherit-fd fd[1]:pipe:[686787] fd[2]:pipe:[686788]
> + */
> +
> +struct inherit_fd {
> +	char *inh_id;		/* identifier */
> +	int inh_fd;		/* file descriptor to restore from */
> +	struct list_head l;
> +};
> +
> +/*
> + * We can't print diagnostics messages in this function because the
> + * log file isn't initialized yet.
> + */
> +int inherit_fd_add(char *optarg)
> +{
> +	char *cp;
> +	int n;
> +	int fd;
> +	struct inherit_fd *inh;
> +
> +	/* beding debug support */
> +	fd = -1;
> +	n = sscanf(optarg, "debug[%d]:", &fd);
> +	if (n == 1) {
> +		/*
> +		 * If the argument has the format debug[%d]:%s, it means
> +		 * just write out the string after colon to the file
> +		 * descriptor %d.  This can be used to leave a restore
> +		 * marker in the output stream of the process.
> +		 */
> +		if (fd < 0) {
> +			pr_msg("Invalid fd number %d in argument %s\n",
> +				fd, optarg);
> +			return -1;
> +		}
> +		cp = strchr(optarg, ':') + 1;
> +		n = strlen(cp);
> +		if (write(fd, cp, n) != n) {
> +			pr_msg("Cannot write debug message %s to fd %d\n",
> +				cp, fd);
> +			return -1;
> +		}
> +		return 0;
> +	}
> +	/* end debug support */
> +
> +	fd = -1;
> +	n = sscanf(optarg, "fd[%d]:", &fd);
> +	if (n == 1) {
> +		/* argument is in fd[%d]:%s format */
> +		if (fd < 0) {
> +			pr_msg("Invalid fd number %d in argument %s\n", fd, optarg);
> +			return -1;
> +		}
> +		cp = strchr(optarg, ':');
> +		*cp++ = '\0';
> +	} else {
> +		/* argument is in %s format */
> +		cp = optarg;
> +	}
> +
> +	if ((inh = xmalloc(sizeof *inh)) == NULL)
> +		return -1;
> +	inh->inh_id = cp;
> +	inh->inh_fd = fd;
> +	list_add_tail(&inh->l, &opts.inherit_fds);
> +	return 0;
> +}
> +
> +/*
> + * Log the inherit fd list.  Called for diagnostics purposes
> + * after the log file is initialized.
> + */
> +void inherit_fd_log(void)
> +{
> +	struct inherit_fd *inh;
> +
> +	list_for_each_entry(inh, &opts.inherit_fds, l) {
> +		if (inh->inh_fd < 0) {
> +			pr_info("File %s will be marked as inherit fd\n",
> +				inh->inh_id);
> +		} else {
> +			pr_info("File %s will be restored from criu's fd %d\n",
> +				inh->inh_id, inh->inh_fd);
> +		}
> +	}
> +}
> +
> +/*
> + * Validate that all entries in the inherit fd list have a valid
> + * file descriptor.
> + */
> +int inherit_fd_validate_fds(void)
> +{
> +	struct inherit_fd *inh;
> +
> +	list_for_each_entry(inh, &opts.inherit_fds, l) {
> +		if (inh->inh_fd < 0)
> +			return -1;
> +	}
> +	return 0;
> +}
> +
> +/*
> + * Look up the inherit fd list by a file identifier.
> + *
> + * If need_fd is true, the caller wants the corresponding mapping fd.
> + * Otherwise, the caller just wants to know if the file is in the list.
> + */
> +int inherit_fd_lookup_id(char *id, bool need_fd)
> +{
> +	struct inherit_fd *inh;
> +
> +	list_for_each_entry(inh, &opts.inherit_fds, l) {
> +		if (!strcmp(inh->inh_id, id)) {
> +			pr_info("File identifier %s in inherit fd list\n", id);
> +			return need_fd ? inh->inh_fd : 0;
> +		}
> +	}
> +
> +	return -1;
> +}
> +
> +/*
> + * Look up the inherit fd list by a file descriptor.
> + */
> +bool inherit_fd_lookup_fd(int fd)
> +{
> +	struct inherit_fd *inh;
> +
> +	list_for_each_entry(inh, &opts.inherit_fds, l) {
> +		if (inh->inh_fd == fd) {
> +			pr_info("File descriptor %d in inherit fd list\n", fd);
> +			return true;
> +		}
> +	}
> +
> +	return false;
> +}
> diff --git a/include/cr_options.h b/include/cr_options.h
> index a9f9e92..3735b09 100644
> --- a/include/cr_options.h
> +++ b/include/cr_options.h
> @@ -43,6 +43,7 @@ struct cr_options {
>  	struct list_head	veth_pairs;
>  	struct list_head	scripts;
>  	struct list_head	ext_mounts;
> +	struct list_head	inherit_fds;
>  	char			*libdir;
>  	bool			use_page_server;
>  	unsigned short		ps_port;
> diff --git a/include/files.h b/include/files.h
> index 4c9300d..e4e304f 100644
> --- a/include/files.h
> +++ b/include/files.h
> @@ -168,4 +168,12 @@ extern struct collect_image_info ext_file_cinfo;
>  extern int dump_unsupp_fd(struct fd_parms *p, int lfd,
>  			  struct cr_img *, char *more, char *info);
>  
> +#define INHERIT_FD 0xcafebabe		/* marker */
> +
> +extern int inherit_fd_add(char *optarg);
> +extern void inherit_fd_log(void);
> +extern int inherit_fd_validate_fds(void);
> +extern int inherit_fd_lookup_id(char *id, bool need_fd);
> +extern bool inherit_fd_lookup_fd(int fd);
> +
>  #endif /* __CR_FILES_H__ */
> diff --git a/pipes.c b/pipes.c
> index ddcc5cf..73bc790 100644
> --- a/pipes.c
> +++ b/pipes.c
> @@ -293,8 +293,20 @@ static int recv_pipe_fd(struct pipe_info *pi)
>  	return fd;
>  }
>  
> +static char *pipe_id_string(int pipe_id)
> +{
> +	static char idstr[16];
> +
> +	if (snprintf(idstr, sizeof idstr, "pipe:[%d]", pipe_id) >= sizeof idstr) {
> +		pr_err("Not enough room for pipe %d identifier string\n", pipe_id);
> +		return NULL;
> +	}
> +	return idstr;
> +}
> +
>  static int open_pipe(struct file_desc *d)
>  {
> +	char *pipe_name;
>  	struct pipe_info *pi, *p;
>  	int ret, tmp;
>  	int pfd[2];
> @@ -304,6 +316,18 @@ static int open_pipe(struct file_desc *d)
>  
>  	pr_info("\t\tCreating pipe pipe_id=%#x id=%#x\n", pi->pe->pipe_id, pi->pe->id);
>  
> +	pipe_name = pipe_id_string(pi->pe->pipe_id);
> +	tmp = inherit_fd_lookup_id(pipe_name, true);
> +	if (tmp >= 0) {
> +		pr_info("Pipe %s will be restored from inherit fd %d\n",
> +			pipe_name, tmp);
> +		return tmp;
> +	}
> +	if (pi->pe->flags == INHERIT_FD) {
> +		pr_err("No inherit fd for pipe %s\n", pipe_name);
> +		return -1;
> +	}
> +
>  	if (!pi->create)
>  		return recv_pipe_fd(pi);
>  
> @@ -497,13 +521,18 @@ static int dump_one_pipe(int lfd, u32 id, const struct fd_parms *p)
>  
>  	pe.id		= id;
>  	pe.pipe_id	= pipe_id(p);
> -	pe.flags	= p->flags;
>  	pe.fown		= (FownEntry *)&p->fown;
> +	if (inherit_fd_lookup_id(pipe_id_string(pe.pipe_id), false) < 0)
> +		pe.flags = p->flags;
> +	else {
> +		pe.flags = INHERIT_FD;
> +		pr_info("Pipe %d marked as inherit fd\n", pe.pipe_id);
> +	}
>  
>  	if (pb_write_one(img_from_set(glob_imgset, CR_FD_PIPES), &pe, PB_PIPE))
>  		return -1;
>  
> -	return dump_one_pipe_data(&pd_pipes, lfd, p);
> +	return pe.flags == INHERIT_FD ? 0 : dump_one_pipe_data(&pd_pipes, lfd, p);
>  }
>  
>  const struct fdtype_ops pipe_dump_ops = {
> diff --git a/util.c b/util.c
> index dd76863..3b468f2 100644
> --- a/util.c
> +++ b/util.c
> @@ -41,6 +41,7 @@
>  #include "cr_options.h"
>  #include "servicefd.h"
>  #include "cr-service.h"
> +#include "files.h"
>  
>  #define VMA_OPT_LEN	128
>  
> @@ -93,6 +94,14 @@ void pr_vma(unsigned int loglevel, const struct vma_area *vma_area)
>  int close_safe(int *fd)
>  {
>  	int ret = 0;
> +
> +	/*
> +	 * Don't close files descriptors that criu's caller
> +	 * set up to be inherited by the restored process.
> +	 */
> +	if (inherit_fd_lookup_fd(*fd))
> +		return 0;
> +
>  	if (*fd > -1) {
>  		ret = close(*fd);
>  		if (!ret)
> 



More information about the CRIU mailing list