[CRIU] [PATCH] Make restored processes inherit file descriptors from criu

Andrew Vagin avagin at parallels.com
Mon Nov 24 23:12:27 PST 2014


On Mon, Nov 24, 2014 at 02:18:23PM -0800, Saied Kazemi wrote:
> There are cases where a process's file descriptor cannot be restored
> from the checkpointed image.  For example, a pipe file descriptor with
> one end in the checkpointed process and the other end in a separate
> process (that was not part of the checkpointed process tree) cannot be
> restored because after checkpoint the pipe would be broken and removed.
> In these cases, criu's caller should set up a new file descriptor to be
> inherited by the restored process.
> 
> When checkpointing, the argument of --inherit-fd is a string that
> identifies the file.  It can be obtained from the target of the symbolic
> link in /proc/<pid>/fd (for example, /path/to/log/file or pipe:[234956]).
> 
> When restoring, the argument of --inherit-fd has the form fd[%d]:%s,
> where %d tells criu which file descriptor to use for restoring file
> identified by %s.

How do you handle conflicts between inherit-fd sets and restored file
descriptors.

For example I have a process with the following set of file descriptors:
0 -> tty
1 -> tty
2 -> tty
3 -> /test
4 -> pipe:[234956]

On restore I create a pipe and use --inherit-fd fd3:pipe:[234956]. In
this case the 3 fd will be restored before the 4 fd. Why does this
operation not overwrite the inherit-fd descriptor?

Why does criu not return an error?
fd 3 already in use (called at files.c:867

> 
> Note that while it's OK to omit --inherit-fd for checkpoint and specify
> it only for restore, it's better to always specify it because checkpoint
> code will not attempt to dump an inherit fd file.
> 
> Signed-off-by: Saied Kazemi <saied at google.com>
> ---
>  crtools.c            |  15 +++++
>  files-reg.c          |   8 +++
>  files.c              | 183 +++++++++++++++++++++++++++++++++++++++++++++++++++
>  include/cr_options.h |   1 +
>  include/files.h      |   8 +++
>  pipes.c              |  33 +++++++++-

Does it work for only pipes now?

>  util.c               |   9 +++

I would like to have tests for this functionality.

>  7 files changed, 255 insertions(+), 2 deletions(-)
> 
> diff --git a/crtools.c b/crtools.c
> index 0ac667c..d7bb6c8 100644
> --- a/crtools.c
> +++ b/crtools.c
> @@ -52,6 +52,7 @@ void init_opts(void)
>  	INIT_LIST_HEAD(&opts.veth_pairs);
>  	INIT_LIST_HEAD(&opts.scripts);
>  	INIT_LIST_HEAD(&opts.ext_mounts);
> +	INIT_LIST_HEAD(&opts.inherit_fds);
>  	INIT_LIST_HEAD(&opts.new_cgroup_roots);
>  
>  	opts.cpu_cap = CPU_CAP_DEFAULT;
> @@ -187,6 +188,7 @@ int main(int argc, char *argv[], char *envp[])
>  		{ "exec-cmd", no_argument, 0, 1059},
>  		{ "manage-cgroups", no_argument, 0, 1060},
>  		{ "cgroup-root", required_argument, 0, 1061},
> +		{ "inherit-fd", required_argument, 0, 1062},
>  		{ },
>  	};
>  
> @@ -392,6 +394,10 @@ int main(int argc, char *argv[], char *envp[])
>  					return -1;
>  			}
>  			break;
> +		case 1062:
> +			if (inherit_fd_add(optarg) < 0)
> +				return 1;
> +			break;
>  		case 'M':
>  			{
>  				char *aux;
> @@ -469,6 +475,9 @@ int main(int argc, char *argv[], char *envp[])
>  	if (log_init(opts.output))
>  		return 1;
>  
> +	/* log the inherit fd list now that log is ready */
> +	inherit_fd_log();
> +
>  	if (opts.img_parent)
>  		pr_info("Will do snapshot from %s\n", opts.img_parent);
>  
> @@ -491,6 +500,12 @@ int main(int argc, char *argv[], char *envp[])
>  		if (tree_id)
>  			pr_warn("Using -t with criu restore is obsoleted\n");
>  
> +		if (inherit_fd_validate_fds() < 0) {
> +			pr_msg("Some inherit fd items are invalid\n");
> +			pr_msg("See log file for details\n");
> +			return 1;
> +		}
> +
>  		ret = cr_restore_tasks();
>  		if (ret == 0 && opts.exec_cmd) {
>  			close_pid_proc();
> diff --git a/files-reg.c b/files-reg.c
> index 20b50a0..bfd1896 100644
> --- a/files-reg.c
> +++ b/files-reg.c
> @@ -962,6 +962,14 @@ int open_path(struct file_desc *d,
>  	}
>  
>  	mntns_root = mntns_get_root_by_mnt_id(rfi->rfe->mnt_id);
> +
> +pr_debug(">>> open_path(): rfi->path=(%s)\n", rfi->path);

^^^^ I think you forgot to remove this message

> +	tmp = inherit_fd_lookup_id(rfi->path, true);
> +	if (tmp >= 0) {
> +		pr_info("File %s will be restored from fd %d\n", rfi->path, tmp);
> +		return tmp;
> +	}
> +
>  	tmp = open_cb(mntns_root, rfi, arg);
>  	if (tmp < 0) {
>  		pr_perror("Can't open file %s", rfi->path);
> diff --git a/files.c b/files.c
> index f009e7d..7bace3a 100644
> --- a/files.c
> +++ b/files.c
> @@ -37,6 +37,7 @@
>  #include "imgset.h"
>  #include "fs-magic.h"
>  #include "proc_parse.h"
> +#include "cr_options.h"
>  
>  #include "parasite.h"
>  #include "parasite-syscall.h"
> @@ -1176,3 +1177,185 @@ int shared_fdt_prepare(struct pstree_item *item)
>  
>  	return 0;
>  }
> +
> +/*
> + * Inherit fd support.
> + *
> + * There are cases where a process's file descriptor cannot be restored
> + * from the checkpointed image.  For example, a pipe file descriptor with
> + * one end in the checkpointed process and the other end in a separate
> + * process (that was not part of the checkpointed process tree) cannot be
> + * restored because after checkpoint the pipe would be broken and removed.
> + * In these cases, criu's caller should set up a new file descriptor to be
> + * inherited by the restored process.
> + *
> + * When checkpointing, the argument of --inherit-fd is a string that
> + * identifies the file.  It can be obtained from the target of the
> + * symbolic link in /proc/<pid>/fd (for example, pipe:[234956]).
> + *
> + * When restoring, the argument of --inherit-fd has the form fd[%d]:%s,
> + * where %d tells criu which file descriptor to use for restoring %s.
> + *
> + * Note that while it's OK to omit --inherit-fd for checkpoint and
> + * specify it only for restore, it's better to always specify it because
> + * checkpoint code will not attempt to dump an inherit fd file.
> + *
> + * In the following example from http://criu.org/Simple_loop, we first
> + * redirect the output of test.sh to /tmp/old and then use criu's file
> + * descriptor 7 to change /tmp/old with /tmp/new:
> + *
> + *     $ ./test.sh > /tmp/old
> + *     $ sudo criu dump -j -t <pid>
> + *     $ sudo criu restore -j --inherit-fd 'fd[7]:tmp/old' 7> /tmp/new
> + *
> + * Notice that the path in the argument of --inherit-fd is relative to the
> + * root of the process (i.e., tmp/old).
> + *
> + * Below is an example of how criu will be called from Docker to checkpoint
> + * and restore a container running in detached mode:
> + *
> + *     $ docker run -d ...
> + *     $ docker checkpoint <container-id>
> + *       |
> + *       +---> criu dump ... --inherit-fd pipe:[686787] pipe:[686788]
> + *     $ docker restore <container-id>
> + *       |
> + *       +---> criu restore ... --inherit-fd fd[1]:pipe:[686787] fd[2]:pipe:[686788]
> + */
> +
> +struct inherit_fd {
> +	char *inh_id;		/* identifier */
> +	int inh_fd;		/* file descriptor to restore from */
> +	struct list_head l;
> +};
> +
> +/*
> + * We can't print diagnostics messages in this function because the
> + * log file isn't initialized yet.
> + */
> +int inherit_fd_add(char *optarg)
> +{
> +	char *cp;
> +	int n;
> +	int fd;
> +	struct inherit_fd *inh;
> +
> +	/* beding debug support */
> +	fd = -1;
> +	n = sscanf(optarg, "debug[%d]:", &fd);

I say nothing about debug[%d]:fd in the commit message.

> +	if (n == 1) {
> +		/*
> +		 * If the argument has the format debug[%d]:%s, it means
> +		 * just write out the string after colon to the file
> +		 * descriptor %d.  This can be used to leave a restore
> +		 * marker in the output stream of the process.
> +		 */
> +		if (fd < 0) {
> +			pr_msg("Invalid fd number %d in argument %s\n",

pr_err

> +				fd, optarg);
> +			return -1;
> +		}
> +		cp = strchr(optarg, ':') + 1;
> +		n = strlen(cp);
> +		if (write(fd, cp, n) != n) {
> +			pr_msg("Cannot write debug message %s to fd %d\n",
> +				cp, fd);
> +			return -1;
> +		}
> +		return 0;
> +	}
> +	/* end debug support */
> +
> +	fd = -1;
> +	n = sscanf(optarg, "fd[%d]:", &fd);
> +	if (n == 1) {
> +		/* argument is in fd[%d]:%s format */
> +		if (fd < 0) {
> +			pr_msg("Invalid fd number %d in argument %s\n", fd, optarg);
> +			return -1;
> +		}
> +		cp = strchr(optarg, ':');
> +		*cp++ = '\0';
> +	} else {
> +		/* argument is in %s format */
> +		cp = optarg;
> +	}
> +
> +	if ((inh = xmalloc(sizeof *inh)) == NULL)
> +		return -1;
> +	inh->inh_id = cp;
> +	inh->inh_fd = fd;
> +	list_add_tail(&inh->l, &opts.inherit_fds);
> +	return 0;
> +}
> +
> +/*
> + * Log the inherit fd list.  Called for diagnostics purposes
> + * after the log file is initialized.
> + */
> +void inherit_fd_log(void)
> +{
> +	struct inherit_fd *inh;
> +
> +	list_for_each_entry(inh, &opts.inherit_fds, l) {
> +		if (inh->inh_fd < 0) {
> +			pr_info("File %s will be marked as inherit fd\n",
> +				inh->inh_id);
> +		} else {
> +			pr_info("File %s will be restored from criu's fd %d\n",
> +				inh->inh_id, inh->inh_fd);
> +		}
> +	}
> +}
> +
> +/*
> + * Validate that all entries in the inherit fd list have a valid
> + * file descriptor.
> + */
> +int inherit_fd_validate_fds(void)
> +{
> +	struct inherit_fd *inh;
> +
> +	list_for_each_entry(inh, &opts.inherit_fds, l) {
> +		if (inh->inh_fd < 0)
> +			return -1;
> +	}
> +	return 0;
> +}
> +
> +/*
> + * Look up the inherit fd list by a file identifier.
> + *
> + * If need_fd is true, the caller wants the corresponding mapping fd.
> + * Otherwise, the caller just wants to know if the file is in the list.
> + */
> +int inherit_fd_lookup_id(char *id, bool need_fd)
> +{
> +	struct inherit_fd *inh;
> +
> +	list_for_each_entry(inh, &opts.inherit_fds, l) {
> +		if (!strcmp(inh->inh_id, id)) {
> +			pr_info("File identifier %s in inherit fd list\n", id);
> +			return need_fd ? inh->inh_fd : 0;
> +		}
> +	}
> +
> +	return -1;
> +}
> +
> +/*
> + * Look up the inherit fd list by a file descriptor.
> + */
> +bool inherit_fd_lookup_fd(int fd)
> +{
> +	struct inherit_fd *inh;
> +
> +	list_for_each_entry(inh, &opts.inherit_fds, l) {
> +		if (inh->inh_fd == fd) {
> +			pr_info("File descriptor %d in inherit fd list\n", fd);
> +			return true;
> +		}
> +	}
> +
> +	return false;
> +}
> diff --git a/include/cr_options.h b/include/cr_options.h
> index a9f9e92..3735b09 100644
> --- a/include/cr_options.h
> +++ b/include/cr_options.h
> @@ -43,6 +43,7 @@ struct cr_options {
>  	struct list_head	veth_pairs;
>  	struct list_head	scripts;
>  	struct list_head	ext_mounts;
> +	struct list_head	inherit_fds;
>  	char			*libdir;
>  	bool			use_page_server;
>  	unsigned short		ps_port;
> diff --git a/include/files.h b/include/files.h
> index 4c9300d..e4e304f 100644
> --- a/include/files.h
> +++ b/include/files.h
> @@ -168,4 +168,12 @@ extern struct collect_image_info ext_file_cinfo;
>  extern int dump_unsupp_fd(struct fd_parms *p, int lfd,
>  			  struct cr_img *, char *more, char *info);
>  
> +#define INHERIT_FD 0xcafebabe		/* marker */
> +
> +extern int inherit_fd_add(char *optarg);
> +extern void inherit_fd_log(void);
> +extern int inherit_fd_validate_fds(void);
> +extern int inherit_fd_lookup_id(char *id, bool need_fd);
> +extern bool inherit_fd_lookup_fd(int fd);
> +
>  #endif /* __CR_FILES_H__ */
> diff --git a/pipes.c b/pipes.c
> index ddcc5cf..73bc790 100644
> --- a/pipes.c
> +++ b/pipes.c
> @@ -293,8 +293,20 @@ static int recv_pipe_fd(struct pipe_info *pi)
>  	return fd;
>  }
>  
> +static char *pipe_id_string(int pipe_id)
> +{
> +	static char idstr[16];
> +
> +	if (snprintf(idstr, sizeof idstr, "pipe:[%d]", pipe_id) >= sizeof idstr) {
> +		pr_err("Not enough room for pipe %d identifier string\n", pipe_id);
> +		return NULL;
> +	}
> +	return idstr;
> +}
> +
>  static int open_pipe(struct file_desc *d)
>  {
> +	char *pipe_name;
>  	struct pipe_info *pi, *p;
>  	int ret, tmp;
>  	int pfd[2];
> @@ -304,6 +316,18 @@ static int open_pipe(struct file_desc *d)
>  
>  	pr_info("\t\tCreating pipe pipe_id=%#x id=%#x\n", pi->pe->pipe_id, pi->pe->id);
>  
> +	pipe_name = pipe_id_string(pi->pe->pipe_id);
> +	tmp = inherit_fd_lookup_id(pipe_name, true);
> +	if (tmp >= 0) {
> +		pr_info("Pipe %s will be restored from inherit fd %d\n",
> +			pipe_name, tmp);
> +		return tmp;
> +	}
> +	if (pi->pe->flags == INHERIT_FD) {
> +		pr_err("No inherit fd for pipe %s\n", pipe_name);
> +		return -1;
> +	}
> +
>  	if (!pi->create)
>  		return recv_pipe_fd(pi);
>  
> @@ -497,13 +521,18 @@ static int dump_one_pipe(int lfd, u32 id, const struct fd_parms *p)
>  
>  	pe.id		= id;
>  	pe.pipe_id	= pipe_id(p);
> -	pe.flags	= p->flags;
>  	pe.fown		= (FownEntry *)&p->fown;
> +	if (inherit_fd_lookup_id(pipe_id_string(pe.pipe_id), false) < 0)
> +		pe.flags = p->flags;
> +	else {
> +		pe.flags = INHERIT_FD;
> +		pr_info("Pipe %d marked as inherit fd\n", pe.pipe_id);
> +	}
>  
>  	if (pb_write_one(img_from_set(glob_imgset, CR_FD_PIPES), &pe, PB_PIPE))
>  		return -1;
>  
> -	return dump_one_pipe_data(&pd_pipes, lfd, p);
> +	return pe.flags == INHERIT_FD ? 0 : dump_one_pipe_data(&pd_pipes, lfd, p);
>  }
>  
>  const struct fdtype_ops pipe_dump_ops = {
> diff --git a/util.c b/util.c
> index dd76863..3b468f2 100644
> --- a/util.c
> +++ b/util.c
> @@ -41,6 +41,7 @@
>  #include "cr_options.h"
>  #include "servicefd.h"
>  #include "cr-service.h"
> +#include "files.h"
>  
>  #define VMA_OPT_LEN	128
>  
> @@ -93,6 +94,14 @@ void pr_vma(unsigned int loglevel, const struct vma_area *vma_area)
>  int close_safe(int *fd)
>  {
>  	int ret = 0;
> +
> +	/*
> +	 * Don't close files descriptors that criu's caller
> +	 * set up to be inherited by the restored process.
> +	 */
> +	if (inherit_fd_lookup_fd(*fd))
> +		return 0;

I think it's a bad place for this check. close_safe() is a generic
function for closing file descriptors.

For example one of patterns how it's used

	int fd = -1;
	if (smth)
		goto err;

	fd = open();
	if (fd < 0)
		goto err;

	...

err:
	close_safe(&fd)
	return -1;
> +
>  	if (*fd > -1) {
>  		ret = close(*fd);
>  		if (!ret)
> -- 
> 2.1.0.rc2.206.gedb03e5
> 
> _______________________________________________
> CRIU mailing list
> CRIU at openvz.org
> https://lists.openvz.org/mailman/listinfo/criu


More information about the CRIU mailing list