[CRIU] [PATCH 4/4] cr-service: use inherit-fd keys to ask for FD

Andrei Vagin avagin at virtuozzo.com
Wed Aug 8 19:42:32 MSK 2018


On Wed, Aug 08, 2018 at 02:27:15PM +0200, Adrian Reber wrote:
> On Tue, Aug 07, 2018 at 10:47:02PM -0700, Andrei Vagin wrote:
> > On Tue, Aug 07, 2018 at 10:44:01PM +0200, Adrian Reber wrote:
> > > On Mon, Aug 06, 2018 at 02:01:05PM -0700, Andrei Vagin wrote:
> > > > On Mon, Aug 06, 2018 at 04:55:00PM +0000, Adrian Reber wrote:
> > > > > From: Adrian Reber <areber at redhat.com>
> > > > > 
> > > > > Using inherit-fd in RPC mode uses the notify mechanism to ask the RPC
> > > > > client for the real FD.
> > > > > 
> > > > > The basic workflow of inherit-fd in RPC mode is:
> > > > > 
> > > > > >  * RPC client fills in the inherit-fd field:
> > > > >    * inheritFd.Key = proto.String(path.Base(nsPath))
> > > > >    * inheritFd.Fd is irrelevant as it will be transmitted later
> > > > >  * CRIU receives RPC messages and calls inherit_fd_add_rpc()
> > > > >    * this transmits the inheritFd.Key back to the RPC client
> > > > >    * RPC client uses unix domain socket to send the real FD
> > > > >      to CRIU
> > > > > 
> > > > > Unfortunately this is more complicated as runc already uses
> > > > > inheritFd.Fd to send the file descriptors for 0, 1, 2. As those are
> > > > > > always stdin, stdout, stderr it just worked even if the transmitted
> > > > > values via RPC were not actual FDs but only numbers.
> > > > > 
> > > > > To handle this, CRIU still accepts inherit-fds directly as FDs if it is
> > > > > 0, 1, 2. This is a compatibility hack to avoid breaking runc.
> > > > > 
> > > > > Signed-off-by: Adrian Reber <areber at redhat.com>
> > > > > ---
> > > > >  criu/action-scripts.c         | 10 ++++++---
> > > > >  criu/cr-service.c             | 38 ++++++++++++++++++++++++++++++++---
> > > > >  criu/files.c                  | 10 +++++++++
> > > > >  criu/include/action-scripts.h |  4 ++--
> > > > >  criu/include/files.h          |  1 +
> > > > >  criu/tty.c                    |  2 +-
> > > > >  6 files changed, 56 insertions(+), 9 deletions(-)
> > > > > 
> > > > > diff --git a/criu/action-scripts.c b/criu/action-scripts.c
> > > > > index 4e9eb65cf..1a7523527 100644
> > > > > --- a/criu/action-scripts.c
> > > > > +++ b/criu/action-scripts.c
> > > > > @@ -100,7 +100,11 @@ static int run_shell_scripts(const char *action)
> > > > >  	return retval;
> > > > >  }
> > > > >  
> > > > > -int rpc_send_fd(enum script_actions act, int fd)
> > > > > +/*
> > > > > + * The name of this function is misleading. Right now it only
> > > > > + * sends an FD in one of three possible use cases.
> > > > > + */
> > > > > +int rpc_send_fd(enum script_actions act, int fd, char *key)
> > > > >  {
> > > > >  	const char *action = action_names[act];
> > > > >  	int rpc_sk;
> > > > > @@ -113,7 +117,7 @@ int rpc_send_fd(enum script_actions act, int fd)
> > > > >  		return -1;
> > > > >  
> > > > >  	pr_debug("\tRPC\n");
> > > > > -	return send_criu_rpc_script(act, (char *)action, rpc_sk, fd);
> > > > > +	return send_criu_rpc_script(act, (char *)action, rpc_sk, fd, key);
> > > > >  }
> > > > >  
> > > > >  int run_scripts(enum script_actions act)
> > > > > @@ -127,7 +131,7 @@ int run_scripts(enum script_actions act)
> > > > >  		return 0;
> > > > >  
> > > > >  	if (scripts_mode == SCRIPTS_RPC) {
> > > > > -		ret = rpc_send_fd(act, -1);
> > > > > +		ret = rpc_send_fd(act, -1, NULL);
> > > > >  		goto out;
> > > > >  	}
> > > > >  
> > > > > diff --git a/criu/cr-service.c b/criu/cr-service.c
> > > > > index 643aba9cf..45d8b3f44 100644
> > > > > --- a/criu/cr-service.c
> > > > > +++ b/criu/cr-service.c
> > > > > @@ -188,9 +188,10 @@ int send_criu_restore_resp(int socket_fd, bool success, int pid)
> > > > >  	return send_criu_msg(socket_fd, &msg);
> > > > >  }
> > > > >  
> > > > > -int send_criu_rpc_script(enum script_actions act, char *name, int sk, int fd)
> > > > > +int send_criu_rpc_script(enum script_actions act, char *name, int sk, int fd, char *key)
> > > > >  {
> > > > >  	int ret;
> > > > > +	int ret_fd = 0;
> > > > >  	CriuResp msg = CRIU_RESP__INIT;
> > > > >  	CriuReq *req;
> > > > >  	CriuNotify cn = CRIU_NOTIFY__INIT;
> > > > > @@ -211,6 +212,15 @@ int send_criu_rpc_script(enum script_actions act, char *name, int sk, int fd)
> > > > >  		cn.has_pid = true;
> > > > >  		cn.pid = root_item->pid->real;
> > > > >  		break;
> > > > > +	case ACT_REQ_INHERIT_FD:
> > > > > +		/*
> > > > > +		 * Sending a 'ACT_REQ_INHERIT_FD' notify message
> > > > > +		 * only makes sense when 'key' is set.
> > > > > +		 */
> > > > > +		if (!key)
> > > > > +			return -1;
> > > > > +		cn.inherit_fd_key = key;
> > > > > +		break;
> > > > >  	default:
> > > > >  		break;
> > > > >  	}
> > > > > @@ -219,6 +229,18 @@ int send_criu_rpc_script(enum script_actions act, char *name, int sk, int fd)
> > > > >  	if (ret < 0)
> > > > >  		return ret;
> > > > >  
> > > > > +	if (act == ACT_REQ_INHERIT_FD) {
> > > > > +		/*
> > > > > +		 * Sending a ACT_REQ_INHERIT_FD notify message means, that CRIU
> > > > > +		 * expects to get a FD from the RPC client.
> > > > > +		 */
> > > > > +		ret_fd = recv_fd(sk);
> > > > > +		if (ret_fd <= 0) {
> > > > > +			pr_perror("recv_fd error\n");
> > > > > +			return -1;
> > > > > +		}
> > > > > +	}
> > > > > +
> > > > >  	ret = recv_criu_msg(sk, &req);
> > > > >  	if (ret < 0)
> > > > >  		return ret;
> > > > > @@ -229,7 +251,7 @@ int send_criu_rpc_script(enum script_actions act, char *name, int sk, int fd)
> > > > >  	}
> > > > >  
> > > > >  	criu_req__free_unpacked(req, NULL);
> > > > > -	return 0;
> > > > > +	return ret_fd;
> > > > >  }
> > > > >  
> > > > >  static char images_dir[PATH_MAX];
> > > > > @@ -481,11 +503,21 @@ static int setup_opts_from_req(int sk, CriuOpts *req)
> > > > >  	}
> > > > >  
> > > > >  	if (req->n_inherit_fd && !opts.swrk_restore) {
> > > > > +		/* TODO: Is this really still true, even with callback handling for inherit_fd */
> > > > >  		pr_err("inherit_fd is not allowed in standalone service\n");
> > > > >  		goto err;
> > > > >  	}
> > > > >  	for (i = 0; i < req->n_inherit_fd; i++) {
> > > > > -		if (inherit_fd_add(req->inherit_fd[i]->fd, req->inherit_fd[i]->key))
> > > > > +		/*
> > > > > +		 * For the FDs 0, 1, 2 this falls back to old, non callbacb
> > > > > +		 * inherit_fd mode. At some point in the future this can be removed.
> > > > > +		 */
> > > > > +		if (req->inherit_fd[i]->fd >=0 && req->inherit_fd[i]->fd <= 2) {
> > > > 
> > > > We can add the following patch and handle all non-negative fd
> > > > 
> > > > diff --git a/images/rpc.proto b/images/rpc.proto
> > > > index 33ef3302b..04b933674 100644
> > > > --- a/images/rpc.proto
> > > > +++ b/images/rpc.proto
> > > > @@ -25,7 +25,7 @@ message join_namespace {
> > > >  
> > > >  message inherit_fd {
> > > >         required string         key     = 1;
> > > > -       required int32          fd      = 2;
> > > > +       required int32          fd      = 2 [default = -1];
> > > >  };
> > > >  
> > > >  message cgroup_root {
> > > 
> > > Hmm, how could this be used? With my current CRIU and runc patches, CRIU
> > > > would look at 'fd' and only do something special if it is 0, 1, 2. See
> > > below. If it is > 2 or < 0 would be the same. CRIU just ignores all
> > > other FD values as they have to be retrieved via the new callback.
> > > 
> > > > > +			if (inherit_fd_add(req->inherit_fd[i]->fd, req->inherit_fd[i]->key))
> > > > > +				goto err;
> > > > > +			continue;
> > > > > +		}
> > > > > +		if (inherit_fd_add_rpc(req->inherit_fd[i]->key))
> > > > 
> > > > I think you misunderstood me. I was thinking to request an inherit fd from
> > > > inherit_fd_lookup_id() to avoid  fd clashes on restore
> > > 
> > > I still do not get it. How is this related to inherit_fd_lookup_id()
> > > 
> > > > (inherit_fd_resolve_clash). It will work this way too, but in case of
> > > > > swrk, it will be more efficient just to run criu swrk with all required
> > > > fds (this way is handled in a previous "if").
> > > 
> > > I also do not understand this. 'criu swrk with all required fds'? Like
> > > runc starts CRIU in swrk mode, with an additional parameter as an FD?
> > > 
> > > Right now runc does 'criu swrk 3'. Where '3' is the RPC data channel,
> > > right? And you say to add more FDs after the '3'?
> > 
> > No. I'm trying to say a different thing.
> > 
> > When we start a new process, it inherits all file descriptors from a
> > parent for which the FD_CLOEXEC bit isn't set. So it is possible to
> > start a criu swrk process which will have all required descriptors from
> > a parent process and then we will only need to send their numbers in an
> > rpc message.
> 
> I just tried it and I guess I am missing something. How do I open the
> file descriptor I get from the parent process?

You open a file descriptor in a parent process and then it should be
inherited by a child process (criu swrk). I have attached a patch for
runc. Could you take a look at it? I haven't tested it, but I think it can
help to understand the idea.

> 
> And how do I know if I need to open it read-only, write-only,
> read-write?

The parent process should know properties of all external descriptors.
-------------- next part --------------
diff --git a/libcontainer/container_linux.go b/libcontainer/container_linux.go
index b9538142..896b7f1e 100644
--- a/libcontainer/container_linux.go
+++ b/libcontainer/container_linux.go
@@ -657,7 +657,7 @@ func (c *linuxContainer) checkCriuFeatures(criuOpts *CriuOpts, rpcOpts *criurpc.
 		Features: criuFeat,
 	}
 
-	err := c.criuSwrk(nil, req, criuOpts, false)
+	err := c.criuSwrk(nil, req, criuOpts, false, nil)
 	if err != nil {
 		logrus.Debugf("%s", err)
 		return fmt.Errorf("CRIU feature check failed")
@@ -770,7 +770,7 @@ func (c *linuxContainer) checkCriuVersion(minVersion int) error {
 		Type: &t,
 	}
 
-	err := c.criuSwrk(nil, req, nil, false)
+	err := c.criuSwrk(nil, req, nil, false, nil)
 	if err != nil {
 		return fmt.Errorf("CRIU version check failed: %s", err)
 	}
@@ -1059,7 +1059,7 @@ func (c *linuxContainer) Checkpoint(criuOpts *CriuOpts) error {
 		}
 	}
 
-	err = c.criuSwrk(nil, req, criuOpts, false)
+	err = c.criuSwrk(nil, req, criuOpts, false, nil)
 	if err != nil {
 		return err
 	}
@@ -1103,6 +1103,8 @@ func (c *linuxContainer) Restore(process *Process, criuOpts *CriuOpts) error {
 	c.m.Lock()
 	defer c.m.Unlock()
 
+	var extraFiles []*os.File
+
 	// TODO(avagin): Figure out how to make this work nicely. CRIU doesn't have
 	//               support for unprivileged restore at the moment.
 	if c.config.Rootless {
@@ -1193,6 +1195,7 @@ func (c *linuxContainer) Restore(process *Process, criuOpts *CriuOpts) error {
 			// The <key> needs to be the same as during checkpointing.
 			// We are always using the path.Base(nsPath) as the key in this.
 			netns, err := os.Open(nsPath)
+			defer netns.Close()
 			if err != nil {
 				logrus.Error("If a specific network namespace is defined it must exist.")
 				logrus.Error(err)
@@ -1204,6 +1207,7 @@ func (c *linuxContainer) Restore(process *Process, criuOpts *CriuOpts) error {
 			// and by re-opening exactly that FD.
 			inheritFd.Fd = proto.Int32(int32(netns.Fd()))
 			req.Opts.InheritFd = append(req.Opts.InheritFd, inheritFd)
+			extraFiles = append(extraFiles, netns)
 		}
 	}
 
@@ -1265,7 +1269,7 @@ func (c *linuxContainer) Restore(process *Process, criuOpts *CriuOpts) error {
 			req.Opts.InheritFd = append(req.Opts.InheritFd, inheritFd)
 		}
 	}
-	return c.criuSwrk(process, req, criuOpts, true)
+	return c.criuSwrk(process, req, criuOpts, true, extraFiles)
 }
 
 func (c *linuxContainer) criuApplyCgroups(pid int, req *criurpc.CriuReq) error {
@@ -1295,7 +1299,7 @@ func (c *linuxContainer) criuApplyCgroups(pid int, req *criurpc.CriuReq) error {
 	return nil
 }
 
-func (c *linuxContainer) criuSwrk(process *Process, req *criurpc.CriuReq, opts *CriuOpts, applyCgroups bool) error {
+func (c *linuxContainer) criuSwrk(process *Process, req *criurpc.CriuReq, opts *CriuOpts, applyCgroups bool, extraFiles []*os.File) error {
 	fds, err := unix.Socketpair(unix.AF_LOCAL, unix.SOCK_SEQPACKET|unix.SOCK_CLOEXEC, 0)
 	if err != nil {
 		return err
@@ -1335,6 +1339,9 @@ func (c *linuxContainer) criuSwrk(process *Process, req *criurpc.CriuReq, opts *
 		cmd.Stdout = process.Stdout
 		cmd.Stderr = process.Stderr
 	}
+	if extraFiles != nil {
+		cmd.ExtraFiles = append(cmd.ExtraFiles, extraFiles...)
+	}
 	cmd.ExtraFiles = append(cmd.ExtraFiles, criuServer)
 
 	if err := cmd.Start(); err != nil {


More information about the CRIU mailing list