[CRIU] [PATCH 4/4] cr-service: use inherit-fd keys to ask for FD
Andrei Vagin
avagin at virtuozzo.com
Wed Aug 8 19:42:32 MSK 2018
On Wed, Aug 08, 2018 at 02:27:15PM +0200, Adrian Reber wrote:
> On Tue, Aug 07, 2018 at 10:47:02PM -0700, Andrei Vagin wrote:
> > On Tue, Aug 07, 2018 at 10:44:01PM +0200, Adrian Reber wrote:
> > > On Mon, Aug 06, 2018 at 02:01:05PM -0700, Andrei Vagin wrote:
> > > > On Mon, Aug 06, 2018 at 04:55:00PM +0000, Adrian Reber wrote:
> > > > > From: Adrian Reber <areber at redhat.com>
> > > > >
> > > > > Using inherit-fd in RPC mode uses the notify mechanism to ask the RPC
> > > > > client for the real FD.
> > > > >
> > > > > The basic workflow of inherit-fd in RPC mode is:
> > > > >
> > > > > * RPC clients fills in inherit-fd field:
> > > > > * inheritFd.Key = proto.String(path.Base(nsPath))
> > > > > * inheritFd.Fd is irrelevant as it will be transmitted later
> > > > > * CRIU receives RPC messages and calls inherit_fd_add_rpc()
> > > > > * this transmits the inheritFd.Key back to the RPC client
> > > > > * RPC client uses unix domain socket to send the real FD
> > > > > to CRIU
> > > > >
> > > > > Unfortunately this is more complicated as runc already uses
> > > > > inheritFd.Fd to send the file descriptors for 0, 1, 2. As those are
> > > > > always stdin, stdout, stderr is just worked even if the transmitted
> > > > > values via RPC were not actual FDs but only numbers.
> > > > >
> > > > > To handle this, CRIU still accepts inherit-fds directly as FDs if it is
> > > > > 0, 1, 2. This is a compatibility hack to avoid breaking runc.
> > > > >
> > > > > Signed-off-by: Adrian Reber <areber at redhat.com>
> > > > > ---
> > > > > criu/action-scripts.c | 10 ++++++---
> > > > > criu/cr-service.c | 38 ++++++++++++++++++++++++++++++++---
> > > > > criu/files.c | 10 +++++++++
> > > > > criu/include/action-scripts.h | 4 ++--
> > > > > criu/include/files.h | 1 +
> > > > > criu/tty.c | 2 +-
> > > > > 6 files changed, 56 insertions(+), 9 deletions(-)
> > > > >
> > > > > diff --git a/criu/action-scripts.c b/criu/action-scripts.c
> > > > > index 4e9eb65cf..1a7523527 100644
> > > > > --- a/criu/action-scripts.c
> > > > > +++ b/criu/action-scripts.c
> > > > > @@ -100,7 +100,11 @@ static int run_shell_scripts(const char *action)
> > > > > return retval;
> > > > > }
> > > > >
> > > > > -int rpc_send_fd(enum script_actions act, int fd)
> > > > > +/*
> > > > > + * The name of this function is misleading. Right now it only
> > > > > + * sends an FD in one of three possible use cases.
> > > > > + */
> > > > > +int rpc_send_fd(enum script_actions act, int fd, char *key)
> > > > > {
> > > > > const char *action = action_names[act];
> > > > > int rpc_sk;
> > > > > @@ -113,7 +117,7 @@ int rpc_send_fd(enum script_actions act, int fd)
> > > > > return -1;
> > > > >
> > > > > pr_debug("\tRPC\n");
> > > > > - return send_criu_rpc_script(act, (char *)action, rpc_sk, fd);
> > > > > + return send_criu_rpc_script(act, (char *)action, rpc_sk, fd, key);
> > > > > }
> > > > >
> > > > > int run_scripts(enum script_actions act)
> > > > > @@ -127,7 +131,7 @@ int run_scripts(enum script_actions act)
> > > > > return 0;
> > > > >
> > > > > if (scripts_mode == SCRIPTS_RPC) {
> > > > > - ret = rpc_send_fd(act, -1);
> > > > > + ret = rpc_send_fd(act, -1, NULL);
> > > > > goto out;
> > > > > }
> > > > >
> > > > > diff --git a/criu/cr-service.c b/criu/cr-service.c
> > > > > index 643aba9cf..45d8b3f44 100644
> > > > > --- a/criu/cr-service.c
> > > > > +++ b/criu/cr-service.c
> > > > > @@ -188,9 +188,10 @@ int send_criu_restore_resp(int socket_fd, bool success, int pid)
> > > > > return send_criu_msg(socket_fd, &msg);
> > > > > }
> > > > >
> > > > > -int send_criu_rpc_script(enum script_actions act, char *name, int sk, int fd)
> > > > > +int send_criu_rpc_script(enum script_actions act, char *name, int sk, int fd, char *key)
> > > > > {
> > > > > int ret;
> > > > > + int ret_fd = 0;
> > > > > CriuResp msg = CRIU_RESP__INIT;
> > > > > CriuReq *req;
> > > > > CriuNotify cn = CRIU_NOTIFY__INIT;
> > > > > @@ -211,6 +212,15 @@ int send_criu_rpc_script(enum script_actions act, char *name, int sk, int fd)
> > > > > cn.has_pid = true;
> > > > > cn.pid = root_item->pid->real;
> > > > > break;
> > > > > + case ACT_REQ_INHERIT_FD:
> > > > > + /*
> > > > > + * Sending a 'ACT_REQ_INHERIT_FD' notify message
> > > > > + * only makes sense when 'key' is set.
> > > > > + */
> > > > > + if (!key)
> > > > > + return -1;
> > > > > + cn.inherit_fd_key = key;
> > > > > + break;
> > > > > default:
> > > > > break;
> > > > > }
> > > > > @@ -219,6 +229,18 @@ int send_criu_rpc_script(enum script_actions act, char *name, int sk, int fd)
> > > > > if (ret < 0)
> > > > > return ret;
> > > > >
> > > > > + if (act == ACT_REQ_INHERIT_FD) {
> > > > > + /*
> > > > > + * Sending a ACT_REQ_INHERIT_FD notify message means, that CRIU
> > > > > + * expects to get a FD from the RPC client.
> > > > > + */
> > > > > + ret_fd = recv_fd(sk);
> > > > > + if (ret_fd <= 0) {
> > > > > + pr_perror("recv_fd error\n");
> > > > > + return -1;
> > > > > + }
> > > > > + }
> > > > > +
> > > > > ret = recv_criu_msg(sk, &req);
> > > > > if (ret < 0)
> > > > > return ret;
> > > > > @@ -229,7 +251,7 @@ int send_criu_rpc_script(enum script_actions act, char *name, int sk, int fd)
> > > > > }
> > > > >
> > > > > criu_req__free_unpacked(req, NULL);
> > > > > - return 0;
> > > > > + return ret_fd;
> > > > > }
> > > > >
> > > > > static char images_dir[PATH_MAX];
> > > > > @@ -481,11 +503,21 @@ static int setup_opts_from_req(int sk, CriuOpts *req)
> > > > > }
> > > > >
> > > > > if (req->n_inherit_fd && !opts.swrk_restore) {
> > > > > + /* TODO: Is this really still true, even with callback handling for inherit_fd */
> > > > > pr_err("inherit_fd is not allowed in standalone service\n");
> > > > > goto err;
> > > > > }
> > > > > for (i = 0; i < req->n_inherit_fd; i++) {
> > > > > - if (inherit_fd_add(req->inherit_fd[i]->fd, req->inherit_fd[i]->key))
> > > > > + /*
> > > > > + * For the FDs 0, 1, 2 this falls back to old, non callbacb
> > > > > + * inherit_fd mode. At some point in the future this can be removed.
> > > > > + */
> > > > > + if (req->inherit_fd[i]->fd >=0 && req->inherit_fd[i]->fd <= 2) {
> > > >
> > > > We can add the following patch and handle all non-negative fd
> > > >
> > > > diff --git a/images/rpc.proto b/images/rpc.proto
> > > > index 33ef3302b..04b933674 100644
> > > > --- a/images/rpc.proto
> > > > +++ b/images/rpc.proto
> > > > @@ -25,7 +25,7 @@ message join_namespace {
> > > >
> > > > message inherit_fd {
> > > > required string key = 1;
> > > > - required int32 fd = 2;
> > > > + required int32 fd = 2 [default = -1];
> > > > };
> > > >
> > > > message cgroup_root {
> > >
> > > Hmm, how could this be used? With my current CRIU and runc patches, CRIU
> > > would look at 'fd' at only do something special if it is 0, 1, 2. See
> > > below. If it is > 2 or < 0 would be the same. CRIU just ignores all
> > > other FD values as they have to be retrieved via the new callback.
> > >
> > > > > + if (inherit_fd_add(req->inherit_fd[i]->fd, req->inherit_fd[i]->key))
> > > > > + goto err;
> > > > > + continue;
> > > > > + }
> > > > > + if (inherit_fd_add_rpc(req->inherit_fd[i]->key))
> > > >
> > > > I think you misunderstood me. I was thinking to request an inherit fd from
> > > > inherit_fd_lookup_id() to avoid fd clashes on restore
> > >
> > > I still do not get it. How is this related to inherit_fd_lookup_id()
> > >
> > > > (inherit_fd_resolve_clash). It will work this way too, but in case of
> > > > swrk, it will be more efficiant just to run criu swrk will all required
> > > > fds (this way is handled in a previous "if").
> > >
> > > I also do not understand this. 'criu swrk with all required fds'? Like
> > > runc starts CRIU in swrk mode, with an additional parameter as an FD?
> > >
> > > Right now runc does 'criu swrk 3'. Where '3' is the RPC data channel,
> > > right? And you say to add more FDs after the '3'?
> >
> > No. I'm trying to say a different thing.
> >
> > When we start a new process, it inherits all file descriptors from a
> > parent for which the FD_CLOEXEC bit isn't set. So it is possible to
> > start a criu swrk process which will have all required descriptors from
> > a parent process and then we will only need to send their numbers in an
> > rpc message.
>
> I just tried it and I guess I am missing something. How do I open the
> file descriptor I get from the parent process?
You open a file descriptor in a parent process and then it should be
inherited by a child process (criu swrk). I have attached a patch for
runc. Coould you take a look at it? I don't test it, but I think it can
help to understand the idea.
>
> And how do I know if I need to open it read-only, write-only,
> read-write?
The parent process should know properties of all external descriptors.
-------------- next part --------------
diff --git a/libcontainer/container_linux.go b/libcontainer/container_linux.go
index b9538142..896b7f1e 100644
--- a/libcontainer/container_linux.go
+++ b/libcontainer/container_linux.go
@@ -657,7 +657,7 @@ func (c *linuxContainer) checkCriuFeatures(criuOpts *CriuOpts, rpcOpts *criurpc.
Features: criuFeat,
}
- err := c.criuSwrk(nil, req, criuOpts, false)
+ err := c.criuSwrk(nil, req, criuOpts, false, nil)
if err != nil {
logrus.Debugf("%s", err)
return fmt.Errorf("CRIU feature check failed")
@@ -770,7 +770,7 @@ func (c *linuxContainer) checkCriuVersion(minVersion int) error {
Type: &t,
}
- err := c.criuSwrk(nil, req, nil, false)
+ err := c.criuSwrk(nil, req, nil, false, nil)
if err != nil {
return fmt.Errorf("CRIU version check failed: %s", err)
}
@@ -1059,7 +1059,7 @@ func (c *linuxContainer) Checkpoint(criuOpts *CriuOpts) error {
}
}
- err = c.criuSwrk(nil, req, criuOpts, false)
+ err = c.criuSwrk(nil, req, criuOpts, false, nil)
if err != nil {
return err
}
@@ -1103,6 +1103,8 @@ func (c *linuxContainer) Restore(process *Process, criuOpts *CriuOpts) error {
c.m.Lock()
defer c.m.Unlock()
+ var extraFiles []*os.File
+
// TODO(avagin): Figure out how to make this work nicely. CRIU doesn't have
// support for unprivileged restore at the moment.
if c.config.Rootless {
@@ -1193,6 +1195,7 @@ func (c *linuxContainer) Restore(process *Process, criuOpts *CriuOpts) error {
// The <key> needs to be the same as during checkpointing.
// We are always using the path.Base(nsPath) as the key in this.
netns, err := os.Open(nsPath)
+ defer netns.Close()
if err != nil {
logrus.Error("If a specific network namespace is defined it must exist.")
logrus.Error(err)
@@ -1204,6 +1207,7 @@ func (c *linuxContainer) Restore(process *Process, criuOpts *CriuOpts) error {
// and by re-opening exactly that FD.
inheritFd.Fd = proto.Int32(int32(netns.Fd()))
req.Opts.InheritFd = append(req.Opts.InheritFd, inheritFd)
+ extraFiles = append(extraFiles, netns)
}
}
@@ -1265,7 +1269,7 @@ func (c *linuxContainer) Restore(process *Process, criuOpts *CriuOpts) error {
req.Opts.InheritFd = append(req.Opts.InheritFd, inheritFd)
}
}
- return c.criuSwrk(process, req, criuOpts, true)
+ return c.criuSwrk(process, req, criuOpts, true, extraFiles)
}
func (c *linuxContainer) criuApplyCgroups(pid int, req *criurpc.CriuReq) error {
@@ -1295,7 +1299,7 @@ func (c *linuxContainer) criuApplyCgroups(pid int, req *criurpc.CriuReq) error {
return nil
}
-func (c *linuxContainer) criuSwrk(process *Process, req *criurpc.CriuReq, opts *CriuOpts, applyCgroups bool) error {
+func (c *linuxContainer) criuSwrk(process *Process, req *criurpc.CriuReq, opts *CriuOpts, applyCgroups bool, extraFiles []*os.File) error {
fds, err := unix.Socketpair(unix.AF_LOCAL, unix.SOCK_SEQPACKET|unix.SOCK_CLOEXEC, 0)
if err != nil {
return err
@@ -1335,6 +1339,9 @@ func (c *linuxContainer) criuSwrk(process *Process, req *criurpc.CriuReq, opts *
cmd.Stdout = process.Stdout
cmd.Stderr = process.Stderr
}
+ if extraFiles != nil {
+ cmd.ExtraFiles = append(cmd.ExtraFiles, extraFiles...)
+ }
cmd.ExtraFiles = append(cmd.ExtraFiles, criuServer)
if err := cmd.Start(); err != nil {
More information about the CRIU
mailing list