[CRIU] [PATCH 08/11] net: allow to dump and restore more than one network namespace
Andrei Vagin
avagin at virtuozzo.com
Fri Feb 3 15:05:46 PST 2017
On Fri, Feb 03, 2017 at 05:29:23PM +0300, Kirill Tkhai wrote:
>
>
> On 02.02.2017 03:04, Andrei Vagin wrote:
> > From: Andrei Vagin <avagin at virtuozzo.com>
> >
> > Restore all network namespaces from the root task and then set
> > a proper namespace for each task after restoring sockets, because
> > we need to switch network namespaces to restore sockets.
> >
> > Each socket has to be created in a proper network namespace.
> >
> > Signed-off-by: Andrei Vagin <avagin at virtuozzo.com>
> > ---
> > criu/cr-restore.c | 21 ++++++----
> > criu/include/namespaces.h | 4 +-
> > criu/include/net.h | 6 ++-
> > criu/namespaces.c | 6 +--
> > criu/net.c | 104 +++++++++++++++++++++++++++++++++++++++++++++-
> > criu/pstree.c | 4 ++
> > 6 files changed, 131 insertions(+), 14 deletions(-)
> >
> > diff --git a/criu/cr-restore.c b/criu/cr-restore.c
> > index 681655d..288add3 100644
> > --- a/criu/cr-restore.c
> > +++ b/criu/cr-restore.c
> > @@ -713,6 +713,13 @@ static int restore_one_alive_task(int pid, CoreEntry *core)
> > if (prepare_vmas(current, ta))
> > return -1;
> >
> > + /*
> > + * Sockets have to be restored in their network namespaces,
> > + * so a task namespace has to be restored after sockets.
> > + */
> > + if (restore_task_net_ns(current))
> > + return -1;
> > +
> > if (setup_uffd(pid, ta))
> > return -1;
> >
> > @@ -1388,14 +1395,6 @@ static int restore_task_with_children(void *_arg)
> > if (ret < 0)
> > goto err;
> >
> > - if (ca->clone_flags & CLONE_NEWNET) {
> > - ret = unshare(CLONE_NEWNET);
> > - if (ret) {
> > - pr_perror("Can't unshare net-namespace");
> > - goto err;
> > - }
> > - }
> > -
> > if (!(ca->clone_flags & CLONE_FILES)) {
> > ret = close_old_fds();
> > if (ret)
> > @@ -2965,6 +2964,12 @@ static int sigreturn_restore(pid_t pid, struct task_restore_args *task_args, uns
> > if (rst_prep_creds(pid, core, &creds_pos))
> > goto err_nv;
> >
> > + if (current->parent == NULL) {
> > + /* Wait when all tasks restored all files */
> > + restore_wait_other_tasks();
> > + fini_net_namespaces();
> > + }
> > +
> > /*
> > * We're about to search for free VM area and inject the restorer blob
> > * into it. No irrelevant mmaps/mremaps beyond this point, otherwise
> > diff --git a/criu/include/namespaces.h b/criu/include/namespaces.h
> > index 18eafb2..522c098 100644
> > --- a/criu/include/namespaces.h
> > +++ b/criu/include/namespaces.h
> > @@ -36,7 +36,8 @@
> > #define CLONE_ALLNS (CLONE_NEWPID | CLONE_NEWNET | CLONE_NEWIPC | CLONE_NEWUTS | CLONE_NEWNS | CLONE_NEWUSER | CLONE_NEWCGROUP)
> >
> > /* Nested namespaces are supported only for these types */
> > -#define CLONE_SUBNS (CLONE_NEWNS)
> > +#define CLONE_SUBNS (CLONE_NEWNS | CLONE_NEWNET)
> > +
> > #define EXTRA_SIZE 20
> >
> > struct ns_desc {
> > @@ -95,6 +96,7 @@ struct ns_id {
> > } mnt;
> >
> > struct {
> > + int ns_fd; /* a file handle for the namespace */
> > int nlsk; /* for sockets collection */
> > int seqsk; /* to talk to parasite daemons */
> > } net;
> > diff --git a/criu/include/net.h b/criu/include/net.h
> > index deac65f..49eca36 100644
> > --- a/criu/include/net.h
> > +++ b/criu/include/net.h
> > @@ -12,9 +12,13 @@
> >
> > struct cr_imgset;
> > extern int dump_net_ns(int ns_id);
> > -extern int prepare_net_ns(int pid);
> > +extern int prepare_net_namespaces(void);
> > +extern void fini_net_namespaces(void);
> > extern int netns_keep_nsfd(void);
> >
> > +struct pstree_item;
> > +extern int restore_task_net_ns(struct pstree_item *current);
> > +
> > struct veth_pair {
> > struct list_head node;
> > char *inside;
> > diff --git a/criu/namespaces.c b/criu/namespaces.c
> > index f655c16..64175f2 100644
> > --- a/criu/namespaces.c
> > +++ b/criu/namespaces.c
> > @@ -1660,9 +1660,6 @@ int prepare_namespace(struct pstree_item *item, unsigned long clone_flags)
> > * tree (i.e. -- mnt_ns restoring)
> > */
> >
> > - id = ns_per_id ? item->ids->net_ns_id : pid;
> > - if ((clone_flags & CLONE_NEWNET) && prepare_net_ns(id))
> > - return -1;
> > id = ns_per_id ? item->ids->uts_ns_id : pid;
> > if ((clone_flags & CLONE_NEWUTS) && prepare_utsns(id))
> > return -1;
> > @@ -1670,6 +1667,9 @@ int prepare_namespace(struct pstree_item *item, unsigned long clone_flags)
> > if ((clone_flags & CLONE_NEWIPC) && prepare_ipc_ns(id))
> > return -1;
> >
> > + if (prepare_net_namespaces())
> > + return -1;
> > +
> > /*
> > * This one is special -- there can be several mount
> > * namespaces and prepare_mnt_ns handles them itself.
> > diff --git a/criu/net.c b/criu/net.c
> > index 7dadc8d..a7e0a6e 100644
> > --- a/criu/net.c
> > +++ b/criu/net.c
> > @@ -1670,7 +1670,7 @@ int dump_net_ns(int ns_id)
> > return ret;
> > }
> >
> > -int prepare_net_ns(int pid)
> > +static int prepare_net_ns(int pid)
> > {
> > int ret = 0;
> > NetnsEntry *netns = NULL;
> > @@ -1702,6 +1702,108 @@ int prepare_net_ns(int pid)
> > return ret;
> > }
> >
> > +static int open_net_ns(struct ns_id *nsid, struct rst_info *rst)
> > +{
> > + int fd, tfd;
> > +
> > + /* Pin one with a file descriptor */
> > + fd = open_proc(PROC_SELF, "ns/net");
> > + if (fd < 0)
> > + return -1;
> > + tfd = reopen_as_unused_fd(fd, rst);
> > + if (tfd < 0) {
> > + close(fd);
> > + return -1;
> > + }
> > + nsid->net.ns_fd = tfd;
> > +
> > + return 0;
> > +}
> > +
> > +int prepare_net_namespaces()
> > +{
> > + struct ns_id *nsid;
> > +
> > + if (!(root_ns_mask & CLONE_NEWNET))
> > + return 0;
> > +
> > + for (nsid = ns_ids; nsid != NULL; nsid = nsid->next) {
> > + if (nsid->nd != &net_ns_desc)
> > + continue;
> > +
> > + if (unshare(CLONE_NEWNET)) {
>
> You create net namespaces from criu root task in NS_CRIU user_ns,
Currently prepare_net_namespaces() is called from the root task of a
restored tree, so it is called from NS_ROOT.
> which is wrong in case of (root_ns_mask & CLONE_NEWUSER) != 0.
>
> To avoid losing the NS_ROOT user_ns in net_ns, you may do the unshare()s
> in a child task. Create the child using CLONE_FILES, and you'll
> see the same descriptors in the criu root task.
>
> > + pr_perror("Unable to create a new netns");
> > + goto err;
> > + }
> > +
> > + if (prepare_net_ns(nsid->id))
> > + goto err;
> > +
> > + if (open_net_ns(nsid, rsti(root_item)))
> > + goto err;
> > + }
> > +
> > + return 0;
> > +err:
> > + return -1;
> > +}
> > +
> > +void fini_net_namespaces()
> > +{
> > + struct ns_id *nsid;
> > +
> > + if (!(root_ns_mask & CLONE_NEWNET))
> > + return;
> > +
> > + for (nsid = ns_ids; nsid != NULL; nsid = nsid->next) {
> > + if (nsid->nd != &net_ns_desc)
> > + continue;
> > + close_safe(&nsid->net.ns_fd);
> > + }
> > +}
> > +
> > +static int do_restore_task_net_ns(struct ns_id *nsid, struct pstree_item *current)
> > +{
> > + int fd;
> > +
> > + if (!(root_ns_mask & CLONE_NEWNET))
> > + return 0;
> > +
> > + fd = open_proc(root_item->pid->ns[0].virt, "fd/%d", nsid->net.ns_fd);
> > + if (fd < 0)
> > + return -1;
> > +
> > + if (setns(fd, CLONE_NEWNET)) {
> > + pr_perror("Can't restore netns");
> > + close(fd);
> > + return -1;
> > + }
> > + close(fd);
> > +
> > + return 0;
> > +}
> > +
> > +int restore_task_net_ns(struct pstree_item *current)
> > +{
> > + if (current->ids && current->ids->has_net_ns_id) {
> > + unsigned int id = current->ids->net_ns_id;
> > + struct ns_id *nsid;
> > +
> > + nsid = lookup_ns_by_id(id, &net_ns_desc);
> > + if (nsid == NULL) {
> > + pr_err("Can't find mount namespace %d\n", id);
> > + return -1;
> > + }
> > +
> > + BUG_ON(nsid->type == NS_CRIU);
> > +
> > + if (do_restore_task_net_ns(nsid, current))
> > + return -1;
> > + }
> > +
> > + return 0;
> > +}
> > +
> > int netns_keep_nsfd(void)
> > {
> > int ns_fd, ret;
> > diff --git a/criu/pstree.c b/criu/pstree.c
> > index 833b3d0..bce7b72 100644
> > --- a/criu/pstree.c
> > +++ b/criu/pstree.c
> > @@ -14,6 +14,8 @@
> > #include "mount.h"
> > #include "dump.h"
> > #include "util.h"
> > +#include "net.h"
> > +
> > #include "protobuf.h"
> > #include "images/pstree.pb-c.h"
> > #include "crtools.h"
> > @@ -472,6 +474,8 @@ static int read_pstree_ids(struct pstree_item *pi)
> > if (pi->ids->has_mnt_ns_id) {
> > if (rst_add_ns_id(pi->ids->mnt_ns_id, pi, &mnt_ns_desc))
> > return -1;
> > + if (rst_add_ns_id(pi->ids->net_ns_id, pi, &net_ns_desc))
> > + return -1;
> > }
> >
> > return 0;
> >
More information about the CRIU
mailing list