[CRIU] [PATCH 08/11 v2] net: allow to dump and restore more than one network namespace
Dmitry Safonov
0x7f454c46 at gmail.com
Wed Feb 8 04:06:56 PST 2017
2017-02-08 12:55 GMT+03:00 Pavel Emelyanov <xemul at virtuozzo.com>:
> On 02/07/2017 10:06 PM, Andrei Vagin wrote:
>> From: Andrei Vagin <avagin at virtuozzo.com>
>>
>> Restore all network namespaces from the root task and then set
>> a proper namespace for each task after restoring sockets, because
>> we need to switch network namespaces to restore sockets.
>>
>> Each socket has to be created in a proper network namespace.
>>
>> v2: fix a typo bug
>>
>> Signed-off-by: Andrei Vagin <avagin at virtuozzo.com>
>> ---
>> criu/cr-restore.c | 21 ++++++----
>> criu/include/namespaces.h | 4 +-
>> criu/include/net.h | 6 ++-
>> criu/namespaces.c | 6 +--
>> criu/net.c | 104 +++++++++++++++++++++++++++++++++++++++++++++-
>> criu/pstree.c | 6 +++
>> 6 files changed, 133 insertions(+), 14 deletions(-)
>>
>> diff --git a/criu/cr-restore.c b/criu/cr-restore.c
>> index 681655d..288add3 100644
>> --- a/criu/cr-restore.c
>> +++ b/criu/cr-restore.c
>> @@ -713,6 +713,13 @@ static int restore_one_alive_task(int pid, CoreEntry *core)
>> if (prepare_vmas(current, ta))
>> return -1;
>>
>> + /*
>> + * Sockets have to be restored in their network namespaces,
>> + * so a task namespace has to be restored after sockets.
>> + */
>> + if (restore_task_net_ns(current))
>> + return -1;
>> +
>> if (setup_uffd(pid, ta))
>> return -1;
>>
>> @@ -1388,14 +1395,6 @@ static int restore_task_with_children(void *_arg)
>> if (ret < 0)
>> goto err;
>>
>> - if (ca->clone_flags & CLONE_NEWNET) {
>> - ret = unshare(CLONE_NEWNET);
>> - if (ret) {
>> - pr_perror("Can't unshare net-namespace");
>> - goto err;
>> - }
>> - }
>> -
>> if (!(ca->clone_flags & CLONE_FILES)) {
>> ret = close_old_fds();
>> if (ret)
>> @@ -2965,6 +2964,12 @@ static int sigreturn_restore(pid_t pid, struct task_restore_args *task_args, uns
>> if (rst_prep_creds(pid, core, &creds_pos))
>> goto err_nv;
>>
>> + if (current->parent == NULL) {
>> + /* Wait when all tasks restored all files */
>> + restore_wait_other_tasks();
>> + fini_net_namespaces();
>> + }
>> +
>> /*
>> * We're about to search for free VM area and inject the restorer blob
>> * into it. No irrelevant mmaps/mremaps beyond this point, otherwise
>> diff --git a/criu/include/namespaces.h b/criu/include/namespaces.h
>> index 18eafb2..522c098 100644
>> --- a/criu/include/namespaces.h
>> +++ b/criu/include/namespaces.h
>> @@ -36,7 +36,8 @@
>> #define CLONE_ALLNS (CLONE_NEWPID | CLONE_NEWNET | CLONE_NEWIPC | CLONE_NEWUTS | CLONE_NEWNS | CLONE_NEWUSER | CLONE_NEWCGROUP)
>>
>> /* Nested namespaces are supported only for these types */
>> -#define CLONE_SUBNS (CLONE_NEWNS)
>> +#define CLONE_SUBNS (CLONE_NEWNS | CLONE_NEWNET)
>> +
>> #define EXTRA_SIZE 20
>>
>> struct ns_desc {
>> @@ -95,6 +96,7 @@ struct ns_id {
>> } mnt;
>>
>> struct {
>> + int ns_fd; /* a file handle for the namespace */
>> int nlsk; /* for sockets collection */
>> int seqsk; /* to talk to parasite daemons */
>> } net;
>> diff --git a/criu/include/net.h b/criu/include/net.h
>> index deac65f..49eca36 100644
>> --- a/criu/include/net.h
>> +++ b/criu/include/net.h
>> @@ -12,9 +12,13 @@
>>
>> struct cr_imgset;
>> extern int dump_net_ns(int ns_id);
>> -extern int prepare_net_ns(int pid);
>> +extern int prepare_net_namespaces(void);
>> +extern void fini_net_namespaces(void);
>> extern int netns_keep_nsfd(void);
>>
>> +struct pstree_item;
>> +extern int restore_task_net_ns(struct pstree_item *current);
>> +
>> struct veth_pair {
>> struct list_head node;
>> char *inside;
>> diff --git a/criu/namespaces.c b/criu/namespaces.c
>> index b1cca98..dce8ab5 100644
>> --- a/criu/namespaces.c
>> +++ b/criu/namespaces.c
>> @@ -1660,9 +1660,6 @@ int prepare_namespace(struct pstree_item *item, unsigned long clone_flags)
>> * tree (i.e. -- mnt_ns restoring)
>> */
>>
>> - id = ns_per_id ? item->ids->net_ns_id : pid;
>> - if ((clone_flags & CLONE_NEWNET) && prepare_net_ns(id))
>> - return -1;
>> id = ns_per_id ? item->ids->uts_ns_id : pid;
>> if ((clone_flags & CLONE_NEWUTS) && prepare_utsns(id))
>> return -1;
>> @@ -1670,6 +1667,9 @@ int prepare_namespace(struct pstree_item *item, unsigned long clone_flags)
>> if ((clone_flags & CLONE_NEWIPC) && prepare_ipc_ns(id))
>> return -1;
>>
>> + if (prepare_net_namespaces())
>> + return -1;
>> +
>> /*
>> * This one is special -- there can be several mount
>> * namespaces and prepare_mnt_ns handles them itself.
>> diff --git a/criu/net.c b/criu/net.c
>> index 7dadc8d..a7e0a6e 100644
>> --- a/criu/net.c
>> +++ b/criu/net.c
>> @@ -1670,7 +1670,7 @@ int dump_net_ns(int ns_id)
>> return ret;
>> }
>>
>> -int prepare_net_ns(int pid)
>> +static int prepare_net_ns(int pid)
>> {
>> int ret = 0;
>> NetnsEntry *netns = NULL;
>> @@ -1702,6 +1702,108 @@ int prepare_net_ns(int pid)
>> return ret;
>> }
>>
>> +static int open_net_ns(struct ns_id *nsid, struct rst_info *rst)
>> +{
>> + int fd, tfd;
>> +
>> + /* Pin one with a file descriptor */
>> + fd = open_proc(PROC_SELF, "ns/net");
>> + if (fd < 0)
>> + return -1;
>> + tfd = reopen_as_unused_fd(fd, rst);
>> + if (tfd < 0) {
>> + close(fd);
>> + return -1;
>> + }
>> + nsid->net.ns_fd = tfd;
>> +
>> + return 0;
>> +}
>> +
>> +int prepare_net_namespaces()
>> +{
>> + struct ns_id *nsid;
>> +
>> + if (!(root_ns_mask & CLONE_NEWNET))
>> + return 0;
>> +
>> + for (nsid = ns_ids; nsid != NULL; nsid = nsid->next) {
>> + if (nsid->nd != &net_ns_desc)
>> + continue;
>> +
>> + if (unshare(CLONE_NEWNET)) {
>> + pr_perror("Unable to create a new netns");
>> + goto err;
>> + }
>> +
>> + if (prepare_net_ns(nsid->id))
>> + goto err;
>> +
>> + if (open_net_ns(nsid, rsti(root_item)))
>> + goto err;
>> + }
>> +
>> + return 0;
>> +err:
>> + return -1;
>> +}
>> +
>> +void fini_net_namespaces()
>> +{
>> + struct ns_id *nsid;
>> +
>> + if (!(root_ns_mask & CLONE_NEWNET))
>> + return;
>> +
>> + for (nsid = ns_ids; nsid != NULL; nsid = nsid->next) {
>> + if (nsid->nd != &net_ns_desc)
>> + continue;
>> + close_safe(&nsid->net.ns_fd);
>> + }
>> +}
>> +
>> +static int do_restore_task_net_ns(struct ns_id *nsid, struct pstree_item *current)
>
> From here to ...
>
>> +{
>> + int fd;
>> +
>> + if (!(root_ns_mask & CLONE_NEWNET))
>> + return 0;
>> +
>> + fd = open_proc(root_item->pid->ns[0].virt, "fd/%d", nsid->net.ns_fd);
>> + if (fd < 0)
>> + return -1;
>> +
>> + if (setns(fd, CLONE_NEWNET)) {
>> + pr_perror("Can't restore netns");
>> + close(fd);
>> + return -1;
>> + }
>> + close(fd);
>> +
>> + return 0;
>> +}
>> +
>> +int restore_task_net_ns(struct pstree_item *current)
>> +{
>> + if (current->ids && current->ids->has_net_ns_id) {
>> + unsigned int id = current->ids->net_ns_id;
>> + struct ns_id *nsid;
>> +
>> + nsid = lookup_ns_by_id(id, &net_ns_desc);
>> + if (nsid == NULL) {
>> + pr_err("Can't find mount namespace %d\n", id);
>> + return -1;
>> + }
>> +
>> + BUG_ON(nsid->type == NS_CRIU);
>> +
>> + if (do_restore_task_net_ns(nsid, current))
>> + return -1;
>
> ... here. We already have very similar code in restore_task_mnt_ns(). Can
> we merge them?
Yeah, I noticed this while writing sub-UTS C/R. Here is my attempt to
generalize it (on top of this patch):
https://github.com/0x7f454c46/criu/commit/1d88b490a78c7b141b259eec030f4a258b272996
I used it for C/R of nested UTS namespaces, but didn't touch mnt.
So I suggest doing it on top, using my patch, if it's suitable - as I've
already written the code and refactored this.
>
>> + }
>> +
>> + return 0;
>> +}
>> +
>> int netns_keep_nsfd(void)
>> {
>> int ns_fd, ret;
>> diff --git a/criu/pstree.c b/criu/pstree.c
>> index 833b3d0..6404350 100644
>> --- a/criu/pstree.c
>> +++ b/criu/pstree.c
>> @@ -14,6 +14,8 @@
>> #include "mount.h"
>> #include "dump.h"
>> #include "util.h"
>> +#include "net.h"
>> +
>> #include "protobuf.h"
>> #include "images/pstree.pb-c.h"
>> #include "crtools.h"
>> @@ -473,6 +475,10 @@ static int read_pstree_ids(struct pstree_item *pi)
>> if (rst_add_ns_id(pi->ids->mnt_ns_id, pi, &mnt_ns_desc))
>> return -1;
>> }
>> + if (pi->ids->has_net_ns_id) {
>> + if (rst_add_ns_id(pi->ids->net_ns_id, pi, &net_ns_desc))
>> + return -1;
>> + }
>>
>> return 0;
>> }
>>
>
> _______________________________________________
> CRIU mailing list
> CRIU at openvz.org
> https://lists.openvz.org/mailman/listinfo/criu
--
Dmitry
More information about the CRIU
mailing list