[CRIU] [PATCH 07/10] net: allow to dump and restore more than one network namespace
Andrei Vagin
avagin at openvz.org
Wed Aug 31 15:55:30 PDT 2016
From: Andrei Vagin <avagin at virtuozzo.com>
Restore all network namespaces from the root task, and move each task
into its own network namespace only after its sockets have been restored.
Restoring sockets requires switching between network namespaces, because
each socket has to be created in the network namespace it belongs to.
Signed-off-by: Andrei Vagin <avagin at virtuozzo.com>
---
criu/cr-restore.c | 10 ++++
criu/include/namespaces.h | 4 +-
criu/include/net.h | 4 +-
criu/namespaces.c | 6 +--
criu/net.c | 127 +++++++++++++++++++++++++++++++++++++++++++++-
criu/pstree.c | 3 ++
6 files changed, 148 insertions(+), 6 deletions(-)
diff --git a/criu/cr-restore.c b/criu/cr-restore.c
index 0b77fb3..d6ceeb3 100644
--- a/criu/cr-restore.c
+++ b/criu/cr-restore.c
@@ -718,6 +718,9 @@ static int restore_one_alive_task(int pid, CoreEntry *core)
close_service_fd(TRANSPORT_FD_OFF);
+ if (restore_task_net_ns(current))
+ return -1;
+
if (setup_uffd(pid, ta))
return -1;
@@ -2898,6 +2901,13 @@ static int sigreturn_restore(pid_t pid, struct task_restore_args *task_args, uns
if (rst_prep_creds(pid, core, &creds_pos))
goto err_nv;
+ if (current->parent == NULL) {
+ /* Wait when all tasks restored all files */
+ futex_wait_while_gt(&task_entries->nr_in_progress,
+ current->nr_threads);
+ fini_net_namespaces();
+ }
+
/*
* We're about to search for free VM area and inject the restorer blob
* into it. No irrelevant mmaps/mremaps beyond this point, otherwise
diff --git a/criu/include/namespaces.h b/criu/include/namespaces.h
index 2b183f2..7529495 100644
--- a/criu/include/namespaces.h
+++ b/criu/include/namespaces.h
@@ -36,7 +36,8 @@
#define CLONE_ALLNS (CLONE_NEWPID | CLONE_NEWNET | CLONE_NEWIPC | CLONE_NEWUTS | CLONE_NEWNS | CLONE_NEWUSER | CLONE_NEWCGROUP)
/* Nested namespaces are supported only for these types */
-#define CLONE_SUBNS (CLONE_NEWNS)
+#define CLONE_SUBNS (CLONE_NEWNS | CLONE_NEWNET)
+
#define EXTRA_SIZE 20
struct ns_desc {
@@ -95,6 +96,7 @@ struct ns_id {
} mnt;
struct {
+ int ns_fd;
int nlsk; /* for sockets collection */
int seqsk; /* to talk to parasite daemons */
} net;
diff --git a/criu/include/net.h b/criu/include/net.h
index ede380f..f88f876 100644
--- a/criu/include/net.h
+++ b/criu/include/net.h
@@ -5,8 +5,10 @@
struct cr_imgset;
extern int dump_net_ns(int ns_id);
-extern int prepare_net_ns(int pid);
+extern int prepare_net_namespaces(void);
+extern void fini_net_namespaces(void);
extern int netns_keep_nsfd(void);
+extern int restore_task_net_ns(struct pstree_item *current);
struct veth_pair {
struct list_head node;
diff --git a/criu/namespaces.c b/criu/namespaces.c
index 1d54a9f..974d1c6 100644
--- a/criu/namespaces.c
+++ b/criu/namespaces.c
@@ -1651,9 +1651,6 @@ int prepare_namespace(struct pstree_item *item, unsigned long clone_flags)
* tree (i.e. -- mnt_ns restoring)
*/
- id = ns_per_id ? item->ids->net_ns_id : pid;
- if ((clone_flags & CLONE_NEWNET) && prepare_net_ns(id))
- return -1;
id = ns_per_id ? item->ids->uts_ns_id : pid;
if ((clone_flags & CLONE_NEWUTS) && prepare_utsns(id))
return -1;
@@ -1661,6 +1658,9 @@ int prepare_namespace(struct pstree_item *item, unsigned long clone_flags)
if ((clone_flags & CLONE_NEWIPC) && prepare_ipc_ns(id))
return -1;
+ if (prepare_net_namespaces())
+ return -1;
+
/*
* This one is special -- there can be several mount
* namespaces and prepare_mnt_ns handles them itself.
diff --git a/criu/net.c b/criu/net.c
index 7a50640..c312961 100644
--- a/criu/net.c
+++ b/criu/net.c
@@ -1452,7 +1452,7 @@ int dump_net_ns(int ns_id)
return ret;
}
-int prepare_net_ns(int pid)
+static int prepare_net_ns(int pid)
{
int ret = 0;
NetnsEntry *netns = NULL;
@@ -1483,6 +1483,131 @@ int prepare_net_ns(int pid)
return ret;
}
+/*
+ * Pin the network namespace the caller currently lives in.
+ *
+ * Opens "ns/net" of PROC_SELF, passes the descriptor through
+ * reopen_as_unused_fd() for @rst and stores the resulting descriptor
+ * in nsid->net.ns_fd.
+ *
+ * Returns 0 on success, -1 on failure.
+ */
+static int open_net_ns(struct ns_id *nsid, struct rst_info *rst)
+{
+	int proc_fd, pinned_fd;
+
+	proc_fd = open_proc(PROC_SELF, "ns/net");
+	if (proc_fd < 0)
+		return -1;
+
+	pinned_fd = reopen_as_unused_fd(proc_fd, rst);
+	if (pinned_fd >= 0) {
+		nsid->net.ns_fd = pinned_fd;
+		return 0;
+	}
+
+	/* Reopening failed: release the descriptor opened above. */
+	close(proc_fd);
+	return -1;
+}
+
+/*
+ * Set up every network namespace listed in ns_ids.
+ *
+ * For each entry whose descriptor is net_ns_desc:
+ *  - a new network namespace is unshared (skipped for the NS_ROOT
+ *    entry, which uses the namespace the caller is already in);
+ *  - prepare_net_ns() is called with the entry's id;
+ *  - open_net_ns() stores a descriptor for the namespace in
+ *    nsid->net.ns_fd on behalf of root_item;
+ *  - the caller switches back to the namespace it started in, so
+ *    that every iteration begins from the same place.
+ *
+ * Does nothing when root_ns_mask has no CLONE_NEWNET bit.
+ *
+ * Returns 0 on success, -1 on failure.
+ */
+int prepare_net_namespaces(void)
+{
+	struct ns_id *nsid;
+	int rst;
+
+	if (!(root_ns_mask & CLONE_NEWNET))
+		return 0;
+
+	/* Remember the starting netns to come back to after each entry. */
+	rst = open_proc(PROC_SELF, "ns/net");
+	if (rst < 0)
+		return -1;
+
+	for (nsid = ns_ids; nsid != NULL; nsid = nsid->next) {
+		if (nsid->nd != &net_ns_desc)
+			continue;
+
+		/*
+		 * This has to be CLONE_NEWNET: unsharing the mount
+		 * namespace instead would leave the caller in the same
+		 * netns, so every nested entry would be configured on top
+		 * of the previous one.
+		 */
+		if (nsid->type != NS_ROOT && unshare(CLONE_NEWNET)) {
+			pr_perror("Unable to create a new netns");
+			goto err;
+		}
+
+		if (prepare_net_ns(nsid->id))
+			goto err;
+
+		if (open_net_ns(nsid, rsti(root_item)))
+			goto err;
+
+		/* And return back to regain the access to the roots yard */
+		if (setns(rst, CLONE_NEWNET)) {
+			pr_perror("Can't restore netns back");
+			goto err;
+		}
+	}
+
+	close(rst);
+	return 0;
+err:
+	/*
+	 * rst is always valid here: every jump to err happens after it
+	 * was opened successfully.
+	 * NOTE(review): restore_ns() is presumably responsible for
+	 * releasing rst on this path -- confirm against its definition.
+	 */
+	restore_ns(rst, &net_ns_desc);
+	return -1;
+}
+
+/*
+ * Close the namespace descriptor kept in net.ns_fd of every network
+ * namespace entry in ns_ids.
+ *
+ * The guard uses CLONE_NEWNET, the same condition under which
+ * prepare_net_namespaces() opens these descriptors. Testing the mount
+ * namespace bit instead would leave them open whenever only network
+ * namespaces are in use.
+ */
+void fini_net_namespaces(void)
+{
+	struct ns_id *nsid;
+
+	if (!(root_ns_mask & CLONE_NEWNET))
+		return;
+
+	for (nsid = ns_ids; nsid != NULL; nsid = nsid->next) {
+		if (nsid->nd != &net_ns_desc)
+			continue;
+		close_safe(&nsid->net.ns_fd);
+	}
+}
+
+/*
+ * Switch the calling task into the network namespace described by @nsid.
+ *
+ * The namespace is reached by opening "fd/<nsid->net.ns_fd>" through
+ * open_proc() for root_item->pid.virt, i.e. the descriptor number stored
+ * in @nsid is looked up under the root task. @current is not used.
+ *
+ * Returns 0 on success, -1 on failure.
+ */
+static int do_restore_task_net_ns(struct ns_id *nsid, struct pstree_item *current)
+{
+	int fd, ret = 0;
+
+	fd = open_proc(root_item->pid.virt, "fd/%d", nsid->net.ns_fd);
+	if (fd < 0)
+		return -1;
+
+	if (setns(fd, CLONE_NEWNET)) {
+		/* Log before close(), which could otherwise disturb errno. */
+		pr_perror("Can't restore netns");
+		ret = -1;
+	}
+
+	close(fd);
+	return ret;
+}
+
+/*
+ * Move @current into the network namespace recorded in its ids.
+ *
+ * Nothing is done when the task has no ids or no net_ns_id, when it has
+ * no parent, or when its net_ns_id equals the one of its parent.
+ *
+ * Returns 0 on success or when no switch is needed, -1 when the
+ * namespace cannot be found or entered.
+ */
+int restore_task_net_ns(struct pstree_item *current)
+{
+	struct ns_id *nsid;
+	unsigned int id;
+
+	if (!current->ids || !current->ids->has_net_ns_id)
+		return 0;
+
+	id = current->ids->net_ns_id;
+
+	/*
+	 * Regardless of the namespace a task wants to
+	 * live in, by that point they all will live in
+	 * root's one (see prepare_pstree_kobj_ids() +
+	 * get_clone_mask()). So if the current task's
+	 * target namespace is the root's one -- it's
+	 * already there, otherwise it will have to do
+	 * setns().
+	 *
+	 * NOTE(review): the check below compares against the parent's
+	 * id, not the root task's, although the text above speaks about
+	 * the root's namespace -- confirm which one is intended.
+	 */
+	if (!current->parent || id == current->parent->ids->net_ns_id)
+		return 0;
+
+	nsid = lookup_ns_by_id(id, &net_ns_desc);
+	if (nsid == NULL) {
+		/* id is unsigned, hence %u */
+		pr_err("Can't find network namespace %u\n", id);
+		return -1;
+	}
+
+	BUG_ON(nsid->type == NS_CRIU);
+
+	if (do_restore_task_net_ns(nsid, current))
+		return -1;
+
+	return 0;
+}
+
int netns_keep_nsfd(void)
{
int ns_fd, ret;
diff --git a/criu/pstree.c b/criu/pstree.c
index d23b1f1..0b219aa 100644
--- a/criu/pstree.c
+++ b/criu/pstree.c
@@ -12,6 +12,7 @@
#include "tty.h"
#include "mount.h"
#include "asm/dump.h"
+#include "net.h"
#include "protobuf.h"
#include "images/pstree.pb-c.h"
@@ -466,6 +467,8 @@ static int read_pstree_ids(struct pstree_item *pi)
if (pi->ids->has_mnt_ns_id) {
if (rst_add_ns_id(pi->ids->mnt_ns_id, pi, &mnt_ns_desc))
return -1;
+ if (rst_add_ns_id(pi->ids->net_ns_id, pi, &net_ns_desc))
+ return -1;
}
return 0;
--
2.7.4
More information about the CRIU
mailing list