[CRIU] [PATCH v5 22/31] ns: Generate user_ns tree
Kirill Tkhai
ktkhai at virtuozzo.com
Thu Feb 23 07:15:45 PST 2017
Create the user namespace hierarchy from the criu main task.
Open the namespaces' fds and keep them in fdstore, so that
they are visible to everybody.
Why we do it this way.
1) User namespaces are not correlated with the task
hierarchy. A parent task may live in a user namespace
of a deeper level than a child task. So, we
can't restore the user namespaces just by
passing CLONE_NEWUSER in fork_with_pid().
2) CLONE_FS tasks require that user_ns is already set at the
moment of clone(), so in this case we have to restore the target
user_ns in the vicinity of create_children_and_session().
v3: Check for WIFEXITED(). Aligned stack.
Use fdstore to keep ns fd.
Create tree from root_item.
Signed-off-by: Kirill Tkhai <ktkhai at virtuozzo.com>
---
criu/include/namespaces.h | 1
criu/namespaces.c | 102 +++++++++++++++++++++++++++++++++++++++++++++
criu/pstree.c | 6 ++-
3 files changed, 107 insertions(+), 2 deletions(-)
diff --git a/criu/include/namespaces.h b/criu/include/namespaces.h
index 447040758..c1d819f32 100644
--- a/criu/include/namespaces.h
+++ b/criu/include/namespaces.h
@@ -117,6 +117,7 @@ struct ns_id {
} net;
struct {
UsernsEntry *e;
+ int nsfd_id;
} user;
};
};
diff --git a/criu/namespaces.c b/criu/namespaces.c
index bec255465..1e4071168 100644
--- a/criu/namespaces.c
+++ b/criu/namespaces.c
@@ -30,6 +30,8 @@
#include "protobuf.h"
#include "util.h"
#include "images/ns.pb-c.h"
+#include "common/scm.h"
+#include "fdstore.h"
static struct ns_desc *ns_desc_array[] = {
&net_ns_desc,
@@ -2112,6 +2114,103 @@ int join_namespaces(void)
return ret;
}
+/*
+ * States passed through ns_arg::futex between a parent and the helper
+ * it cloned in create_user_ns_hierarhy_fn().
+ *
+ * NOTE(review): the numeric order looks significant, since the states
+ * are compared with futex_wait_while_lt() -- confirm before reordering.
+ */
+enum {
+ /* Set by the child once it has stored its pid in ns_arg::pid */
+ NS__CREATED = 1,
+ /* Set by the parent after prepare_userns() succeeded for the child */
+ NS__MAPS_POPULATED,
+ /* Set by a helper on its parent's futex when it finishes successfully */
+ NS__RESTORED,
+ /* Not referenced by the code visible in this patch */
+ NS__EXIT_HELPER,
+ /* Set by a helper on its parent's futex when it fails */
+ NS__ERROR,
+};
+
+/*
+ * Argument block handed to create_user_ns_hierarhy_fn(). For cloned
+ * helpers it is placed in a MAP_SHARED | MAP_ANONYMOUS mapping, so the
+ * parent and the child both see updates to it.
+ */
+struct ns_arg {
+ /* User namespace the callee has to create the subtree for */
+ struct ns_id *me;
+ /* Parent <-> child synchronization point, holds an NS__* state */
+ futex_t futex;
+ /* Filled in by the child with get_self_real_pid() for the parent */
+ pid_t pid;
+};
+
+/*
+ * Recursively create the user namespace subtree of the ns_id passed in
+ * @in_arg (a struct ns_arg).
+ *
+ * The top-level call (->me == root_user_ns) runs in the caller's context
+ * and only spawns helpers for the children of the root. Any other
+ * invocation is the entry point of a clone(CLONE_NEWUSER) helper: it
+ * reports its pid to the parent, puts an fd of /proc/self/ns/user into
+ * fdstore, waits for the parent to signal NS__MAPS_POPULATED, prepares
+ * its creds and then creates helpers for its own children.
+ *
+ * Returns 0 on success and 1 on failure; for a cloned helper this value
+ * is what the parent inspects via WEXITSTATUS().
+ */
+static int create_user_ns_hierarhy_fn(void *in_arg)
+{
+	char stack[128] __stack_aligned__;
+	struct ns_arg *arg = NULL, *p_arg = in_arg;
+	futex_t *p_futex = NULL, *futex = NULL;
+	int status, fd, ret = -1;
+	struct ns_id *me, *child;
+	pid_t pid = -1;
+
+	/* The root call has no parent helper to synchronize with */
+	if (p_arg->me != root_user_ns)
+		p_futex = &p_arg->futex;
+	me = p_arg->me;
+
+	if (p_futex) {
+		/* Set self pid to allow parent restore user_ns maps */
+		p_arg->pid = get_self_real_pid();
+		futex_set_and_wake(p_futex, NS__CREATED);
+		fd = open("/proc/self/ns/user", O_RDONLY);
+		if (fd < 0) {
+			/* open() sets errno, so report it as well */
+			pr_perror("Can't get self user ns");
+			goto out;
+		}
+		me->user.nsfd_id = fdstore_add(fd);
+		close(fd);
+		if (me->user.nsfd_id < 0) {
+			pr_err("Can't add fd to fdstore\n");
+			goto out;
+		}
+
+		/* Creds can be prepared only after the parent set up our maps */
+		futex_wait_while_lt(p_futex, NS__MAPS_POPULATED);
+		if (prepare_userns_creds()) {
+			pr_err("Can't prepare creds\n");
+			goto out;
+		}
+	}
+
+	/* Shared mapping: the cloned child writes its pid and state here */
+	arg = mmap(NULL, sizeof(*arg), PROT_WRITE | PROT_READ, MAP_SHARED | MAP_ANONYMOUS, -1, 0);
+	if (arg == MAP_FAILED) {
+		pr_perror("Failed to mmap arg");
+		/* MAP_FAILED is not NULL: don't let the cleanup munmap() it */
+		arg = NULL;
+		goto out;
+	}
+	futex = &arg->futex;
+
+	/* Children are created one at a time, reusing the same arg block */
+	list_for_each_entry(child, &me->children, siblings) {
+		arg->me = child;
+		futex_init(futex);
+
+		pid = clone(create_user_ns_hierarhy_fn, stack + sizeof(stack), CLONE_NEWUSER | CLONE_FILES | SIGCHLD, arg);
+		if (pid < 0) {
+			pr_perror("Can't clone");
+			goto out;
+		}
+		futex_wait_while_lt(futex, NS__CREATED);
+		/* Get child real pid */
+		pid = arg->pid;
+		/*
+		 * NOTE(review): on this failure path the child is neither
+		 * woken up nor waited for -- confirm the caller tears the
+		 * whole restore down in that case.
+		 */
+		if (prepare_userns(pid, child->user.e) < 0) {
+			pr_err("Can't prepare child user_ns\n");
+			goto out;
+		}
+		futex_set_and_wake(futex, NS__MAPS_POPULATED);
+
+		/* status is valid only if wait() itself succeeded */
+		if (wait(&status) < 0) {
+			pr_perror("Child process waiting");
+			goto out;
+		}
+		if (!WIFEXITED(status) || WEXITSTATUS(status)) {
+			pr_err("Child process waiting: %d\n", status);
+			goto out;
+		}
+	}
+
+	ret = 0;
+out:
+	/* Report the final state of this subtree to the parent helper */
+	if (p_futex)
+		futex_set_and_wake(p_futex, ret ? NS__ERROR : NS__RESTORED);
+	if (arg)
+		munmap(arg, sizeof(*arg));
+	return ret ? 1 : 0;
+}
+
+/*
+ * Entry point: build the user namespace tree starting from root_user_ns
+ * in the context of the calling task.
+ */
+static int create_user_ns_hierarhy(void)
+{
+	struct ns_arg root_arg = {
+		.me = root_user_ns,
+	};
+
+	return create_user_ns_hierarhy_fn(&root_arg);
+}
+
int prepare_namespace(struct pstree_item *item, unsigned long clone_flags)
{
pid_t pid = vpid(item);
@@ -2120,7 +2219,8 @@ int prepare_namespace(struct pstree_item *item, unsigned long clone_flags)
pr_info("Restoring namespaces %d flags 0x%lx\n",
vpid(item), clone_flags);
- if ((clone_flags & CLONE_NEWUSER) && prepare_userns_creds())
+ if ((clone_flags & CLONE_NEWUSER) && (prepare_userns_creds() ||
+ create_user_ns_hierarhy()))
return -1;
/*
diff --git a/criu/pstree.c b/criu/pstree.c
index 8a8b00e4f..8cb0bff93 100644
--- a/criu/pstree.c
+++ b/criu/pstree.c
@@ -876,8 +876,12 @@ static int prepare_pstree_kobj_ids(void)
* be born in a fresh new mount namespace
* which will be populated with all other
* namespaces' entries.
+ *
+ * User namespaces are created in create_user_ns_hierarhy()
+ * before the tasks, as their hierarchy does not correlate
+ * with the task hierarchy in any way.
*/
- rsti(item)->clone_flags &= ~CLONE_NEWNS;
+ rsti(item)->clone_flags &= ~(CLONE_NEWNS | CLONE_NEWUSER);
cflags &= CLONE_ALLNS;
More information about the CRIU
mailing list