[CRIU] [PATCH v1 17/17] ns: Allow nested user namespaces

Kirill Tkhai ktkhai at virtuozzo.com
Thu Jan 12 09:54:49 PST 2017


Everything is prepared for nested user namespace support.
The only remaining thing we need to do is to enter the parent
of the dumped user namespace before the dump.

Signed-off-by: Kirill Tkhai <ktkhai at virtuozzo.com>
---
 criu/include/namespaces.h |    2 +-
 criu/namespaces.c         |   52 ++++++++++++++++++++++++++++++++++++++++++++-
 criu/pie/restorer.c       |    2 ++
 criu/pstree.c             |   12 +++++++++-
 4 files changed, 64 insertions(+), 4 deletions(-)

diff --git a/criu/include/namespaces.h b/criu/include/namespaces.h
index 3d10203cf..198a4ddf3 100644
--- a/criu/include/namespaces.h
+++ b/criu/include/namespaces.h
@@ -39,7 +39,7 @@
 #define CLONE_ALLNS	(CLONE_NEWPID | CLONE_NEWNET | CLONE_NEWIPC | CLONE_NEWUTS | CLONE_NEWNS | CLONE_NEWUSER | CLONE_NEWCGROUP)
 
 /* Nested namespaces are supported only for these types */
-#define CLONE_SUBNS	(CLONE_NEWNS)
+#define CLONE_SUBNS	(CLONE_NEWNS | CLONE_NEWUSER)
 #define EXTRA_SIZE	20
 
 #ifndef NSIO
diff --git a/criu/namespaces.c b/criu/namespaces.c
index 8578eaa5b..69c25fed1 100644
--- a/criu/namespaces.c
+++ b/criu/namespaces.c
@@ -712,6 +712,12 @@ static int set_parent_ns(struct ns_id *ns, void *oarg)
 		list_add(&ns->child_node, &p_ns->parent_head);
 		pr_debug("%s ns: %u(%d)<<%u(%d)\n", ns->nd->str, ns->kid,
 					ns->type, p_ns->kid, p_ns->type);
+		if (ns->type == NS_ROOT && p_ns->type != NS_CRIU) {
+			/* NS_ROOT is set to root_item's user_ns unconditionally now */
+			pr_err("Containers with init tasks switched away from initial"
+			       "user ns are not supported yet\n");
+			goto close_p_ns_fd;
+		}
 		ret = 0;
 	}
 
@@ -863,7 +869,15 @@ static int dump_user_ns(struct ns_id *ns);
 
 int collect_user_ns(struct ns_id *ns, void *oarg)
 {
+	struct ns_id *p_ns = ns->parent;
+	pid_t pid = -1;
 	UsernsEntry *e;
+	int status;
+
+	if (!p_ns && ns->type != NS_ROOT) {
+		pr_err("Dumping a user_ns without parent\n");
+		return -1;
+	}
 
 	e = xmalloc(sizeof(*e));
 	if (!e)
@@ -877,8 +891,44 @@ int collect_user_ns(struct ns_id *ns, void *oarg)
 	 * mappings, which are used for convirting local id-s to
 	 * userns id-s (userns_uid(), userns_gid())
 	 */
-	if (dump_user_ns(ns))
+	if (p_ns) {
+		/*
+		 * Currently, we are in NS_CRIU. To dump a NS_OTHER ns,
+		 * we need to enter its parent ns. As a task that has entered
+		 * a user_ns has no way back, we create a child for that.
+		 * NS_ROOT is dumped w/o fork(), as its xid maps are relative
+		 * to NS_CRIU.
+		 */
+		pid = fork();
+		if (pid == -1) {
+			pr_perror("Can't fork");
+			return -1;
+		}
+
+		if (pid) {
+			if (waitpid(pid, &status, 0) != pid) {
+				pr_perror("Unable to wait the %d process", pid);
+				return -1;
+			}
+			if (status) {
+				pr_err("Can't dump nested user_ns\n");
+				return -1;
+			}
+			return 0;
+		} else {
+			if (switch_ns(p_ns->ns_pid, &user_ns_desc, NULL) < 0)
+				exit(-1);
+		}
+	}
+
+	if (dump_user_ns(ns)) {
+		if (pid == 0)
+			exit(-1);
 		return -1;
+	}
+
+	if (pid == 0)
+		exit(0);
 
 	return 0;
 }
diff --git a/criu/pie/restorer.c b/criu/pie/restorer.c
index 7c82a69cd..69428b08c 100644
--- a/criu/pie/restorer.c
+++ b/criu/pie/restorer.c
@@ -35,6 +35,8 @@
 #include "sk-inet.h"
 #include "vma.h"
 #include "uffd.h"
+#include "pstree.h"
+#include "namespaces.h"
 
 #include "common/lock.h"
 #include "restorer.h"
diff --git a/criu/pstree.c b/criu/pstree.c
index bc66a9eff..b2e8c5cc8 100644
--- a/criu/pstree.c
+++ b/criu/pstree.c
@@ -851,7 +851,7 @@ static int prepare_pstree_kobj_ids(void)
 		}
 
 		rsti(item)->clone_flags = cflags;
-		if (parent)
+		if (parent) {
 			/*
 			 * Mount namespaces are setns()-ed at
 			 * restore_task_mnt_ns() explicitly,
@@ -862,8 +862,16 @@ static int prepare_pstree_kobj_ids(void)
 			 * be born in a fresh new mount namespace
 			 * which will be populated with all other
 			 * namespaces' entries.
+			 *
+			 * The child task is restored in root_item's user_ns, because
+			 * 1) it has to be root in the current user_ns to call prctl's
+			 * PR_SET_MM_MAP, and 2) we can't make the child a local root
+			 * in the child user_ns: that would require mapping the child's
+			 * user_ns to the parent's, but uid_map and gid_map can be written
+			 * only once, and that write is needed to restore the real mapping.
 			 */
-			rsti(item)->clone_flags &= ~CLONE_NEWNS;
+			rsti(item)->clone_flags &= ~(CLONE_NEWNS|CLONE_NEWUSER);
+		}
 
 		cflags &= CLONE_ALLNS;
 



More information about the CRIU mailing list