[CRIU] [PATCH v1 15/17] user_ns: Add user_ns ordering to restorer and restore user_ns

Kirill Tkhai ktkhai at virtuozzo.com
Thu Jan 12 09:54:32 PST 2017


1) setns() into a user namespace must be called only after that namespace
   has been created.
2) unshare() must be called only after the parent user namespace's uid and
   gid maps have been written.

Signed-off-by: Kirill Tkhai <ktkhai at virtuozzo.com>
---
 criu/include/namespaces.h |    2 +
 criu/namespaces.c         |   11 +++++
 criu/pie/restorer.c       |   98 +++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 111 insertions(+)

diff --git a/criu/include/namespaces.h b/criu/include/namespaces.h
index eb70097e3..3d10203cf 100644
--- a/criu/include/namespaces.h
+++ b/criu/include/namespaces.h
@@ -74,6 +74,8 @@ struct join_ns {
 
 struct user_ns_state {
 	char ns_pid[sizeof("2147483647")]; /* INT_MAX */
+#define USER_NS__CREATED  1 /* futex value set by the restorer right after sys_unshare(CLONE_NEWUSER) succeeds */
+#define USER_NS__RESTORED 2 /* futex value set once the namespace's uid_map and gid_map have been written */
 	futex_t futex;
 } __attribute__ ((aligned (sizeof(void *))));
 
diff --git a/criu/namespaces.c b/criu/namespaces.c
index f766a50d3..8578eaa5b 100644
--- a/criu/namespaces.c
+++ b/criu/namespaces.c
@@ -1631,6 +1631,7 @@ int read_user_ns_imgs(void)
 int prepare_userns(struct pstree_item *item)
 {
 	UsernsEntry *e = userns_entry;
+	struct ns_id *ns;
 
 	if (write_id_map(item->pid.real, e->uid_map, e->n_uid_map, "uid_map"))
 		return -1;
@@ -1638,6 +1639,16 @@ int prepare_userns(struct pstree_item *item)
 	if (write_id_map(item->pid.real, e->gid_map, e->n_gid_map, "gid_map"))
 		return -1;
 
+	if (item->ids->has_user_ns_id) {
+		ns = lookup_ns_by_id(item->ids->user_ns_id, &user_ns_desc);
+		if (!ns) {
+			pr_err("Can't find user ns\n");
+			return -1;
+		}
+		BUG_ON(ns->type != NS_ROOT);
+		futex_set(&ns->user.rst_state->futex, USER_NS__RESTORED);
+	}
+
 	return 0;
 }
 
diff --git a/criu/pie/restorer.c b/criu/pie/restorer.c
index ccd0fbdf1..2b8ba3074 100644
--- a/criu/pie/restorer.c
+++ b/criu/pie/restorer.c
@@ -46,6 +46,7 @@
 
 #include "shmem.h"
 #include "restorer.h"
+#include "namespaces.h"
 
 #ifndef PR_SET_PDEATHSIG
 #define PR_SET_PDEATHSIG 1
@@ -874,6 +875,77 @@ static int vma_remap(VmaEntry *vma_entry, int uffd)
 	return 0;
 }
 
+/* Enter the user namespace of the task whose directory under @proc_fd is named @pid_file; returns 0 on success, -1 on error. */
+static int restorer_set_user_ns(int proc_fd, const char *pid_file)
+{
+	int pid_fd, user_fd, ret;
+
+	pid_fd = sys_openat(proc_fd, pid_file, O_RDONLY, 0);
+	if (pid_fd < 0) {
+		pr_err("Can't open %s: %d\n", pid_file, (int)pid_fd);
+		return -1;
+	}
+	/* pid_fd is only needed to resolve ns/user, so it is closed before the result is checked */
+	user_fd = sys_openat(pid_fd, "ns/user", O_RDONLY, 0);
+	sys_close(pid_fd);
+	if (user_fd < 0) {
+		pr_err("Can't open %s/ns/user: %d\n", pid_file, (int)user_fd);
+		return -1;
+	}
+	/* likewise the namespace fd is released as soon as setns() has returned */
+	ret = sys_setns(user_fd, CLONE_NEWUSER);
+	sys_close(user_fd);
+	if (ret < 0) {
+		pr_err("Can't set user_ns of %s: %d\n", pid_file, (int)ret);
+		return -1;
+	}
+	return 0;
+}
+
+/* Write @size bytes of @buf into file @fname inside directory @pid_fd; @pid is used only for log messages. Returns 0 or -1. */
+static int do_write_user_ns_map(int pid_fd, const char *pid, const char *fname, char *buf, unsigned size)
+{
+	int map_fd, ret;
+
+	if (!size) /* an empty map is not written at all and counts as success */
+		return 0;
+	map_fd = sys_openat(pid_fd, fname, O_WRONLY, 0);
+	if (map_fd < 0) {
+		pr_err("Can't open %s/%s file: %d\n", pid, fname, map_fd);
+		return -1;
+	}
+	/* both a negative error code and a short write are failures; ret is signed, so it is logged with %d */
+	ret = sys_write(map_fd, buf, size);
+	sys_close(map_fd);
+	if (ret < 0 || (unsigned)ret != size) {
+		pr_err("Can't write %s/%s (%u, %d)\n", pid, fname, size, ret);
+		return -1;
+	}
+	return 0;
+}
+
+/* Write uid_map and then gid_map for the task directory map->st->ns_pid found under @proc_fd; returns 0 on success, -1 on error. */
+static int restorer_write_user_ns_maps(int proc_fd, struct rst_user_ns *map)
+{
+	char *uid_buf = (char *)&map->data[0], *gid_buf = uid_buf + map->uid_map_size;
+	char *task = map->st->ns_pid;
+	int dir_fd, err;
+
+	dir_fd = sys_openat(proc_fd, task, O_RDONLY, 0);
+	if (dir_fd < 0) {
+		pr_err("Can't open %s: %d\n", task, (int)dir_fd);
+		return -1;
+	}
+
+	/* map->data carries uid_map_size bytes of uid map followed directly by the gid map bytes */
+	err = do_write_user_ns_map(dir_fd, task, "uid_map", uid_buf, map->uid_map_size);
+	if (!err)
+		err = do_write_user_ns_map(dir_fd, task, "gid_map", gid_buf, map->gid_map_size);
+	/* the directory fd is closed on the success and on the failure path alike */
+	sys_close(dir_fd);
+	return err;
+}
+
 static int timerfd_arm(struct task_restore_args *args)
 {
 	int i;
@@ -1384,6 +1456,32 @@ long __export_restore_task(struct task_restore_args *args)
 	if (ret)
 		goto core_restore_end;
 
+	if (args->setns_user_ns) { /* this task has to join a user ns owned by another task */
+		futex_wait_while_lt(&args->setns_user_ns->futex, USER_NS__CREATED); /* presumably blocks until the ns exists */
+		ret = restorer_set_user_ns(args->proc_fd, args->setns_user_ns->ns_pid);
+		if (ret < 0)
+			goto core_restore_end;
+		futex_wait_while_lt(&args->setns_user_ns->futex, USER_NS__RESTORED); /* do not go on before the id maps are in place */
+	}
+	/* Create a new user ns, announce it as CREATED, then wait until some other task marks it RESTORED. */
+	if (args->unshare_user_ns) {
+		ret = sys_unshare(CLONE_NEWUSER);
+		if (ret) {
+			pr_err("sys_unshare(CLONE_NEWUSER) failed with %d\n", (int)ret);
+			goto core_restore_end;
+		}
+		futex_set_and_wake(&args->unshare_user_ns->futex, USER_NS__CREATED);
+		futex_wait_while_lt(&args->unshare_user_ns->futex, USER_NS__RESTORED);
+	}
+	/* NOTE(review): p is reset to args->child_user_ns on every pass and i is never used, so only the first entry seems to be handled - confirm how entries are laid out. */
+	for (i = 0; i < args->child_user_ns_n; i++) {
+		struct rst_user_ns *p = args->child_user_ns;
+		futex_wait_while_lt(&p->st->futex, USER_NS__CREATED); /* the child ns must exist before its maps can be written */
+		if (restorer_write_user_ns_maps(args->proc_fd, p))
+			goto core_restore_end;
+		futex_set_and_wake(&p->st->futex, USER_NS__RESTORED); /* releases tasks waiting for RESTORED on this ns */
+	}
+
 	/*
 	 * We need to prepare a valid sigframe here, so
 	 * after sigreturn the kernel will pick up the



More information about the CRIU mailing list