[CRIU] [PATCH v1 13/17] user_ns: Add rst mem for synchronization on restore

Kirill Tkhai ktkhai at virtuozzo.com
Thu Jan 12 09:54:14 PST 2017


Memory to notify other tasks about readiness of a user_ns
(i.e. the possibility to switch to this ns).

Signed-off-by: Kirill Tkhai <ktkhai at virtuozzo.com>
---
 criu/cr-restore.c         |    3 ++
 criu/include/namespaces.h |   19 ++++++++++
 criu/include/restorer.h   |    4 ++
 criu/namespaces.c         |   87 ++++++++++++++++++++++++++++++++++++++++++++-
 4 files changed, 112 insertions(+), 1 deletion(-)

diff --git a/criu/cr-restore.c b/criu/cr-restore.c
index 1c87c6ed3..296742ef5 100644
--- a/criu/cr-restore.c
+++ b/criu/cr-restore.c
@@ -715,6 +715,9 @@ static int restore_one_alive_task(int pid, CoreEntry *core)
 	if (setup_uffd(pid, ta))
 		return -1;
 
+	if (populate_task_user_ns_maps(ta))
+		return -1;
+
 	return sigreturn_restore(pid, ta, args_len, core);
 }
 
diff --git a/criu/include/namespaces.h b/criu/include/namespaces.h
index e033e55c7..eb70097e3 100644
--- a/criu/include/namespaces.h
+++ b/criu/include/namespaces.h
@@ -72,6 +72,11 @@ struct join_ns {
 	} extra_opts;
 };
 
+struct user_ns_state {	/* per-user_ns restore state; allocated with rst_mem_alloc(..., RM_SHREMAP) in do_read_user_ns_img() */
+	char ns_pid[sizeof("2147483647")]; /* ns->ns_pid as decimal text (sprintf "%d"); sized for INT_MAX digits plus the NUL */
+	futex_t futex;	/* set up with futex_init(); per the patch description, used to notify other tasks that the user_ns is ready */
+} __attribute__ ((aligned (sizeof(void *))));
+
 enum ns_type {
 	NS_UNKNOWN = 0,
 	NS_CRIU,
@@ -111,12 +116,20 @@ struct ns_id {
 			int seqsk;	/* to talk to parasite daemons */
 		} net;
 		struct {
+			struct user_ns_state *rst_state;
 			UsernsEntry *e;
 		} user;
 	};
 };
 extern struct ns_id *ns_ids;
 
+struct rst_user_ns {	/* one record per child user_ns, filled in by populate_task_user_ns_maps() */
+	struct user_ns_state *st;	/* copied from the child's ns->user.rst_state */
+	size_t uid_map_size;	/* number of bytes of uid map text stored at the start of data[] */
+	size_t gid_map_size;	/* number of bytes of gid map text stored right after the uid map text */
+	void *data[0];	/* "%u %u %u\n" lines: all uid extents first, then all gid extents */
+};
+
 #define NS_DESC_ENTRY(_cflag, _str)			\
 	{						\
 		.cflag		= _cflag,		\
@@ -124,6 +137,9 @@ extern struct ns_id *ns_ids;
 		.len		= sizeof(_str) - 1,	\
 	}
 
+extern struct user_ns_state	*user_ns_states;
+extern unsigned long		user_ns_states_pos;
+
 extern bool check_ns_proc(struct fd_link *link);
 extern unsigned int child_userns_xid(unsigned int xid, UidGidExtent **map, int n);
 
@@ -201,4 +217,7 @@ extern int __userns_call(const char *func_name, uns_call_t call, int flags,
 
 extern int add_ns_shared_cb(int (*actor)(void *data), void *data);
 
+struct task_restore_args;
+extern int populate_task_user_ns_maps(struct task_restore_args *ta);
+
 #endif /* __CR_NS_H__ */
diff --git a/criu/include/restorer.h b/criu/include/restorer.h
index a569fa31d..ac3707d26 100644
--- a/criu/include/restorer.h
+++ b/criu/include/restorer.h
@@ -18,6 +18,7 @@
 #include "timerfd.h"
 #include "shmem.h"
 #include "parasite-vdso.h"
+#include "namespaces.h"
 
 #include <time.h>
 
@@ -149,6 +150,9 @@ struct task_restore_args {
 	struct sock_fprog		*seccomp_filters;
 	unsigned int			seccomp_filters_n;
 
+	struct rst_user_ns		*child_user_ns;
+	unsigned int			child_user_ns_n;
+
 	/* * * * * * * * * * * * * * * * * * * * */
 
 	unsigned long			task_size;
diff --git a/criu/namespaces.c b/criu/namespaces.c
index 949675f9b..f766a50d3 100644
--- a/criu/namespaces.c
+++ b/criu/namespaces.c
@@ -26,6 +26,7 @@
 #include "namespaces.h"
 #include "net.h"
 #include "cgroup.h"
+#include "restorer.h"
 
 #include "protobuf.h"
 #include "util.h"
@@ -44,6 +45,9 @@ static struct ns_desc *ns_desc_array[] = {
 
 static unsigned int join_ns_flags;
 
+struct user_ns_state	*user_ns_states;
+unsigned long		user_ns_states_pos;
+
 int check_namespace_opts(void)
 {
 	errno = 22;
@@ -1569,6 +1573,7 @@ int stop_usernsd(void)
 
 static int do_read_user_ns_img(struct ns_id *ns, void *arg)
 {
+	struct user_ns_state **first = arg, *new;
 	struct cr_img *img;
 	UsernsEntry *e;
 	int ret;
@@ -1580,6 +1585,16 @@ static int do_read_user_ns_img(struct ns_id *ns, void *arg)
 	close_image(img);
 	if (ret < 0)
 		return -1;
+	new = rst_mem_alloc(sizeof(*new), RM_SHREMAP);
+	if (!new) {
+		pr_perror("Can't alloc user_ns_state");
+		return -1;
+	}
+
+	sprintf(new->ns_pid, "%d", ns->ns_pid);
+	futex_init(&new->futex);
+	ns->user.rst_state = new;
+
 	ns->user.e = e;
 	if (ns->type == NS_ROOT)
 		userns_entry = e;
@@ -1593,6 +1608,8 @@ static int do_read_user_ns_img(struct ns_id *ns, void *arg)
 		list_add(&ns->child_node, &ns->parent->parent_head);
 	}
 
+	if (!*first)
+		*first = new;
 	return 0;
 }
 
@@ -1603,7 +1620,10 @@ int read_user_ns_imgs(void)
 	if (!(root_ns_mask & CLONE_NEWUSER))
 		return 0;
 
-	ret = walk_namespaces(&user_ns_desc, do_read_user_ns_img, NULL);
+	user_ns_states_pos = rst_mem_align_cpos(RM_SHREMAP);
+	user_ns_states = NULL;
+
+	ret = walk_namespaces(&user_ns_desc, do_read_user_ns_img, &user_ns_states);
 
 	return ret;
 }
@@ -1621,6 +1641,71 @@ int prepare_userns(struct pstree_item *item)
 	return 0;
 }
 
+int populate_task_user_ns_maps(struct task_restore_args *ta)	/* fills ta->child_user_ns and ta->child_user_ns_n; returns 0, or -1 on lookup/allocation failure */
+{
+	struct ns_id *ns, *c_ns;
+	struct rst_user_ns *new;
+	size_t size, off;
+	UsernsEntry *e;
+	void *maps;	/* only NULL-checked below; the map text itself is written through new->data */
+	int i;
+
+	ta->child_user_ns = NULL;
+	ta->child_user_ns_n = 0;
+
+	if (!current->ids->has_user_ns_id)	/* no user_ns id recorded for this task: leave both fields cleared */
+		return 0;
+
+	ns = lookup_ns_by_id(current->ids->user_ns_id, &user_ns_desc);
+	if (!ns) {
+		pr_err("Can't find user_ns\n");
+		return -1;
+	}
+
+	if (ns->ns_pid != current->pid.virt || list_empty(&ns->parent_head))	/* proceed only when current's virt pid is ns->ns_pid and the ns has entries on parent_head */
+		return 0;
+
+	ta->child_user_ns = (struct rst_user_ns *)rst_mem_align_cpos(RM_PRIVATE);	/* NOTE(review): stores the value returned by rst_mem_align_cpos(), presumably a position to be remapped later - confirm */
+
+	list_for_each_entry(c_ns, &ns->parent_head, child_node) {	/* emit one record per child user_ns */
+		new = (struct rst_user_ns *)rst_mem_alloc(sizeof(*new), RM_PRIVATE);
+		if (!new) {
+			pr_err("Can't alloc rst mem\n");
+			return -1;
+		}
+
+		new->st = c_ns->user.rst_state;
+		e = c_ns->user.e;
+
+		size = sizeof("4294967295 4294967295 4294967295\n"); /* worst-case line: three UINT_MAX values, newline and NUL */
+		size *= (e->n_uid_map + e->n_gid_map);	/* worst case for every uid and gid extent together */
+		maps = rst_mem_alloc(size, RM_PRIVATE);
+		if (!maps) {
+			pr_err("Can't alloc rst mem for maps\n");
+			return -1;
+		}
+
+		for (i = 0, off = 0; i < e->n_uid_map; i++)
+			off += sprintf((char *)new->data + off, "%u %u %u\n",	/* NOTE(review): writes via new->data, not maps; assumes maps starts exactly at new->data - confirm */
+					e->uid_map[i]->first,
+					e->uid_map[i]->lower_first,
+					e->uid_map[i]->count);
+		new->uid_map_size = off;	/* sum of sprintf() return values, so the terminating NUL is not counted */
+
+		for (i = 0, off = 0; i < e->n_gid_map; i++)
+			off += sprintf((char *)new->data + new->uid_map_size + off, "%u %u %u\n",	/* gid text starts right after the uid text */
+					e->gid_map[i]->first,
+					e->gid_map[i]->lower_first,
+					e->gid_map[i]->count);
+		new->gid_map_size = off;
+
+		rst_mem_free_last_bytes(RM_PRIVATE, size - (new->uid_map_size + new->gid_map_size));	/* going by its name, gives back the part of the worst-case reservation that was not written */
+		ta->child_user_ns_n++;
+	}
+
+	return 0;
+}
+
 int collect_namespaces(bool for_dump)
 {
 	int ret;



More information about the CRIU mailing list