[CRIU] [PATCH 1/4] Support for dumping/restoring user namespaces

Sophie Blee-Goldman ableegoldman at google.com
Tue Aug 12 15:56:22 PDT 2014


Adds basic support for user namespaces by dumping and restoring
the namespace itself and the uid/gid maps of the root process.

This currently depends on a kernel patch that makes the prctl syscall check
for CAP_SYS_RESOURCE in the user namespace instead of in the global one;
without that patch the prctl call fails.

Signed-off-by: Sophie Blee-Goldman <ableegoldman at google.com>
---
 Makefile.crtools        |   1 +
 cr-restore.c            |   7 ++
 cr-show.c               |   2 +
 image-desc.c            |   1 +
 include/image-desc.h    |   1 +
 include/magic.h         |   1 +
 include/namespaces.h    |   1 -
 include/protobuf-desc.h |   5 +-
 include/syscall-types.h |   6 +-
 include/user_ns.h       |   9 ++
 namespaces.c            |  27 +++++-
 protobuf-desc.c         |   1 +
 protobuf/Makefile       |   1 +
 protobuf/core.proto     |   1 +
 protobuf/userns.proto   |   9 ++
 pstree.c                |   2 +
 user_ns.c               | 227 ++++++++++++++++++++++++++++++++++++++++++++++++
 17 files changed, 296 insertions(+), 6 deletions(-)
 create mode 100644 include/user_ns.h
 create mode 100644 protobuf/userns.proto
 create mode 100644 user_ns.c

diff --git a/Makefile.crtools b/Makefile.crtools
index 6033b2c..8e680d6 100644
--- a/Makefile.crtools
+++ b/Makefile.crtools
@@ -34,6 +34,7 @@ obj-y	+= pipes.o
 obj-y	+= fifo.o
 obj-y	+= file-ids.o
 obj-y	+= namespaces.o
+obj-y	+= user_ns.o
 obj-y	+= uts_ns.o
 obj-y	+= ipc_ns.o
 obj-y	+= netfilter.o
diff --git a/cr-restore.c b/cr-restore.c
index 2bc98e8..a93fa74 100644
--- a/cr-restore.c
+++ b/cr-restore.c
@@ -52,6 +52,7 @@
 #include "restorer-blob.h"
 #include "crtools.h"
 #include "namespaces.h"
+#include "user_ns.h"
 #include "mem.h"
 #include "mount.h"
 #include "fsnotify.h"
@@ -1612,6 +1613,12 @@ static int restore_root_task(struct pstree_item *init)
 	if (ret)
 		goto out;
 
+	if (root_ns_mask & CLONE_NEWUSER) {
+		ret = restore_user_ns(init->pid.real, init->ids->user_ns_id);
+		if (ret < 0)
+			goto out;
+	}
+
 	ret = run_scripts("setup-namespaces");
 	if (ret)
 		goto out;
diff --git a/cr-show.c b/cr-show.c
index 0e1a2c6..2b28746 100644
--- a/cr-show.c
+++ b/cr-show.c
@@ -21,6 +21,7 @@
 #include "util.h"
 #include "sockets.h"
 #include "image.h"
+#include "user_ns.h"
 #include "uts_ns.h"
 #include "ipc_ns.h"
 #include "pstree.h"
@@ -291,6 +292,7 @@ static struct show_image_info show_infos[] = {
 	SHOW_VERT(CORE),
 	SHOW_VERT(IDS),
 	SHOW_VERT(CREDS),
+	SHOW_VERT(USERNS),
 	SHOW_VERT(UTSNS),
 	SHOW_VERT(IPC_VAR),
 	SHOW_VERT(FS),
diff --git a/image-desc.c b/image-desc.c
index 49dc29d..814c3b2 100644
--- a/image-desc.c
+++ b/image-desc.c
@@ -52,6 +52,7 @@ struct cr_fd_desc_tmpl fdset_template[CR_FD_MAX] = {
 	FD_ENTRY(POSIX_TIMERS,	"posix-timers-%d"),
 	FD_ENTRY(CREDS,		"creds-%d"),
 	FD_ENTRY(UTSNS,		"utsns-%d"),
+	FD_ENTRY(USERNS,	"userns-%d"),
 	FD_ENTRY(IPC_VAR,	"ipcns-var-%d"),
 	FD_ENTRY(IPCNS_SHM,	"ipcns-shm-%d"),
 	FD_ENTRY(IPCNS_MSG,	"ipcns-msg-%d"),
diff --git a/include/image-desc.h b/include/image-desc.h
index 93b3392..18535e1 100644
--- a/include/image-desc.h
+++ b/include/image-desc.h
@@ -25,6 +25,7 @@ enum {
 	/*
 	 * NS entries
 	 */
+	CR_FD_USERNS,
 	CR_FD_UTSNS,
 	CR_FD_MNTS,
 
diff --git a/include/magic.h b/include/magic.h
index 5192a60..06db3e3 100644
--- a/include/magic.h
+++ b/include/magic.h
@@ -40,6 +40,7 @@
 #define ITIMERS_MAGIC		0x57464056 /* Kostroma */
 #define POSIX_TIMERS_MAGIC	0x52603957 /* Lipetsk */
 #define SK_QUEUES_MAGIC		0x56264026 /* Suzdal */
+#define USERNS_MAGIC		0x55474908 /* Kazan */
 #define UTSNS_MAGIC		0x54473203 /* Smolensk */
 #define CREDS_MAGIC		0x54023547 /* Kozelsk */
 #define IPC_VAR_MAGIC		0x53115007 /* Samara */
diff --git a/include/namespaces.h b/include/namespaces.h
index 350b8b4..bc67519 100644
--- a/include/namespaces.h
+++ b/include/namespaces.h
@@ -34,7 +34,6 @@ extern struct ns_id *ns_ids;
 extern bool check_ns_proc(struct fd_link *link);
 
 extern struct ns_desc pid_ns_desc;
-extern struct ns_desc user_ns_desc;
 extern unsigned long root_ns_mask;
 
 extern const struct fdtype_ops nsfile_dump_ops;
diff --git a/include/protobuf-desc.h b/include/protobuf-desc.h
index 01c9f4c..1c8f9ce 100644
--- a/include/protobuf-desc.h
+++ b/include/protobuf-desc.h
@@ -52,14 +52,15 @@ enum {
 	PB_IRMAP_CACHE,
 	PB_CGROUP,
 	PB_TIMERFD,
+	PB_USERNS,
 
 	/* PB_AUTOGEN_STOP */
 
 	PB_PAGEMAP_HEAD,
 	PB_IDS,
 	PB_SIGACT,
-	PB_NETDEV,
-	PB_REMAP_FPATH,		/* 50 */
+	PB_NETDEV,		/* 50 */
+	PB_REMAP_FPATH,
 	PB_SK_QUEUES,
 	PB_IPCNS_MSG,
 	PB_IPCNS_MSG_ENT,
diff --git a/include/syscall-types.h b/include/syscall-types.h
index bab3dba..eb270b3 100644
--- a/include/syscall-types.h
+++ b/include/syscall-types.h
@@ -57,7 +57,11 @@ struct itimerspec;
 #define CLONE_NEWNET	0x40000000
 #endif
 
-#define CLONE_ALLNS	(CLONE_NEWPID | CLONE_NEWNET | CLONE_NEWIPC | CLONE_NEWUTS | CLONE_NEWNS)
+#ifndef CLONE_NEWUSER
+#define CLONE_NEWUSER	0x10000000
+#endif
+
+#define CLONE_ALLNS	(CLONE_NEWPID | CLONE_NEWNET | CLONE_NEWIPC | CLONE_NEWUTS | CLONE_NEWNS | CLONE_NEWUSER)
 
 /* Nested namespaces are supported only for these types */
 #define CLONE_SUBNS	(CLONE_NEWNS)
diff --git a/include/user_ns.h b/include/user_ns.h
new file mode 100644
index 0000000..715b155
--- /dev/null
+++ b/include/user_ns.h
@@ -0,0 +1,9 @@
+#ifndef __CR_USER_NS_H__
+#define __CR_USER_NS_H__
+
+extern int dump_user_ns(int ns_pid, int ns_id);
+extern int restore_user_ns(int real_pid, int ns_id);
+
+extern struct ns_desc user_ns_desc;
+
+#endif /* __CR_USER_NS_H__ */
diff --git a/namespaces.c b/namespaces.c
index 6be030f..8c0d842 100644
--- a/namespaces.c
+++ b/namespaces.c
@@ -9,6 +9,7 @@
 #include "uts_ns.h"
 #include "ipc_ns.h"
 #include "mount.h"
+#include "user_ns.h"
 #include "pstree.h"
 #include "namespaces.h"
 #include "net.h"
@@ -271,7 +272,7 @@ struct ns_file_info {
 static int open_ns_fd(struct file_desc *d)
 {
 	struct ns_file_info *nfi = container_of(d, struct ns_file_info, d);
-	struct pstree_item *item, *t;
+	struct pstree_item *item = NULL, *t;
 	struct ns_desc *nd = NULL;
 	char path[64];
 	int fd;
@@ -304,6 +305,10 @@ static int open_ns_fd(struct file_desc *d)
 			item = t;
 			nd = &mnt_ns_desc;
 			break;
+		} else if (ids->user_ns_id == nfi->nfe->ns_id) {
+			item = t;
+			nd = &user_ns_desc;
+			break;
 		}
 	}
 
@@ -391,6 +396,13 @@ int dump_task_ns_ids(struct pstree_item *item)
 		return -1;
 	}
 
+	ids->has_user_ns_id = true;
+	ids->user_ns_id = get_ns_id(pid, &user_ns_desc);
+	if (!ids->user_ns_id) {
+		pr_err("Can't make userns id\n");
+		return -1;
+	}
+
 	return 0;
 }
 
@@ -446,6 +458,11 @@ static int do_dump_namespaces(struct ns_id *ns)
 				ns->id, ns->pid);
 		ret = dump_net_ns(ns->pid, ns->id);
 		break;
+	case CLONE_NEWUSER:
+		pr_info("Dump USER namespace info %d via %d\n",
+				ns->id, ns->pid);
+		ret = dump_user_ns(ns->pid, ns->id);
+		break;
 	default:
 		pr_err("Unknown namespace flag %x", ns->nd->cflag);
 		break;
@@ -604,9 +621,15 @@ int try_show_namespaces(int ns_pid)
 		close(fd);
 	}
 
+	fd = open_image(CR_FD_USERNS, O_SHOW, ids->user_ns_id);
+	if (fd > 0) {
+		pr_msg("-------------------USERNS---------------------\n");
+		cr_parse_fd(fd, fdset_template[CR_FD_USERNS].magic);
+		close(fd);
+	}
+
 	pr_msg("---[ end of %d namespaces ]---\n", ns_pid);
 	return 0;
 }
 
 struct ns_desc pid_ns_desc = NS_DESC_ENTRY(CLONE_NEWPID, "pid");
-struct ns_desc user_ns_desc = NS_DESC_ENTRY(CLONE_NEWUSER, "user");
diff --git a/protobuf-desc.c b/protobuf-desc.c
index b97418b..9199b09 100644
--- a/protobuf-desc.c
+++ b/protobuf-desc.c
@@ -38,6 +38,7 @@
 #include "protobuf/sk-packet.pb-c.h"
 #include "protobuf/creds.pb-c.h"
 #include "protobuf/timer.pb-c.h"
+#include "protobuf/userns.pb-c.h"
 #include "protobuf/utsns.pb-c.h"
 #include "protobuf/ipc-var.pb-c.h"
 #include "protobuf/ipc-shm.pb-c.h"
diff --git a/protobuf/Makefile b/protobuf/Makefile
index 7f6485b..cd2b854 100644
--- a/protobuf/Makefile
+++ b/protobuf/Makefile
@@ -50,6 +50,7 @@ proto-obj-y	+= ipc-shm.o
 proto-obj-y	+= ipc-msg.o
 proto-obj-y	+= ipc-sem.o
 proto-obj-y	+= utsns.o
+proto-obj-y	+= userns.o
 proto-obj-y	+= creds.o
 proto-obj-y	+= vma.o
 proto-obj-y	+= netdev.o
diff --git a/protobuf/core.proto b/protobuf/core.proto
index d850e2e..8810376 100644
--- a/protobuf/core.proto
+++ b/protobuf/core.proto
@@ -32,6 +32,7 @@ message task_kobj_ids_entry {
 	optional uint32			ipc_ns_id	= 7;
 	optional uint32			uts_ns_id	= 8;
 	optional uint32			mnt_ns_id	= 9;
+	optional uint32			user_ns_id	= 10;
 }
 
 message thread_sas_entry {
diff --git a/protobuf/userns.proto b/protobuf/userns.proto
new file mode 100644
index 0000000..31d7718
--- /dev/null
+++ b/protobuf/userns.proto
@@ -0,0 +1,9 @@
+message userns_entry {
+	message map_entry {
+		required uint32 id_in	= 1;	// 1st column of a /proc/<pid>/{uid,gid}_map line
+		required uint32 id_out	= 2;	// 2nd column of that line
+		required uint32 length	= 3;	// 3rd column of that line
+	}
+	repeated map_entry uid_map = 1;	// entries read from /proc/<pid>/uid_map
+	repeated map_entry gid_map = 2;	// entries read from /proc/<pid>/gid_map
+}
\ No newline at end of file
diff --git a/pstree.c b/pstree.c
index d005b64..c905317 100644
--- a/pstree.c
+++ b/pstree.c
@@ -603,6 +603,8 @@ static unsigned long get_clone_mask(TaskKobjIdsEntry *i,
 		mask |= CLONE_NEWUTS;
 	if (i->mnt_ns_id != p->mnt_ns_id)
 		mask |= CLONE_NEWNS;
+	if (i->user_ns_id != p->user_ns_id)
+		mask |= CLONE_NEWUSER;
 
 	return mask;
 }
diff --git a/user_ns.c b/user_ns.c
new file mode 100644
index 0000000..e90f068
--- /dev/null
+++ b/user_ns.c
@@ -0,0 +1,227 @@
+#include <unistd.h>
+
+#include "namespaces.h"
+#include "user_ns.h"
+#include "list.h"
+
+#include "protobuf.h"
+#include "protobuf/userns.pb-c.h"
+
+struct map_entry {
+	UsernsEntry__MapEntry   entry;	/* values parsed from one id-map line */
+	struct list_head	list;	/* link in the uid or gid entry list */
+};
+
+/* Free every map_entry queued on both lists, then pass fd to close_safe(). */
+static void cleanup(int *fd, struct list_head *uid_list,
+		    struct list_head *gid_list)
+{
+	struct list_head *lists[] = { uid_list, gid_list };
+	struct map_entry *cur, *next;
+	unsigned int i;
+
+	/* uid entries are released first, gid entries second */
+	for (i = 0; i < 2; i++) {
+		/* _safe iteration: cur is unlinked and freed in the body */
+		list_for_each_entry_safe(cur, next, lists[i], list) {
+			list_del(&cur->list);
+			xfree(cur);
+		}
+	}
+
+	close_safe(fd);
+}
+
+/* Fill map[] from its last slot down, i.e. in reverse of the list order. */
+static void fill_map(int n_entries, UsernsEntry__MapEntry **map,
+		     struct list_head *head)
+{
+	struct map_entry *node;
+	int slot = n_entries - 1;
+	list_for_each_entry(node, head, list)
+		map[slot--] = &node->entry;
+}
+
+/* Pack both id-map lists into a UsernsEntry and write it to fd. */
+static int write_pb(int fd, int n_uid_entries, int n_gid_entries,
+		    struct list_head *uid_list, struct list_head *gid_list)
+{
+	UsernsEntry ue = USERNS_ENTRY__INIT;
+	/* A zero-length VLA is undefined behaviour: always keep one slot. */
+	UsernsEntry__MapEntry *uidmap[n_uid_entries > 0 ? n_uid_entries : 1];
+	UsernsEntry__MapEntry *gidmap[n_gid_entries > 0 ? n_gid_entries : 1];
+
+	fill_map(n_uid_entries, uidmap, uid_list);
+	ue.uid_map = uidmap;
+	ue.n_uid_map = n_uid_entries;
+	fill_map(n_gid_entries, gidmap, gid_list);
+	ue.gid_map = gidmap;
+	ue.n_gid_map = n_gid_entries;
+	return pb_write_one(fd, &ue, PB_USERNS);
+}
+
+/* Queue each /proc/<pid>/<id_map> line on head; returns the count or -1. */
+static int read_map_entries(int pid, const char *id_map, struct list_head *head)
+{
+	UsernsEntry__MapEntry entry = USERNS_ENTRY__MAP_ENTRY__INIT;
+	int n_read, n_entries = 0;
+	struct map_entry *tmp;
+	FILE *fp;
+
+	pr_debug("Reading entries from /proc/%d/%s\n", pid, id_map);
+
+	fp = fopen_proc(pid, "%s", id_map);
+	if (!fp) {
+		pr_perror("Error opening /proc/%d/%s", pid, id_map);
+		return -1;
+	}
+
+	while ((n_read = fscanf(fp, "%u %u %u\n", &entry.id_in,
+				&entry.id_out, &entry.length)) != EOF) {
+		if (n_read != 3) {
+			/* A short match leaves errno unset, so no pr_perror() */
+			pr_err("Error reading /proc/%d/%s, fscanf returned %d\n",
+			       pid, id_map, n_read);
+			goto err;
+		}
+
+		tmp = xmalloc(sizeof(*tmp));
+		if (!tmp)
+			goto err;
+		tmp->entry = entry;
+		list_add(&tmp->list, head);
+		n_entries++;
+	}
+
+	if (fclose(fp) != 0) {
+		pr_perror("fclose(/proc/%d/%s) failed", pid, id_map);
+		return -1;
+	}
+	return n_entries;
+err:	/* entries queued so far are freed by the caller's cleanup() */
+	fclose(fp);
+	return -1;
+}
+
+/*
+ * Format map[0..n_entries) as "id_in id_out length" lines and write them to
+ * /proc/<pid>/<id_map> with a single write(). Returns 0 on success, -1 on error.
+ */
+static int write_map_entries(int pid, const char *id_map,
+                             UsernsEntry__MapEntry **map, size_t n_entries)
+{
+	int fd, n_written, total = 0, ret;
+	char buf[PAGE_SIZE];
+	size_t i;
+
+	pr_debug("Writing entries to /proc/%d/%s, n_entries=%lu\n",
+		 pid, id_map, (unsigned long)n_entries);
+
+	for (i = 0; i < n_entries; i++) {
+		n_written = snprintf(buf + total, sizeof(buf) - total,
+				     "%u %u %u\n", map[i]->id_in,
+				     map[i]->id_out, map[i]->length);
+		/* Truncation would push the next offset past the buffer. */
+		if (n_written < 0 || (size_t)n_written >= sizeof(buf) - total) {
+			pr_err("snprintf failed for %s of pid: %d\n", id_map, pid);
+			return -1;
+		}
+		total += n_written;
+	}
+
+	/* id_maps can only be written to once */
+	fd = open_proc_rw(pid, "%s", id_map);
+	if (fd < 0) {
+		pr_perror("Unable to open /proc/%d/%s", pid, id_map);
+		return -1;
+	}
+
+	ret = write(fd, buf, total) == total ? 0 : -1;
+	if (ret < 0)
+		pr_perror("Failed to write all %d bytes to /proc/%d/%s",
+			  total, pid, id_map);
+
+	close_safe(&fd);
+	return ret;
+}
+
+/*
+ * Dump the uid_map and gid_map of task ns_pid into the userns image for ns_id.
+ * Returns the write_pb() result, or -1 if the image or a map can't be read.
+ */
+int dump_user_ns(int ns_pid, int ns_id)
+{
+	int fd, n_uid_entries, n_gid_entries, ret = -1;
+	LIST_HEAD(uid_list);
+	LIST_HEAD(gid_list);
+
+	fd = open_image(CR_FD_USERNS, O_DUMP, ns_id);
+	if (fd < 0) {
+		pr_err("Error opening userns image\n");
+		return -1;
+	}
+
+	n_uid_entries = read_map_entries(ns_pid, "uid_map", &uid_list);
+	if (n_uid_entries < 0) {
+		pr_err("Error reading uid_map\n");
+		goto out;
+	}
+
+	n_gid_entries = read_map_entries(ns_pid, "gid_map", &gid_list);
+	if (n_gid_entries < 0) {
+		pr_err("Error reading gid_map\n");
+		goto out;
+	}
+
+	ret = write_pb(fd, n_uid_entries, n_gid_entries, &uid_list, &gid_list);
+
+out:
+	/* frees whatever was queued on either list and closes fd */
+	cleanup(&fd, &uid_list, &gid_list);
+	return ret;
+}
+
+/*
+ * Restore uid_map and gid_map file for the init process. Since this is called
+ * from the parent, we access these files using the 'real_pid' of the process.
+ * The maps are read from the userns image identified by ns_id.
+ * Returns 0 on success, -1 on failure.
+ */
+int restore_user_ns(int real_pid, int ns_id)
+{
+	int fd, ret;
+	UsernsEntry *ue;
+
+	pr_info("Restoring user namespace for real_pid:%d\n", real_pid);
+
+	fd = open_image(CR_FD_USERNS, O_RSTR, ns_id);
+	if (fd < 0)
+		return -1;
+
+	ret = pb_read_one(fd, &ue, PB_USERNS);
+	/* The image is not used past this point; close it on every path. */
+	close_safe(&fd);
+	if (ret < 0)
+		return -1;
+
+	pr_info("userns restoring: n_uid_map:%lu ; n_gid_map:%lu\n",
+		(unsigned long)ue->n_uid_map, (unsigned long)ue->n_gid_map);
+
+	/* restore uid_map */
+	ret = write_map_entries(real_pid, "uid_map", ue->uid_map, ue->n_uid_map);
+	if (ret < 0) {
+		pr_err("Failed to restore /proc/%d/uid_map\n", real_pid);
+		goto out;
+	}
+
+	/* restore gid_map */
+	ret = write_map_entries(real_pid, "gid_map", ue->gid_map, ue->n_gid_map);
+	if (ret < 0)
+		pr_err("Failed to restore /proc/%d/gid_map\n", real_pid);
+
+out:
+	userns_entry__free_unpacked(ue, NULL);
+	return ret;
+}
+/* User namespace descriptor: the CLONE_NEWUSER flag plus the "user" name. */
+struct ns_desc user_ns_desc = NS_DESC_ENTRY(CLONE_NEWUSER, "user");
-- 
2.1.0.rc2.206.gedb03e5



More information about the CRIU mailing list