[CRIU] [PATCH 1/4] Support for dumping/restoring user namespaces
Sophie Blee-Goldman
ableegoldman at google.com
Fri Aug 8 22:21:19 PDT 2014
Adds basic support for user namespaces by dumping and restoring
the namespace itself and the uid/gid maps of the root process.
This currently depends on a kernel patch that makes the prctl
syscall check for CAP_SYS_RESOURCE in the user namespace instead
of in the global one; without that patch the prctl syscall fails.
Signed-off-by: Sophie Blee-Goldman <ableegoldman at google.com>
diff --git a/Makefile.crtools b/Makefile.crtools
index 6033b2c..8e680d6 100644
--- a/Makefile.crtools
+++ b/Makefile.crtools
@@ -34,6 +34,7 @@ obj-y += pipes.o
obj-y += fifo.o
obj-y += file-ids.o
obj-y += namespaces.o
+obj-y += user_ns.o
obj-y += uts_ns.o
obj-y += ipc_ns.o
obj-y += netfilter.o
diff --git a/cr-restore.c b/cr-restore.c
index 3c36323..3c94b93 100644
--- a/cr-restore.c
+++ b/cr-restore.c
@@ -52,6 +52,7 @@
#include "restorer-blob.h"
#include "crtools.h"
#include "namespaces.h"
+#include "user_ns.h"
#include "mem.h"
#include "mount.h"
#include "fsnotify.h"
@@ -1630,6 +1631,12 @@ static int restore_root_task(struct pstree_item *init)
if (ret)
goto out;
+ if (root_ns_mask & CLONE_NEWUSER) {
+ ret = restore_user_ns(init->pid.real, init->ids->user_ns_id);
+ if (ret < 0)
+ goto out;
+ }
+
ret = run_scripts("setup-namespaces");
if (ret)
goto out;
diff --git a/cr-show.c b/cr-show.c
index 5549c8d..fd33196 100644
--- a/cr-show.c
+++ b/cr-show.c
@@ -21,6 +21,7 @@
#include "util.h"
#include "sockets.h"
#include "image.h"
+#include "user_ns.h"
#include "uts_ns.h"
#include "ipc_ns.h"
#include "pstree.h"
@@ -291,6 +292,7 @@ static struct show_image_info show_infos[] = {
SHOW_VERT(CORE),
SHOW_VERT(IDS),
SHOW_VERT(CREDS),
+ SHOW_VERT(USERNS),
SHOW_VERT(UTSNS),
SHOW_VERT(IPC_VAR),
SHOW_VERT(FS),
diff --git a/image-desc.c b/image-desc.c
index 1e0e3f0..a9859f3 100644
--- a/image-desc.c
+++ b/image-desc.c
@@ -52,6 +52,7 @@ struct cr_fd_desc_tmpl fdset_template[CR_FD_MAX] = {
FD_ENTRY(POSIX_TIMERS, "posix-timers-%d"),
FD_ENTRY(CREDS, "creds-%d"),
FD_ENTRY(UTSNS, "utsns-%d"),
+ FD_ENTRY(USERNS, "userns-%d"),
FD_ENTRY(IPC_VAR, "ipcns-var-%d"),
FD_ENTRY(IPCNS_SHM, "ipcns-shm-%d"),
FD_ENTRY(IPCNS_MSG, "ipcns-msg-%d"),
diff --git a/include/image-desc.h b/include/image-desc.h
index eb42990..2db5237 100644
--- a/include/image-desc.h
+++ b/include/image-desc.h
@@ -26,6 +26,7 @@ enum {
/*
* NS entries
*/
+ CR_FD_USERNS,
CR_FD_UTSNS,
CR_FD_MNTS,
diff --git a/include/magic.h b/include/magic.h
index 5192a60..06db3e3 100644
--- a/include/magic.h
+++ b/include/magic.h
@@ -40,6 +40,7 @@
#define ITIMERS_MAGIC 0x57464056 /* Kostroma */
#define POSIX_TIMERS_MAGIC 0x52603957 /* Lipetsk */
#define SK_QUEUES_MAGIC 0x56264026 /* Suzdal */
+#define USERNS_MAGIC 0x55474908 /* Kazan */
#define UTSNS_MAGIC 0x54473203 /* Smolensk */
#define CREDS_MAGIC 0x54023547 /* Kozelsk */
#define IPC_VAR_MAGIC 0x53115007 /* Samara */
diff --git a/include/namespaces.h b/include/namespaces.h
index 350b8b4..bc67519 100644
--- a/include/namespaces.h
+++ b/include/namespaces.h
@@ -34,7 +34,6 @@ extern struct ns_id *ns_ids;
extern bool check_ns_proc(struct fd_link *link);
extern struct ns_desc pid_ns_desc;
-extern struct ns_desc user_ns_desc;
extern unsigned long root_ns_mask;
extern const struct fdtype_ops nsfile_dump_ops;
diff --git a/include/protobuf-desc.h b/include/protobuf-desc.h
index 01c9f4c..1c8f9ce 100644
--- a/include/protobuf-desc.h
+++ b/include/protobuf-desc.h
@@ -52,14 +52,15 @@ enum {
PB_IRMAP_CACHE,
PB_CGROUP,
PB_TIMERFD,
+ PB_USERNS,
/* PB_AUTOGEN_STOP */
PB_PAGEMAP_HEAD,
PB_IDS,
PB_SIGACT,
- PB_NETDEV,
- PB_REMAP_FPATH, /* 50 */
+ PB_NETDEV, /* 50 */
+ PB_REMAP_FPATH,
PB_SK_QUEUES,
PB_IPCNS_MSG,
PB_IPCNS_MSG_ENT,
diff --git a/include/syscall-types.h b/include/syscall-types.h
index bab3dba..eb270b3 100644
--- a/include/syscall-types.h
+++ b/include/syscall-types.h
@@ -57,7 +57,11 @@ struct itimerspec;
#define CLONE_NEWNET 0x40000000
#endif
-#define CLONE_ALLNS (CLONE_NEWPID | CLONE_NEWNET | CLONE_NEWIPC | CLONE_NEWUTS | CLONE_NEWNS)
+#ifndef CLONE_NEWUSER
+#define CLONE_NEWUSER 0x10000000
+#endif
+
+#define CLONE_ALLNS (CLONE_NEWPID | CLONE_NEWNET | CLONE_NEWIPC | CLONE_NEWUTS | CLONE_NEWNS | CLONE_NEWUSER)
/* Nested namespaces are supported only for these types */
#define CLONE_SUBNS (CLONE_NEWNS)
diff --git a/include/user_ns.h b/include/user_ns.h
new file mode 100644
index 0000000..715b155
--- /dev/null
+++ b/include/user_ns.h
@@ -0,0 +1,9 @@
+#ifndef __CR_USER_NS_H__
+#define __CR_USER_NS_H__
+
+extern int dump_user_ns(int ns_pid, int ns_id); /* save /proc/<ns_pid>/{uid,gid}_map into the userns image for ns_id */
+extern int restore_user_ns(int real_pid, int ns_id); /* write the maps saved for ns_id into /proc/<real_pid>/{uid,gid}_map */
+
+extern struct ns_desc user_ns_desc; /* defined in user_ns.c */
+
+#endif /* __CR_USER_NS_H__ */
diff --git a/namespaces.c b/namespaces.c
index 6be030f..8c0d842 100644
--- a/namespaces.c
+++ b/namespaces.c
@@ -9,6 +9,7 @@
#include "uts_ns.h"
#include "ipc_ns.h"
#include "mount.h"
+#include "user_ns.h"
#include "pstree.h"
#include "namespaces.h"
#include "net.h"
@@ -271,7 +272,7 @@ struct ns_file_info {
static int open_ns_fd(struct file_desc *d)
{
struct ns_file_info *nfi = container_of(d, struct ns_file_info, d);
- struct pstree_item *item, *t;
+ struct pstree_item *item = NULL, *t;
struct ns_desc *nd = NULL;
char path[64];
int fd;
@@ -304,6 +305,10 @@ static int open_ns_fd(struct file_desc *d)
item = t;
nd = &mnt_ns_desc;
break;
+ } else if (ids->user_ns_id == nfi->nfe->ns_id) {
+ item = t;
+ nd = &user_ns_desc;
+ break;
}
}
@@ -391,6 +396,13 @@ int dump_task_ns_ids(struct pstree_item *item)
return -1;
}
+ ids->has_user_ns_id = true;
+ ids->user_ns_id = get_ns_id(pid, &user_ns_desc);
+ if (!ids->user_ns_id) {
+ pr_err("Can't make userns id\n");
+ return -1;
+ }
+
return 0;
}
@@ -446,6 +458,11 @@ static int do_dump_namespaces(struct ns_id *ns)
ns->id, ns->pid);
ret = dump_net_ns(ns->pid, ns->id);
break;
+ case CLONE_NEWUSER:
+ pr_info("Dump USER namespace info %d via %d\n",
+ ns->id, ns->pid);
+ ret = dump_user_ns(ns->pid, ns->id);
+ break;
default:
pr_err("Unknown namespace flag %x", ns->nd->cflag);
break;
@@ -604,9 +621,15 @@ int try_show_namespaces(int ns_pid)
close(fd);
}
+ fd = open_image(CR_FD_USERNS, O_SHOW, ids->user_ns_id);
+ if (fd > 0) {
+ pr_msg("-------------------USERNS---------------------\n");
+ cr_parse_fd(fd, fdset_template[CR_FD_USERNS].magic);
+ close(fd);
+ }
+
pr_msg("---[ end of %d namespaces ]---\n", ns_pid);
return 0;
}
struct ns_desc pid_ns_desc = NS_DESC_ENTRY(CLONE_NEWPID, "pid");
-struct ns_desc user_ns_desc = NS_DESC_ENTRY(CLONE_NEWUSER, "user");
diff --git a/protobuf-desc.c b/protobuf-desc.c
index b97418b..9199b09 100644
--- a/protobuf-desc.c
+++ b/protobuf-desc.c
@@ -38,6 +38,7 @@
#include "protobuf/sk-packet.pb-c.h"
#include "protobuf/creds.pb-c.h"
#include "protobuf/timer.pb-c.h"
+#include "protobuf/userns.pb-c.h"
#include "protobuf/utsns.pb-c.h"
#include "protobuf/ipc-var.pb-c.h"
#include "protobuf/ipc-shm.pb-c.h"
diff --git a/protobuf/Makefile b/protobuf/Makefile
index 7f6485b..cd2b854 100644
--- a/protobuf/Makefile
+++ b/protobuf/Makefile
@@ -50,6 +50,7 @@ proto-obj-y += ipc-shm.o
proto-obj-y += ipc-msg.o
proto-obj-y += ipc-sem.o
proto-obj-y += utsns.o
+proto-obj-y += userns.o
proto-obj-y += creds.o
proto-obj-y += vma.o
proto-obj-y += netdev.o
diff --git a/protobuf/core.proto b/protobuf/core.proto
index d850e2e..8810376 100644
--- a/protobuf/core.proto
+++ b/protobuf/core.proto
@@ -32,6 +32,7 @@ message task_kobj_ids_entry {
optional uint32 ipc_ns_id = 7;
optional uint32 uts_ns_id = 8;
optional uint32 mnt_ns_id = 9;
+ optional uint32 user_ns_id = 10;
}
message thread_sas_entry {
diff --git a/protobuf/userns.proto b/protobuf/userns.proto
new file mode 100644
index 0000000..31d7718
--- /dev/null
+++ b/protobuf/userns.proto
@@ -0,0 +1,9 @@
+message userns_entry { // uid and gid maps saved for one user namespace
+ message map_entry { // one line of a uid_map/gid_map file
+ required uint32 id_in = 1; // first number on the line
+ required uint32 id_out = 2; // second number on the line
+ required uint32 length = 3; // third number on the line
+ }
+ repeated map_entry uid_map = 1; // lines read from /proc/<pid>/uid_map
+ repeated map_entry gid_map = 2; // lines read from /proc/<pid>/gid_map
+}
\ No newline at end of file
diff --git a/pstree.c b/pstree.c
index d005b64..c905317 100644
--- a/pstree.c
+++ b/pstree.c
@@ -603,6 +603,8 @@ static unsigned long get_clone_mask(TaskKobjIdsEntry *i,
mask |= CLONE_NEWUTS;
if (i->mnt_ns_id != p->mnt_ns_id)
mask |= CLONE_NEWNS;
+ if (i->user_ns_id != p->user_ns_id)
+ mask |= CLONE_NEWUSER;
return mask;
}
diff --git a/user_ns.c b/user_ns.c
new file mode 100644
index 0000000..ef92f2d
--- /dev/null
+++ b/user_ns.c
@@ -0,0 +1,228 @@
+#include <unistd.h>
+
+#include "namespaces.h"
+#include "user_ns.h"
+#include "list.h"
+
+#include "protobuf.h"
+#include "protobuf/userns.pb-c.h"
+
+struct map_entry { /* one parsed map line, queued on a list while dumping */
+ UsernsEntry__MapEntry entry; /* id_in / id_out / length as read from the map file */
+ struct list_head list; /* link in the per-map (uid or gid) list */
+};
+
+/* Unlink and free every map_entry queued on @head. */
+static void free_map_list(struct list_head *head)
+{
+	struct map_entry *cur, *next;
+
+	/* _safe variant: the current node is freed inside the loop body */
+	list_for_each_entry_safe(cur, next, head, list) {
+		list_del(&cur->list);
+		xfree(cur);
+	}
+}
+
+/*
+ * Common teardown for dump_user_ns(): drop all collected uid and gid map
+ * entries, then hand @fd to close_safe().
+ */
+static void cleanup(int *fd, struct list_head *uid_list,
+		    struct list_head *gid_list)
+{
+	free_map_list(uid_list);
+	free_map_list(gid_list);
+	close_safe(fd);
+}
+
+/*
+ * Store a pointer to each entry queued on @head into @map.
+ *
+ * Slots are filled from index @n_entries - 1 downwards, so the array ends
+ * up in the reverse of list order. @map must have room for one slot per
+ * list element; no bound check is done here.
+ */
+static void fill_map(int n_entries, UsernsEntry__MapEntry **map,
+		     struct list_head *head)
+{
+	struct map_entry *node;
+	int slot = n_entries;
+
+	list_for_each_entry(node, head, list)
+		map[--slot] = &node->entry;
+}
+
+/*
+ * Serialize the collected uid and gid map entries as one UsernsEntry
+ * message and write it to the image @fd.
+ *
+ * @n_uid_entries / @n_gid_entries are the number of elements queued on
+ * @uid_list / @gid_list. The pointer arrays handed to protobuf are
+ * allocated on the heap: a map may legitimately be empty, and a
+ * variable-length array of size zero is undefined behaviour in C.
+ * An empty map is written as a repeated field with zero elements.
+ *
+ * Returns the result of pb_write_one(), or -1 if an array cannot be
+ * allocated.
+ */
+static int write_pb(int fd, int n_uid_entries, int n_gid_entries,
+		    struct list_head *uid_list, struct list_head *gid_list)
+{
+	UsernsEntry ue = USERNS_ENTRY__INIT;
+	UsernsEntry__MapEntry **uidmap = NULL, **gidmap = NULL;
+	int ret = -1;
+
+	if (n_uid_entries > 0) {
+		uidmap = xmalloc((size_t)n_uid_entries * sizeof(*uidmap));
+		if (!uidmap)
+			goto out;
+		fill_map(n_uid_entries, uidmap, uid_list);
+		ue.uid_map = uidmap;
+		ue.n_uid_map = n_uid_entries;
+	}
+
+	if (n_gid_entries > 0) {
+		gidmap = xmalloc((size_t)n_gid_entries * sizeof(*gidmap));
+		if (!gidmap)
+			goto out;
+		fill_map(n_gid_entries, gidmap, gid_list);
+		ue.gid_map = gidmap;
+		ue.n_gid_map = n_gid_entries;
+	}
+
+	ret = pb_write_one(fd, &ue, PB_USERNS);
+
+out:
+	/* the arrays only borrow the entries; the lists still own them */
+	if (gidmap)
+		xfree(gidmap);
+	if (uidmap)
+		xfree(uidmap);
+	return ret;
+}
+
+/*
+ * Parse the id map file @map_fname ("<id_in> <id_out> <length>" per
+ * line) and queue one map_entry per line on @head.
+ *
+ * Each entry is added with list_add(), so the list does not keep file
+ * order; fill_map() fills its array back to front when the list is
+ * later flattened.
+ *
+ * Returns the number of entries read (0 for an empty map), or -1 on
+ * open, read, parse, allocation or close failure. On failure the
+ * entries queued so far stay on @head and must be freed by the caller.
+ */
+static int read_map_entries(const char *map_fname, struct list_head *head)
+{
+	UsernsEntry__MapEntry parsed = USERNS_ENTRY__MAP_ENTRY__INIT;
+	struct map_entry *node;
+	int n_read, n_entries = 0;
+	FILE *fp;
+
+	pr_debug("Reading entries from %s\n", map_fname);
+
+	fp = fopen(map_fname, "r");
+	if (!fp) {
+		pr_perror("Error opening %s", map_fname);
+		return -1;
+	}
+
+	for (;;) {
+		n_read = fscanf(fp, "%u %u %u\n", &parsed.id_in,
+				&parsed.id_out, &parsed.length);
+		if (n_read == EOF)
+			break;
+
+		if (n_read != 3) {
+			/* malformed line: errno carries no information here */
+			pr_err("Error reading %s, fscanf returned %d\n",
+			       map_fname, n_read);
+			goto err_close;
+		}
+
+		node = xmalloc(sizeof(*node));
+		if (!node)
+			goto err_close;
+
+		node->entry = parsed;
+		list_add(&node->list, head);
+		n_entries++;
+	}
+
+	/* EOF from fscanf() also covers I/O errors; do not take those for end of map */
+	if (ferror(fp)) {
+		pr_perror("Error reading %s", map_fname);
+		goto err_close;
+	}
+
+	if (fclose(fp) != 0) {
+		pr_perror("fclose(%s) failed", map_fname);
+		return -1;
+	}
+
+	return n_entries;
+
+err_close:
+	fclose(fp);
+	return -1;
+}
+
+/*
+ * Write @n_entries extents from @map to the id map file @map_fname,
+ * one "<id_in> <id_out> <length>" line per extent.
+ *
+ * Output goes through a stdio stream, so a failed write may only show
+ * up when the stream is flushed; the fclose() result is therefore
+ * checked as well.
+ * NOTE(review): the kernel is understood to accept only a single
+ * write(2) to a uid_map/gid_map file, so this relies on all lines
+ * fitting into the stdio buffer - confirm.
+ *
+ * Returns 0 on success, -1 if the file cannot be opened, formatted
+ * into, or closed.
+ */
+static int write_map_entries(const char *map_fname,
+			     UsernsEntry__MapEntry **map,
+			     size_t n_entries)
+{
+	size_t i;
+	FILE *fp;
+
+	/* %zu: n_entries is a size_t, which is not unsigned long everywhere */
+	pr_debug("Writing entries to %s, n_entries=%zu\n",
+		 map_fname, n_entries);
+
+	fp = fopen(map_fname, "w");
+	if (!fp) {
+		pr_perror("Unable to open %s", map_fname);
+		return -1;
+	}
+
+	for (i = 0; i < n_entries; i++) {
+		if (fprintf(fp, "%u %u %u\n", map[i]->id_in,
+			    map[i]->id_out, map[i]->length) < 0) {
+			pr_err("fprintf to %s failed.\n", map_fname);
+			fclose(fp);
+			return -1;
+		}
+	}
+
+	if (fclose(fp) != 0) {
+		pr_perror("fclose(%s) failed", map_fname);
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * Dump the uid and gid maps of a user namespace.
+ *
+ * @ns_pid: pid whose /proc/<pid>/uid_map and /proc/<pid>/gid_map are read
+ * @ns_id:  id passed to open_image() to select the userns image
+ *
+ * Returns the result of write_pb() when both maps were read, or -1 if
+ * the image cannot be opened or either map cannot be read.
+ */
+int dump_user_ns(int ns_pid, int ns_id)
+{
+	int fd, ret = -1, n_uid_entries, n_gid_entries;
+	char map_fname[PATH_MAX];
+
+	LIST_HEAD(uid_list);
+	LIST_HEAD(gid_list);
+
+	fd = open_image(CR_FD_USERNS, O_DUMP, ns_id);
+	if (fd < 0) {
+		pr_err("Error opening userns image\n");
+		return -1;
+	}
+
+	/* read uid map */
+	snprintf(map_fname, sizeof(map_fname), "/proc/%d/uid_map", ns_pid);
+	n_uid_entries = read_map_entries(map_fname, &uid_list);
+	if (n_uid_entries < 0) {
+		pr_err("Error reading uid_map\n");
+		goto out;
+	}
+
+	/* read gid map */
+	snprintf(map_fname, sizeof(map_fname), "/proc/%d/gid_map", ns_pid);
+	n_gid_entries = read_map_entries(map_fname, &gid_list);
+	if (n_gid_entries < 0) {
+		pr_err("Error reading gid_map\n");
+		goto out;
+	}
+
+	ret = write_pb(fd, n_uid_entries, n_gid_entries, &uid_list, &gid_list);
+
+out:
+	/* also frees entries queued by a read that failed part-way */
+	cleanup(&fd, &uid_list, &gid_list);
+	return ret;
+}
+
+/*
+ * Restore uid_map and gid_map file for the init process. Since this is called
+ * from the parent, we access these files using the 'real_pid' of the process.
+ *
+ * @real_pid: pid used to build the /proc/<pid>/{uid,gid}_map paths
+ * @ns_id:    id passed to open_image() to select the userns image
+ *
+ * Returns 0 when both maps were written, -1 if the image cannot be
+ * opened or read, or the negative result of write_map_entries() if a
+ * map cannot be written. The image descriptor is closed on every path
+ * once it has been opened.
+ */
+int restore_user_ns(int real_pid, int ns_id)
+{
+	int fd, ret;
+	UsernsEntry *ue = NULL;
+	char map_fname[PATH_MAX];
+
+	pr_info("Restoring user namespace for real_pid:%d\n", real_pid);
+
+	fd = open_image(CR_FD_USERNS, O_RSTR, ns_id);
+	if (fd < 0)
+		return -1;
+
+	ret = pb_read_one(fd, &ue, PB_USERNS);
+	if (ret < 0) {
+		/* nothing was unpacked, but the image fd must not leak */
+		ret = -1;
+		goto out_close;
+	}
+
+	/* %zu: the n_* counters are size_t */
+	pr_info("userns restoring: n_uid_map:%zu ; n_gid_map:%zu\n",
+		ue->n_uid_map, ue->n_gid_map);
+
+	/* restore uid_map */
+	snprintf(map_fname, sizeof(map_fname), "/proc/%d/uid_map", real_pid);
+	ret = write_map_entries(map_fname, ue->uid_map, ue->n_uid_map);
+	if (ret < 0) {
+		pr_err("Failed to restore %s\n", map_fname);
+		goto out_free;
+	}
+
+	/* restore gid_map */
+	snprintf(map_fname, sizeof(map_fname), "/proc/%d/gid_map", real_pid);
+	ret = write_map_entries(map_fname, ue->gid_map, ue->n_gid_map);
+	if (ret < 0)
+		pr_err("Failed to restore %s\n", map_fname);
+
+out_free:
+	userns_entry__free_unpacked(ue, NULL);
+out_close:
+	close_safe(&fd);
+	return ret;
+}
+
+struct ns_desc user_ns_desc = NS_DESC_ENTRY(CLONE_NEWUSER, "user"); /* moved here from namespaces.c by this patch */
--
2.0.0.526.g5318336
More information about the CRIU
mailing list