[CRIU] [PATCH 1/4] Support for dumping/restoring user namespaces

Sophie Blee-Goldman ableegoldman at google.com
Tue Aug 12 15:58:48 PDT 2014


Here is the patch with the fixes you pointed out. I am still looking at the
zdtm tests at the moment.


On Tue, Aug 12, 2014 at 3:56 PM, Sophie Blee-Goldman <
ableegoldman at google.com> wrote:

> Adds basic support for user namespaces by dumping and restoring
> the namespace itself and the uid/gid maps of the root process.
>
> Currently depends on a kernel patch that makes the prctl syscall check
> for CAP_SYS_RESOURCE in the user namespace instead of in the global
> one; without that kernel patch the prctl call fails.
>
> Signed-off-by: Sophie Blee-Goldman <ableegoldman at google.com>
> ---
>  Makefile.crtools        |   1 +
>  cr-restore.c            |   7 ++
>  cr-show.c               |   2 +
>  image-desc.c            |   1 +
>  include/image-desc.h    |   1 +
>  include/magic.h         |   1 +
>  include/namespaces.h    |   1 -
>  include/protobuf-desc.h |   5 +-
>  include/syscall-types.h |   6 +-
>  include/user_ns.h       |   9 ++
>  namespaces.c            |  27 +++++-
>  protobuf-desc.c         |   1 +
>  protobuf/Makefile       |   1 +
>  protobuf/core.proto     |   1 +
>  protobuf/userns.proto   |   9 ++
>  pstree.c                |   2 +
>  user_ns.c               | 227
> ++++++++++++++++++++++++++++++++++++++++++++++++
>  17 files changed, 296 insertions(+), 6 deletions(-)
>  create mode 100644 include/user_ns.h
>  create mode 100644 protobuf/userns.proto
>  create mode 100644 user_ns.c
>
> diff --git a/Makefile.crtools b/Makefile.crtools
> index 6033b2c..8e680d6 100644
> --- a/Makefile.crtools
> +++ b/Makefile.crtools
> @@ -34,6 +34,7 @@ obj-y += pipes.o
>  obj-y  += fifo.o
>  obj-y  += file-ids.o
>  obj-y  += namespaces.o
> +obj-y  += user_ns.o
>  obj-y  += uts_ns.o
>  obj-y  += ipc_ns.o
>  obj-y  += netfilter.o
> diff --git a/cr-restore.c b/cr-restore.c
> index 2bc98e8..a93fa74 100644
> --- a/cr-restore.c
> +++ b/cr-restore.c
> @@ -52,6 +52,7 @@
>  #include "restorer-blob.h"
>  #include "crtools.h"
>  #include "namespaces.h"
> +#include "user_ns.h"
>  #include "mem.h"
>  #include "mount.h"
>  #include "fsnotify.h"
> @@ -1612,6 +1613,12 @@ static int restore_root_task(struct pstree_item
> *init)
>         if (ret)
>                 goto out;
>
> +       if (root_ns_mask & CLONE_NEWUSER) {
> +               ret = restore_user_ns(init->pid.real,
> init->ids->user_ns_id);
> +               if (ret < 0)
> +                       goto out;
> +       }
> +
>         ret = run_scripts("setup-namespaces");
>         if (ret)
>                 goto out;
> diff --git a/cr-show.c b/cr-show.c
> index 0e1a2c6..2b28746 100644
> --- a/cr-show.c
> +++ b/cr-show.c
> @@ -21,6 +21,7 @@
>  #include "util.h"
>  #include "sockets.h"
>  #include "image.h"
> +#include "user_ns.h"
>  #include "uts_ns.h"
>  #include "ipc_ns.h"
>  #include "pstree.h"
> @@ -291,6 +292,7 @@ static struct show_image_info show_infos[] = {
>         SHOW_VERT(CORE),
>         SHOW_VERT(IDS),
>         SHOW_VERT(CREDS),
> +       SHOW_VERT(USERNS),
>         SHOW_VERT(UTSNS),
>         SHOW_VERT(IPC_VAR),
>         SHOW_VERT(FS),
> diff --git a/image-desc.c b/image-desc.c
> index 49dc29d..814c3b2 100644
> --- a/image-desc.c
> +++ b/image-desc.c
> @@ -52,6 +52,7 @@ struct cr_fd_desc_tmpl fdset_template[CR_FD_MAX] = {
>         FD_ENTRY(POSIX_TIMERS,  "posix-timers-%d"),
>         FD_ENTRY(CREDS,         "creds-%d"),
>         FD_ENTRY(UTSNS,         "utsns-%d"),
> +       FD_ENTRY(USERNS,        "userns-%d"),
>         FD_ENTRY(IPC_VAR,       "ipcns-var-%d"),
>         FD_ENTRY(IPCNS_SHM,     "ipcns-shm-%d"),
>         FD_ENTRY(IPCNS_MSG,     "ipcns-msg-%d"),
> diff --git a/include/image-desc.h b/include/image-desc.h
> index 93b3392..18535e1 100644
> --- a/include/image-desc.h
> +++ b/include/image-desc.h
> @@ -25,6 +25,7 @@ enum {
>         /*
>          * NS entries
>          */
> +       CR_FD_USERNS,
>         CR_FD_UTSNS,
>         CR_FD_MNTS,
>
> diff --git a/include/magic.h b/include/magic.h
> index 5192a60..06db3e3 100644
> --- a/include/magic.h
> +++ b/include/magic.h
> @@ -40,6 +40,7 @@
>  #define ITIMERS_MAGIC          0x57464056 /* Kostroma */
>  #define POSIX_TIMERS_MAGIC     0x52603957 /* Lipetsk */
>  #define SK_QUEUES_MAGIC                0x56264026 /* Suzdal */
> +#define USERNS_MAGIC           0x55474908 /* Kazan */
>  #define UTSNS_MAGIC            0x54473203 /* Smolensk */
>  #define CREDS_MAGIC            0x54023547 /* Kozelsk */
>  #define IPC_VAR_MAGIC          0x53115007 /* Samara */
> diff --git a/include/namespaces.h b/include/namespaces.h
> index 350b8b4..bc67519 100644
> --- a/include/namespaces.h
> +++ b/include/namespaces.h
> @@ -34,7 +34,6 @@ extern struct ns_id *ns_ids;
>  extern bool check_ns_proc(struct fd_link *link);
>
>  extern struct ns_desc pid_ns_desc;
> -extern struct ns_desc user_ns_desc;
>  extern unsigned long root_ns_mask;
>
>  extern const struct fdtype_ops nsfile_dump_ops;
> diff --git a/include/protobuf-desc.h b/include/protobuf-desc.h
> index 01c9f4c..1c8f9ce 100644
> --- a/include/protobuf-desc.h
> +++ b/include/protobuf-desc.h
> @@ -52,14 +52,15 @@ enum {
>         PB_IRMAP_CACHE,
>         PB_CGROUP,
>         PB_TIMERFD,
> +       PB_USERNS,
>
>         /* PB_AUTOGEN_STOP */
>
>         PB_PAGEMAP_HEAD,
>         PB_IDS,
>         PB_SIGACT,
> -       PB_NETDEV,
> -       PB_REMAP_FPATH,         /* 50 */
> +       PB_NETDEV,              /* 50 */
> +       PB_REMAP_FPATH,
>         PB_SK_QUEUES,
>         PB_IPCNS_MSG,
>         PB_IPCNS_MSG_ENT,
> diff --git a/include/syscall-types.h b/include/syscall-types.h
> index bab3dba..eb270b3 100644
> --- a/include/syscall-types.h
> +++ b/include/syscall-types.h
> @@ -57,7 +57,11 @@ struct itimerspec;
>  #define CLONE_NEWNET   0x40000000
>  #endif
>
> -#define CLONE_ALLNS    (CLONE_NEWPID | CLONE_NEWNET | CLONE_NEWIPC |
> CLONE_NEWUTS | CLONE_NEWNS)
> +#ifndef CLONE_NEWUSER
> +#define CLONE_NEWUSER  0x10000000
> +#endif
> +
> +#define CLONE_ALLNS    (CLONE_NEWPID | CLONE_NEWNET | CLONE_NEWIPC |
> CLONE_NEWUTS | CLONE_NEWNS | CLONE_NEWUSER)
>
>  /* Nested namespaces are supported only for these types */
>  #define CLONE_SUBNS    (CLONE_NEWNS)
> diff --git a/include/user_ns.h b/include/user_ns.h
> new file mode 100644
> index 0000000..715b155
> --- /dev/null
> +++ b/include/user_ns.h
> @@ -0,0 +1,9 @@
> +#ifndef __CR_USER_NS_H__
> +#define __CR_USER_NS_H__
> +
> +extern int dump_user_ns(int ns_pid, int ns_id);
> +extern int restore_user_ns(int real_pid, int ns_id);
> +
> +extern struct ns_desc user_ns_desc;
> +
> +#endif /* __CR_USER_NS_H__ */
> diff --git a/namespaces.c b/namespaces.c
> index 6be030f..8c0d842 100644
> --- a/namespaces.c
> +++ b/namespaces.c
> @@ -9,6 +9,7 @@
>  #include "uts_ns.h"
>  #include "ipc_ns.h"
>  #include "mount.h"
> +#include "user_ns.h"
>  #include "pstree.h"
>  #include "namespaces.h"
>  #include "net.h"
> @@ -271,7 +272,7 @@ struct ns_file_info {
>  static int open_ns_fd(struct file_desc *d)
>  {
>         struct ns_file_info *nfi = container_of(d, struct ns_file_info, d);
> -       struct pstree_item *item, *t;
> +       struct pstree_item *item = NULL, *t;
>         struct ns_desc *nd = NULL;
>         char path[64];
>         int fd;
> @@ -304,6 +305,10 @@ static int open_ns_fd(struct file_desc *d)
>                         item = t;
>                         nd = &mnt_ns_desc;
>                         break;
> +               } else if (ids->user_ns_id == nfi->nfe->ns_id) {
> +                       item = t;
> +                       nd = &user_ns_desc;
> +                       break;
>                 }
>         }
>
> @@ -391,6 +396,13 @@ int dump_task_ns_ids(struct pstree_item *item)
>                 return -1;
>         }
>
> +       ids->has_user_ns_id = true;
> +       ids->user_ns_id = get_ns_id(pid, &user_ns_desc);
> +       if (!ids->user_ns_id) {
> +               pr_err("Can't make userns id\n");
> +               return -1;
> +       }
> +
>         return 0;
>  }
>
> @@ -446,6 +458,11 @@ static int do_dump_namespaces(struct ns_id *ns)
>                                 ns->id, ns->pid);
>                 ret = dump_net_ns(ns->pid, ns->id);
>                 break;
> +       case CLONE_NEWUSER:
> +               pr_info("Dump USER namespace info %d via %d\n",
> +                               ns->id, ns->pid);
> +               ret = dump_user_ns(ns->pid, ns->id);
> +               break;
>         default:
>                 pr_err("Unknown namespace flag %x", ns->nd->cflag);
>                 break;
> @@ -604,9 +621,15 @@ int try_show_namespaces(int ns_pid)
>                 close(fd);
>         }
>
> +       fd = open_image(CR_FD_USERNS, O_SHOW, ids->user_ns_id);
> +       if (fd > 0) {
> +               pr_msg("-------------------USERNS---------------------\n");
> +               cr_parse_fd(fd, fdset_template[CR_FD_USERNS].magic);
> +               close(fd);
> +       }
> +
>         pr_msg("---[ end of %d namespaces ]---\n", ns_pid);
>         return 0;
>  }
>
>  struct ns_desc pid_ns_desc = NS_DESC_ENTRY(CLONE_NEWPID, "pid");
> -struct ns_desc user_ns_desc = NS_DESC_ENTRY(CLONE_NEWUSER, "user");
> diff --git a/protobuf-desc.c b/protobuf-desc.c
> index b97418b..9199b09 100644
> --- a/protobuf-desc.c
> +++ b/protobuf-desc.c
> @@ -38,6 +38,7 @@
>  #include "protobuf/sk-packet.pb-c.h"
>  #include "protobuf/creds.pb-c.h"
>  #include "protobuf/timer.pb-c.h"
> +#include "protobuf/userns.pb-c.h"
>  #include "protobuf/utsns.pb-c.h"
>  #include "protobuf/ipc-var.pb-c.h"
>  #include "protobuf/ipc-shm.pb-c.h"
> diff --git a/protobuf/Makefile b/protobuf/Makefile
> index 7f6485b..cd2b854 100644
> --- a/protobuf/Makefile
> +++ b/protobuf/Makefile
> @@ -50,6 +50,7 @@ proto-obj-y   += ipc-shm.o
>  proto-obj-y    += ipc-msg.o
>  proto-obj-y    += ipc-sem.o
>  proto-obj-y    += utsns.o
> +proto-obj-y    += userns.o
>  proto-obj-y    += creds.o
>  proto-obj-y    += vma.o
>  proto-obj-y    += netdev.o
> diff --git a/protobuf/core.proto b/protobuf/core.proto
> index d850e2e..8810376 100644
> --- a/protobuf/core.proto
> +++ b/protobuf/core.proto
> @@ -32,6 +32,7 @@ message task_kobj_ids_entry {
>         optional uint32                 ipc_ns_id       = 7;
>         optional uint32                 uts_ns_id       = 8;
>         optional uint32                 mnt_ns_id       = 9;
> +       optional uint32                 user_ns_id      = 10;
>  }
>
>  message thread_sas_entry {
> diff --git a/protobuf/userns.proto b/protobuf/userns.proto
> new file mode 100644
> index 0000000..31d7718
> --- /dev/null
> +++ b/protobuf/userns.proto
> @@ -0,0 +1,9 @@
> +message userns_entry {
> +       message map_entry {
> +               required uint32 id_in   = 1;
> +               required uint32 id_out  = 2;
> +               required uint32 length  = 3;
> +       }
> +       repeated map_entry uid_map = 1;
> +       repeated map_entry gid_map = 2;
> +}
> \ No newline at end of file
> diff --git a/pstree.c b/pstree.c
> index d005b64..c905317 100644
> --- a/pstree.c
> +++ b/pstree.c
> @@ -603,6 +603,8 @@ static unsigned long get_clone_mask(TaskKobjIdsEntry
> *i,
>                 mask |= CLONE_NEWUTS;
>         if (i->mnt_ns_id != p->mnt_ns_id)
>                 mask |= CLONE_NEWNS;
> +       if (i->user_ns_id != p->user_ns_id)
> +               mask |= CLONE_NEWUSER;
>
>         return mask;
>  }
> diff --git a/user_ns.c b/user_ns.c
> new file mode 100644
> index 0000000..e90f068
> --- /dev/null
> +++ b/user_ns.c
> @@ -0,0 +1,227 @@
> +#include <unistd.h>
> +
> +#include "namespaces.h"
> +#include "user_ns.h"
> +#include "list.h"
> +
> +#include "protobuf.h"
> +#include "protobuf/userns.pb-c.h"
> +
> +struct map_entry {
> +       UsernsEntry__MapEntry   entry;
> +       struct list_head        list;
> +};
> +
> +static void cleanup(int *fd, struct list_head *uid_list,
> +                   struct list_head *gid_list)
> +{
> +       struct map_entry *pos, *tmp;
> +
> +       /* free uid entries */
> +       list_for_each_entry_safe(pos, tmp, uid_list, list) {
> +               list_del(&pos->list);
> +               xfree(pos);
> +       }
> +
> +       /* free gid entries */
> +       list_for_each_entry_safe(pos, tmp, gid_list, list) {
> +               list_del(&pos->list);
> +               xfree(pos);
> +       }
> +
> +       close_safe(fd);
> +}
> +
> +static void fill_map(int n_entries, UsernsEntry__MapEntry **map,
> +                    struct list_head *head)
> +{
> +       struct map_entry *tmp;
> +
> +       list_for_each_entry(tmp, head, list) {
> +               map[--n_entries] = &(tmp->entry);
> +       }
> +}
> +
> +static int write_pb(int fd, int n_uid_entries, int n_gid_entries,
> +                   struct list_head *uid_list, struct list_head *gid_list)
> +{
> +       UsernsEntry ue = USERNS_ENTRY__INIT;
> +
> +       UsernsEntry__MapEntry *uidmap[n_uid_entries];
> +       fill_map(n_uid_entries, uidmap, uid_list);
> +       ue.uid_map = uidmap;
> +       ue.n_uid_map = n_uid_entries;
> +
> +       UsernsEntry__MapEntry *gidmap[n_gid_entries];
> +       fill_map(n_gid_entries, gidmap, gid_list);
> +       ue.gid_map = gidmap;
> +       ue.n_gid_map = n_gid_entries;
> +
> +       return pb_write_one(fd, &ue, PB_USERNS);
> +}
> +
> +static int read_map_entries(int pid, const char *id_map, struct list_head
> *head)
> +{
> +       int n_read, n_entries = 0;
> +       FILE *fp;
> +       struct map_entry *tmp;
> +       UsernsEntry__MapEntry entry = USERNS_ENTRY__MAP_ENTRY__INIT;
> +
> +       pr_debug("Reading entries from /proc/%d/%s\n", pid, id_map);
> +
> +       fp = fopen_proc(pid, "%s", id_map);
> +       if (!fp) {
> +               pr_perror("Error opening /proc/%d/%s\n", pid, id_map);
> +               return -1;
> +       }
> +
> +       while ((n_read = fscanf(fp, "%u %u %u\n", &entry.id_in,
> +                               &entry.id_out, &entry.length)) != EOF) {
> +               if (n_read != 3) {
> +                       pr_perror("Error reading /proc/%d/%s, fscanf
> returned %d",
> +                                 pid, id_map, n_read);
> +                       fclose(fp);
> +                       return -1;
> +               }
> +
> +               tmp = (struct map_entry *)xmalloc(sizeof(struct
> map_entry));
> +               if (!tmp) {
> +                       fclose(fp);
> +                       return -1;
> +               }
> +
> +               memcpy(&tmp->entry, &entry, sizeof(UsernsEntry__MapEntry));
> +               list_add(&(tmp->list), head);
> +               n_entries++;
> +       }
> +
> +       if (fclose(fp) != 0) {
> +               pr_perror("fclose(/proc/%d/%s) failed", pid, id_map);
> +               return -1;
> +       }
> +
> +       return n_entries;
> +}
> +
> +static int write_map_entries(int pid, const char *id_map,
> +                             UsernsEntry__MapEntry **map,
> +                             size_t n_entries)
> +{
> +       int i, fd, n_written, total = 0;
> +       char buf[PAGE_SIZE];
> +
> +       pr_debug("Writing entries to /proc/%d/%s, n_entries=%lu\n",
> +                pid, id_map, n_entries);
> +
> +       for (i = 0; i < n_entries; i++) {
> +               n_written = snprintf(buf + total, sizeof(buf) - total,
> +                                    "%u %u %u\n",
> +                                     map[i]->id_in,
> +                                     map[i]->id_out,
> +                                     map[i]->length);
> +               if (n_written < 0) {
> +                       pr_err("snprintf failed for %s of pid: %d\n",
> id_map, pid);
> +                       return -1;
> +               }
> +               total += n_written;
> +       }
> +
> +       /* id_maps can only be written to once */
> +       fd = open_proc_rw(pid, "%s", id_map);
> +       if (fd < 0) {
> +               pr_perror("Unable to open /proc/%d/%s\n", pid, id_map);
> +               return -1;
> +       }
> +
> +       if (write(fd, buf, total) != total) {
> +               pr_perror("Failed to write all %d bytes to /proc/%d/%s",
> +                         total, pid, id_map);
> +               close_safe(&fd);
> +               return -1;
> +       }
> +
> +       close_safe(&fd);
> +
> +       return 0;
> +}
> +
> +int dump_user_ns(int ns_pid, int ns_id)
> +{
> +       int fd, ret, n_uid_entries, n_gid_entries;
> +
> +       LIST_HEAD(uid_list);
> +       LIST_HEAD(gid_list);
> +
> +       fd = open_image(CR_FD_USERNS, O_DUMP, ns_id);
> +       if (fd < 0) {
> +               pr_err("Error opening userns image");
> +               return -1;
> +       }
> +
> +       /* read uid map */
> +       n_uid_entries = read_map_entries(ns_pid, "uid_map", &uid_list);
> +       if (n_uid_entries < 0) {
> +               pr_err("Error reading uid_map\n");
> +               ret = -1;
> +               goto out;
> +       }
> +
> +       /* read gid map */
> +       n_gid_entries = read_map_entries(ns_pid, "gid_map", &gid_list);
> +       if (n_gid_entries < 0) {
> +               pr_err("Error reading gid_map\n");
> +               ret = -1;
> +               goto out;
> +       }
> +
> +       ret = write_pb(fd, n_uid_entries, n_gid_entries, &uid_list,
> &gid_list);
> +
> +out:
> +       cleanup(&fd, &uid_list, &gid_list);
> +       return ret;
> +}
> +
> +/*
> + * Restore uid_map and gid_map file for the init process. Since this is
> called
> + * from the parent, we access these files using the 'real_pid' of the
> process.
> + */
> +int restore_user_ns(int real_pid, int ns_id)
> +{
> +       int fd, ret = 0;
> +       UsernsEntry *ue;
> +
> +       pr_info("Restoring user namespace for real_pid:%d\n", real_pid);
> +
> +       fd = open_image(CR_FD_USERNS, O_RSTR, ns_id);
> +       if (fd < 0)
> +               return -1;
> +
> +       ret = pb_read_one(fd, &ue, PB_USERNS);
> +       if (ret < 0)
> +               return -1;
> +
> +       pr_info("userns restoring: n_uid_map:%lu ; n_gid_map:%lu\n",
> +               ue->n_uid_map, ue->n_gid_map);
> +
> +       /* restore uid_map */
> +       ret = write_map_entries(real_pid, "uid_map", ue->uid_map,
> ue->n_uid_map);
> +       if (ret < 0) {
> +               pr_err("Failed to restore /proc/%d/uid_map\n", real_pid);
> +               goto out;
> +       }
> +
> +       /* restore gid_map */
> +       ret = write_map_entries(real_pid, "gid_map", ue->gid_map,
> ue->n_gid_map);
> +       if (ret < 0) {
> +               pr_err("Failed to restore /proc/%d/gid_map", real_pid);
> +               goto out;
> +       }
> +
> +out:
> +       userns_entry__free_unpacked(ue, NULL);
> +
> +       close_safe(&fd);
> +       return ret;
> +}
> +
> +struct ns_desc user_ns_desc = NS_DESC_ENTRY(CLONE_NEWUSER, "user");
> --
> 2.1.0.rc2.206.gedb03e5
>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.openvz.org/pipermail/criu/attachments/20140812/9d5c7d66/attachment-0001.html>


More information about the CRIU mailing list