[CRIU] [PATCH 3/3] mount: open a root directory for mount namesapces via the root task
Andrew Vagin
avagin at virtuozzo.com
Tue Aug 2 11:04:19 PDT 2016
On Tue, Aug 02, 2016 at 03:08:17PM +0300, Pavel Emelyanov wrote:
> On 07/27/2016 09:21 PM, Andrey Vagin wrote:
> > From: Andrew Vagin <avagin at virtuozzo.com>
> >
> > We use openat() syscall to open files in various mount namespaces.
> > Recently we found that if a path contains absolute symlinks, openat()
> > syscall resolves them relative to the current root, but they has to be
> > resolved relative to the root of the target namespace, so we need to
> > change root before restoring a file descriprot.
>
> OK, what's the exact problem we're having now? Which openat() is called
> on a path with symlink?
When we restore a file, we use openat() to open it:
fd = openat(ns_root_fd, rfi->path, flags);
but rfi->path can containe absolute symlinks and in this case, openat()
will be not enogh and we need to make chroot to resolve symlinks
correctly.
fchroot(ns_root_fd)
open(rfi->path, flags);
>
> > But for that we need to rework a method how we open a root directory
> > for a specified mount namespace. Currently we open /proc/pid/root for
> > one of processes from this namespace. If a process will change a root
> > directory, this way will not work.
> >
> > In this patch we open all namespaces in the root task and then any
> > process will be able to open one of these descriptors via /proc/pid/fd.
> >
> > Signed-off-by: Andrew Vagin <avagin at virtuozzo.com>
> > ---
> > criu/cr-restore.c | 32 ++++++++++++++------------------
> > criu/mount.c | 48 ++++++++++++++++++++++++++++++++----------------
> > criu/pstree.c | 1 +
> > 3 files changed, 47 insertions(+), 34 deletions(-)
> >
> > diff --git a/criu/cr-restore.c b/criu/cr-restore.c
> > index 3437b76..1de2682 100644
> > --- a/criu/cr-restore.c
> > +++ b/criu/cr-restore.c
> > @@ -1469,16 +1469,6 @@ static int restore_task_with_children(void *_arg)
> >
> > restore_pgid();
> >
> > - if (current->parent == NULL) {
> > - /*
> > - * Wait when all tasks passed the CR_STATE_FORKING stage.
> > - * It means that all tasks entered into their namespaces.
> > - */
> > - futex_wait_while_gt(&task_entries->nr_in_progress, 1);
> > -
> > - fini_restore_mntns();
> > - }
> > -
> > if (restore_finish_stage(CR_STATE_FORKING) < 0)
> > goto err;
> >
> > @@ -2907,6 +2897,20 @@ static int sigreturn_restore(pid_t pid, struct task_restore_args *task_args, uns
> > goto err_nv;
> >
> > /*
> > + * Make root and cwd restore _that_ late not to break any
> > + * attempts to open files by paths above (e.g. /proc).
> > + */
> > + if (restore_fs(current))
> > + goto err;
> > +
> > + if (current->parent == NULL) {
> > + /* Wait when all tasks restored all files */
> > + futex_wait_while_gt(&task_entries->nr_in_progress,
> > + current->nr_threads);
> > + fini_restore_mntns();
> > + }
> > +
> > + /*
> > * We're about to search for free VM area and inject the restorer blob
> > * into it. No irrelevent mmaps/mremaps beyond this point, otherwise
> > * this unwanted mapping might get overlapped by the restorer.
> > @@ -3151,14 +3155,6 @@ static int sigreturn_restore(pid_t pid, struct task_restore_args *task_args, uns
> > task_args->nr_threads = current->nr_threads;
> > task_args->thread_args = thread_args;
> >
> > - /*
> > - * Make root and cwd restore _that_ late not to break any
> > - * attempts to open files by paths above (e.g. /proc).
> > - */
> > -
> > - if (restore_fs(current))
> > - goto err;
> > -
> > close_image_dir();
> > close_proc();
> > close_service_fd(ROOT_FD_OFF);
> > diff --git a/criu/mount.c b/criu/mount.c
> > index 7c280c0..5a167b1 100644
> > --- a/criu/mount.c
> > +++ b/criu/mount.c
> > @@ -3160,9 +3160,7 @@ void fini_restore_mntns(void)
> > if (nsid->nd != &mnt_ns_desc)
> > continue;
> > close_safe(&nsid->mnt.ns_fd);
> > - if (nsid->type != NS_ROOT)
> > - close_safe(&nsid->mnt.root_fd);
> > - nsid->ns_populated = true;
> > + close_safe(&nsid->mnt.root_fd);
> > }
> > }
> >
> > @@ -3350,6 +3348,34 @@ void cleanup_mnt_ns(void)
> > pr_perror("Can't remove the directory %s", mnt_roots);
> > }
> >
> > +static int open_mnt_ns(struct ns_id *nsid, struct rst_info *rst)
> > +{
> > + int fd, tfd;
> > +
> > + /* Pin one with a file descriptor */
> > + fd = open_proc(PROC_SELF, "ns/mnt");
> > + if (fd < 0)
> > + return -1;
> > + tfd = reopen_as_unused_fd(fd, rst);
> > + if (tfd < 0) {
> > + close(fd);
> > + return -1;
> > + }
> > + nsid->mnt.ns_fd = tfd;
> > +
> > + fd = open_proc(PROC_SELF, "root");
> > + if (fd < 0)
> > + return -1;
> > + tfd = reopen_as_unused_fd(fd, rst);
> > + if (tfd < 0) {
> > + close(fd);
> > + return -1;
> > + }
> > + nsid->mnt.root_fd = tfd;
> > +
> > + return 0;
> > +}
> > +
> > int prepare_mnt_ns(void)
> > {
> > int ret = -1, rst = -1;
> > @@ -3483,12 +3509,8 @@ ns_created:
> > if (nsid->nd != &mnt_ns_desc)
> > continue;
> > if (nsid->type == NS_ROOT) {
> > - /* Pin one with a file descriptor */
> > - nsid->mnt.ns_fd = open_proc(PROC_SELF, "ns/mnt");
> > - if (nsid->mnt.ns_fd < 0)
> > + if (open_mnt_ns(nsid, rsti(root_item)))
> > goto err;
> > - /* we set ns_populated so we don't need to open root_fd */
> > - nsid->ns_populated = true;
> > continue;
> > }
> >
> > @@ -3504,14 +3526,7 @@ ns_created:
> > if (cr_pivot_root(path))
> > goto err;
> >
> > - /* Pin one with a file descriptor */
> > - nsid->mnt.ns_fd = open_proc(PROC_SELF, "ns/mnt");
> > - if (nsid->mnt.ns_fd < 0)
> > - goto err;
> > -
> > - /* root_fd is used to restore file mappings */
> > - nsid->mnt.root_fd = open_proc(PROC_SELF, "root");
> > - if (nsid->mnt.root_fd < 0)
> > + if (open_mnt_ns(nsid, rsti(root_item)))
> > goto err;
> >
> > /* And return back to regain the access to the roots yard */
> > @@ -3628,6 +3643,7 @@ int mntns_get_root_fd(struct ns_id *mntns)
> > fd = open_proc(root_item->pid.virt, "fd/%d", mntns->mnt.root_fd);
> > if (fd < 0)
> > return -1;
> > + close_pid_proc();
> >
> > return mntns_set_root_fd(mntns->ns_pid, fd);
> > }
> > diff --git a/criu/pstree.c b/criu/pstree.c
> > index af89dbb..94b8021 100644
> > --- a/criu/pstree.c
> > +++ b/criu/pstree.c
> > @@ -943,6 +943,7 @@ static int prepare_pstree_for_unshare(void)
> > fake_root->threads->virt = INIT_PID;
> > fake_root->ids = root_ids;
> > rsti(fake_root)->clone_flags = opts.unshare_flags | rsti(root_item)->clone_flags;
> > + INIT_LIST_HEAD(&rsti(fake_root)->used);
> >
> > rsti(fake_root)->helper_cb = do_fake_init;
> >
> >
>
More information about the CRIU
mailing list