[CRIU] [PATCH] namespace: mark mount namespaces as populated after the forking stage
Andrew Vagin
avagin at virtuozzo.com
Wed Jun 15 09:29:50 PDT 2016
On Wed, Jun 15, 2016 at 02:46:52PM +0300, Pavel Emelyanov wrote:
> On 06/11/2016 09:01 PM, Andrey Vagin wrote:
> > From: Andrew Vagin <avagin at virtuozzo.com>
> >
> > Currently we mark a mount namespaces as populated when a target process
> > (ns_pid) switches into it. But if a process inherited the right
> > namespace from a parent, it doesn't call do_restore_task_mnt_ns()
> > and a namespace can remain unmarked.
>
> Why? It should be marked by the task that created one.
What is it create? All namespaces are created by the root task.
All these don't matter with this patch.
>
> > af55c059fb6 ("mount: fix a race between restoring namespaces and file mappings")
> > After this patch we could simplify logic around ns_populated. Currently
> > it's a futex, but nodoby waits on it. We can set ns_populated when we
> > are going to close namespace descriptors. To avoid additional locks, we
> > can do this when all task pass the forking stage and don't start the
> > next stage.
>
> Don't we need barriers between ns->populated = true and close(ns->fd)?
We don't need, because we set ns_populated when all task are stopped.
>From a root task, we wait when all task have passed the forking stage
and then enumirate all namespaces and mark them as populated.
>
> (more comments inline)
>
> > https://jira.sw.ru/browse/PSBM-48222
> >
> > Signed-off-by: Andrew Vagin <avagin at virtuozzo.com>
> > ---
> > criu/cr-restore.c | 7 ++++---
> > criu/include/namespaces.h | 2 +-
> > criu/mount.c | 14 ++++++--------
> > criu/namespaces.c | 4 ++--
> > 4 files changed, 13 insertions(+), 14 deletions(-)
> >
> > diff --git a/criu/cr-restore.c b/criu/cr-restore.c
> > index 1e3d823..5511447 100644
> > --- a/criu/cr-restore.c
> > +++ b/criu/cr-restore.c
> > @@ -1309,16 +1309,17 @@ static int restore_task_with_children(void *_arg)
> >
> > restore_pgid();
> >
> > - if (restore_finish_stage(CR_STATE_FORKING) < 0)
> > - goto err;
> > -
> > if (current->parent == NULL) {
> > + futex_wait_while_gt(&task_entries->nr_in_progress, 1);
>
> Why is it needed here?
Wait when all tasks have passed the forking stage.
>
> > if (depopulate_roots_yard())
> > goto err;
> >
> > fini_restore_mntns();
> > }
> >
> > + if (restore_finish_stage(CR_STATE_FORKING) < 0)
> > + goto err;
> > +
> > if (restore_one_task(current->pid.virt, ca->core))
> > goto err;
> >
> > diff --git a/criu/include/namespaces.h b/criu/include/namespaces.h
> > index 4cd2c85..3a3f7d4 100644
> > --- a/criu/include/namespaces.h
> > +++ b/criu/include/namespaces.h
> > @@ -93,7 +93,7 @@ struct ns_id {
> > * are mounted) and other tasks may do setns on it
> > * and proceed.
> > */
> > - futex_t ns_populated;
> > + bool ns_populated;
> >
> > union {
> > struct {
> > diff --git a/criu/mount.c b/criu/mount.c
> > index e891c92..7c59ac6 100644
> > --- a/criu/mount.c
> > +++ b/criu/mount.c
> > @@ -2856,7 +2856,7 @@ static int rst_collect_local_mntns(enum ns_type typ)
> > if (!mntinfo)
> > return -1;
> >
> > - futex_set(&nsid->ns_populated, 1);
> > + nsid->ns_populated = true;
> > return 0;
> > }
> >
> > @@ -3112,9 +3112,6 @@ static int do_restore_task_mnt_ns(struct ns_id *nsid, struct pstree_item *curren
> > }
> > close(fd);
> >
> > - if (nsid->ns_pid == current->pid.virt)
> > - futex_set_and_wake(&nsid->ns_populated, 1);
> > -
> > return 0;
> > }
> >
> > @@ -3161,9 +3158,10 @@ void fini_restore_mntns(void)
> > for (nsid = ns_ids; nsid != NULL; nsid = nsid->next) {
> > if (nsid->nd != &mnt_ns_desc)
> > continue;
> > - close(nsid->mnt.ns_fd);
> > + close_safe(&nsid->mnt.ns_fd);
>
> Why _safe()?
close_safe() closes a file descriptor and set it in -1
>
> > if (nsid->type != NS_ROOT)
> > - close(nsid->mnt.root_fd);
> > + close_safe(&nsid->mnt.root_fd);
> > + nsid->ns_populated = true;
>
> This assignment is new. Was it lost?
We moved it from do_restore_task_mnt_ns().
>
> > }
> > }
> >
> > @@ -3431,7 +3429,7 @@ ns_created:
> > if (nsid->mnt.ns_fd < 0)
> > goto err;
> > /* we set ns_populated so we don't need to open root_fd */
> > - futex_set(&nsid->ns_populated, 1);
> > + nsid->ns_populated = true;
> > continue;
> > }
> >
> > @@ -3565,7 +3563,7 @@ int mntns_get_root_fd(struct ns_id *mntns)
> > * root from the root task.
> > */
> >
> > - if (!futex_get(&mntns->ns_populated)) {
> > + if (!mntns->ns_populated) {
> > int fd;
> >
> > fd = open_proc(root_item->pid.virt, "fd/%d", mntns->mnt.root_fd);
> > diff --git a/criu/namespaces.c b/criu/namespaces.c
> > index 6c42df0..157c9bf 100644
> > --- a/criu/namespaces.c
> > +++ b/criu/namespaces.c
> > @@ -298,7 +298,7 @@ struct ns_id *rst_new_ns_id(unsigned int id, pid_t pid,
> > if (nsid) {
> > nsid->type = type;
> > nsid_add(nsid, nd, id, pid);
> > - futex_set(&nsid->ns_populated, 0);
> > + nsid->ns_populated = false;
> > }
> >
> > return nsid;
> > @@ -417,7 +417,7 @@ static unsigned int generate_ns_id(int pid, unsigned int kid, struct ns_desc *nd
> >
> > nsid->type = type;
> > nsid->kid = kid;
> > - futex_set(&nsid->ns_populated, 1);
> > + nsid->ns_populated = true;
> > nsid_add(nsid, nd, ns_next_id++, pid);
> >
> > found:
> >
>
More information about the CRIU
mailing list