[CRIU] [PATCH] namespace: mark mount namespaces as populated after the forking stage

Andrew Vagin avagin at virtuozzo.com
Wed Jun 15 09:29:50 PDT 2016


On Wed, Jun 15, 2016 at 02:46:52PM +0300, Pavel Emelyanov wrote:
> On 06/11/2016 09:01 PM, Andrey Vagin wrote:
> > From: Andrew Vagin <avagin at virtuozzo.com>
> > 
> > Currently we mark a mount namespaces as populated when a target process
> > (ns_pid) switches into it. But if a process inherited the right
> > namespace from a parent, it doesn't call do_restore_task_mnt_ns()
> > and a namespace can remain unmarked.
> 
> Why? It should be marked by the task that created one.

What is it create? All namespaces are created by the root task.

All these don't matter with this patch.

> 
> > af55c059fb6 ("mount: fix a race between restoring namespaces and file mappings")
> > After this patch we could simplify logic around ns_populated. Currently
> > it's a futex, but nodoby waits on it. We can set ns_populated when we
> > are going to close namespace descriptors. To avoid additional locks, we
> > can do this when all task pass the forking stage and don't start the
> > next stage.
> 
> Don't we need barriers between ns->populated = true and close(ns->fd)?

We don't need, because we set ns_populated when all task are stopped.
>From a root task, we wait when all task have passed the forking stage
and then enumirate all namespaces and mark them as populated.
> 
> (more comments inline)
> 
> > https://jira.sw.ru/browse/PSBM-48222
> > 
> > Signed-off-by: Andrew Vagin <avagin at virtuozzo.com>
> > ---
> >  criu/cr-restore.c         |  7 ++++---
> >  criu/include/namespaces.h |  2 +-
> >  criu/mount.c              | 14 ++++++--------
> >  criu/namespaces.c         |  4 ++--
> >  4 files changed, 13 insertions(+), 14 deletions(-)
> > 
> > diff --git a/criu/cr-restore.c b/criu/cr-restore.c
> > index 1e3d823..5511447 100644
> > --- a/criu/cr-restore.c
> > +++ b/criu/cr-restore.c
> > @@ -1309,16 +1309,17 @@ static int restore_task_with_children(void *_arg)
> >  
> >  	restore_pgid();
> >  
> > -	if (restore_finish_stage(CR_STATE_FORKING) < 0)
> > -		goto err;
> > -
> >  	if (current->parent == NULL) {
> > +		futex_wait_while_gt(&task_entries->nr_in_progress, 1);
> 
> Why is it needed here?

Wait when all tasks have passed the forking stage.
> 
> >  		if (depopulate_roots_yard())
> >  			goto err;
> >  
> >  		fini_restore_mntns();
> >  	}
> >  
> > +	if (restore_finish_stage(CR_STATE_FORKING) < 0)
> > +		goto err;
> > +
> >  	if (restore_one_task(current->pid.virt, ca->core))
> >  		goto err;
> >  
> > diff --git a/criu/include/namespaces.h b/criu/include/namespaces.h
> > index 4cd2c85..3a3f7d4 100644
> > --- a/criu/include/namespaces.h
> > +++ b/criu/include/namespaces.h
> > @@ -93,7 +93,7 @@ struct ns_id {
> >  	 * are mounted) and other tasks may do setns on it
> >  	 * and proceed.
> >  	 */
> > -	futex_t ns_populated;
> > +	bool ns_populated;
> >  
> >  	union {
> >  		struct {
> > diff --git a/criu/mount.c b/criu/mount.c
> > index e891c92..7c59ac6 100644
> > --- a/criu/mount.c
> > +++ b/criu/mount.c
> > @@ -2856,7 +2856,7 @@ static int rst_collect_local_mntns(enum ns_type typ)
> >  	if (!mntinfo)
> >  		return -1;
> >  
> > -	futex_set(&nsid->ns_populated, 1);
> > +	nsid->ns_populated = true;
> >  	return 0;
> >  }
> >  
> > @@ -3112,9 +3112,6 @@ static int do_restore_task_mnt_ns(struct ns_id *nsid, struct pstree_item *curren
> >  	}
> >  	close(fd);
> >  
> > -	if (nsid->ns_pid == current->pid.virt)
> > -		futex_set_and_wake(&nsid->ns_populated, 1);
> > -
> >  	return 0;
> >  }
> >  
> > @@ -3161,9 +3158,10 @@ void fini_restore_mntns(void)
> >  	for (nsid = ns_ids; nsid != NULL; nsid = nsid->next) {
> >  		if (nsid->nd != &mnt_ns_desc)
> >  			continue;
> > -		close(nsid->mnt.ns_fd);
> > +		close_safe(&nsid->mnt.ns_fd);
> 
> Why _safe()?

close_safe() closes a file descriptor and set it in -1

> 
> >  		if (nsid->type != NS_ROOT)
> > -			close(nsid->mnt.root_fd);
> > +			close_safe(&nsid->mnt.root_fd);
> > +		nsid->ns_populated = true;
> 
> This assignment is new. Was it lost?

We moved it from do_restore_task_mnt_ns().

> 
> >  	}
> >  }
> >  
> > @@ -3431,7 +3429,7 @@ ns_created:
> >  			if (nsid->mnt.ns_fd < 0)
> >  				goto err;
> >  			/* we set ns_populated so we don't need to open root_fd */
> > -			futex_set(&nsid->ns_populated, 1);
> > +			nsid->ns_populated = true;
> >  			continue;
> >  		}
> >  
> > @@ -3565,7 +3563,7 @@ int mntns_get_root_fd(struct ns_id *mntns)
> >  	 * root from the root task.
> >  	 */
> >  
> > -	if (!futex_get(&mntns->ns_populated)) {
> > +	if (!mntns->ns_populated) {
> >  		int fd;
> >  
> >  		fd = open_proc(root_item->pid.virt, "fd/%d", mntns->mnt.root_fd);
> > diff --git a/criu/namespaces.c b/criu/namespaces.c
> > index 6c42df0..157c9bf 100644
> > --- a/criu/namespaces.c
> > +++ b/criu/namespaces.c
> > @@ -298,7 +298,7 @@ struct ns_id *rst_new_ns_id(unsigned int id, pid_t pid,
> >  	if (nsid) {
> >  		nsid->type = type;
> >  		nsid_add(nsid, nd, id, pid);
> > -		futex_set(&nsid->ns_populated, 0);
> > +		nsid->ns_populated = false;
> >  	}
> >  
> >  	return nsid;
> > @@ -417,7 +417,7 @@ static unsigned int generate_ns_id(int pid, unsigned int kid, struct ns_desc *nd
> >  
> >  	nsid->type = type;
> >  	nsid->kid = kid;
> > -	futex_set(&nsid->ns_populated, 1);
> > +	nsid->ns_populated = true;
> >  	nsid_add(nsid, nd, ns_next_id++, pid);
> >  
> >  found:
> > 
> 


More information about the CRIU mailing list