[Devel] Re: [RFC][PATCH] Make access to taks's nsproxy liter
Serge E. Hallyn
serue at us.ibm.com
Wed Aug 8 09:48:54 PDT 2007
Quoting Pavel Emelyanov (xemul at openvz.org):
> When someone wants to deal with some other taks's namespaces
> it has to lock the task and then to get the desired namespace
> if the one exists. This is slow on read-only paths and may be
> impossible in some cases.
>
> E.g. Oleg recently noticed a race between unshare() and the
> (just sent for review) pid namespaces - when the task notifies
> the parent it has to know the parent's namespace, but taking
> the task_lock() is impossible there - the code is under write
> locked tasklist lock.
>
> On the other hand switching the namespace on task (daemonize)
> and releasing the namespace (after the last task exit) is rather
> rare operation and we can sacrifice its speed to solve the
> issues above.
>
> Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
>
> ---
>
> fs/proc/base.c | 27 +++++++++++++++++----------
> include/linux/nsproxy.h | 19 +++++++------------
> kernel/exit.c | 4 +---
> kernel/fork.c | 11 +++++------
> kernel/nsproxy.c | 38 ++++++++++++++++++++++++++++++--------
> 5 files changed, 60 insertions(+), 39 deletions(-)
>
> diff --git a/fs/proc/base.c b/fs/proc/base.c
> index ed2b224..614851a 100644
> --- a/fs/proc/base.c
> +++ b/fs/proc/base.c
> @@ -371,18 +371,21 @@ struct proc_mounts {
> static int mounts_open(struct inode *inode, struct file *file)
> {
> struct task_struct *task = get_proc_task(inode);
> + struct nsproxy *nsp;
> struct mnt_namespace *ns = NULL;
> struct proc_mounts *p;
> int ret = -EINVAL;
>
> if (task) {
> - task_lock(task);
> - if (task->nsproxy) {
> - ns = task->nsproxy->mnt_ns;
> + rcu_read_lock();
> + nsp = task_nsproxy(task);
> + if (nsp) {
> + ns = nsp->mnt_ns;
> if (ns)
> get_mnt_ns(ns);
> }
> - task_unlock(task);
> + rcu_read_unlock();
> +
> put_task_struct(task);
> }
>
> @@ -445,16 +448,20 @@ static int mountstats_open(struct inode
>
> if (!ret) {
> struct seq_file *m = file->private_data;
> + struct nsproxy *nsp;
> struct mnt_namespace *mnt_ns = NULL;
> struct task_struct *task = get_proc_task(inode);
>
> if (task) {
> - task_lock(task);
> - if (task->nsproxy)
> - mnt_ns = task->nsproxy->mnt_ns;
> - if (mnt_ns)
> - get_mnt_ns(mnt_ns);
> - task_unlock(task);
> + rcu_read_lock();
> + nsp = task_nsproxy(task);
> + if (nsp) {
> + mnt_ns = nsp->mnt_ns;
> + if (mnt_ns)
> + get_mnt_ns(mnt_ns);
> + }
> + rcu_read_unlock();
> +
> put_task_struct(task);
> }
>
> diff --git a/include/linux/nsproxy.h b/include/linux/nsproxy.h
> index 525d8fc..74f21fe 100644
> --- a/include/linux/nsproxy.h
> +++ b/include/linux/nsproxy.h
> @@ -32,8 +32,14 @@ struct nsproxy {
> };
> extern struct nsproxy init_nsproxy;
>
> +static inline struct nsproxy *task_nsproxy(struct task_struct *tsk)
> +{
> + return rcu_dereference(tsk->nsproxy);
> +}
Looks like a very nice cleanup as well. But please add a comment
above task_nsproxy() that it must be called under rcu_read_lock()
or task_lock(task) (though I'll admit the rcu_dereference may make that
obvious)
> int copy_namespaces(unsigned long flags, struct task_struct *tsk);
> -void get_task_namespaces(struct task_struct *tsk);
> +void exit_task_namespaces(struct task_struct *tsk);
> +void switch_task_namespaces(struct task_struct *tsk, struct nsproxy *new);
> void free_nsproxy(struct nsproxy *ns);
> int unshare_nsproxy_namespaces(unsigned long, struct nsproxy **,
> struct fs_struct *);
> @@ -45,17 +51,6 @@ static inline void put_nsproxy(struct ns
> }
> }
>
> -static inline void exit_task_namespaces(struct task_struct *p)
> -{
> - struct nsproxy *ns = p->nsproxy;
> - if (ns) {
> - task_lock(p);
> - p->nsproxy = NULL;
> - task_unlock(p);
> - put_nsproxy(ns);
> - }
> -}
> -
> #ifdef CONFIG_CONTAINER_NS
> int ns_container_clone(struct task_struct *tsk);
> #else
> diff --git a/kernel/exit.c b/kernel/exit.c
> index 2f4f35e..a2aef1f 100644
> --- a/kernel/exit.c
> +++ b/kernel/exit.c
> @@ -409,9 +409,7 @@ void daemonize(const char *name, ...)
> current->fs = fs;
> atomic_inc(&fs->count);
>
> - exit_task_namespaces(current);
> - current->nsproxy = init_task.nsproxy;
> - get_task_namespaces(current);
> + switch_task_namespaces(current, init_task.nsproxy);
>
> exit_files(current);
> current->files = init_task.files;
> diff --git a/kernel/fork.c b/kernel/fork.c
> index bc48e0f..0c6dd67 100644
> --- a/kernel/fork.c
> +++ b/kernel/fork.c
> @@ -1618,7 +1618,7 @@ asmlinkage long sys_unshare(unsigned lon
> struct mm_struct *mm, *new_mm = NULL, *active_mm = NULL;
> struct files_struct *fd, *new_fd = NULL;
> struct sem_undo_list *new_ulist = NULL;
> - struct nsproxy *new_nsproxy = NULL, *old_nsproxy = NULL;
> + struct nsproxy *new_nsproxy = NULL;
>
> check_unshare_flags(&unshare_flags);
>
> @@ -1647,14 +1647,13 @@ asmlinkage long sys_unshare(unsigned lon
>
> if (new_fs || new_mm || new_fd || new_ulist || new_nsproxy) {
>
> - task_lock(current);
> -
> if (new_nsproxy) {
> - old_nsproxy = current->nsproxy;
> - current->nsproxy = new_nsproxy;
> - new_nsproxy = old_nsproxy;
> + switch_task_namespaces(current, new_nsproxy);
> + new_nsproxy = NULL;
> }
>
> + task_lock(current);
> +
> if (new_fs) {
> fs = current->fs;
> current->fs = new_fs;
> diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c
> index 58843ae..91dca27 100644
> --- a/kernel/nsproxy.c
> +++ b/kernel/nsproxy.c
> @@ -30,14 +30,6 @@ static inline void get_nsproxy(struct ns
> atomic_inc(&ns->count);
> }
>
> -void get_task_namespaces(struct task_struct *tsk)
> -{
> - struct nsproxy *ns = tsk->nsproxy;
> - if (ns) {
> - get_nsproxy(ns);
> - }
> -}
> -
> /*
> * creates a copy of "orig" with refcount 1.
> */
> @@ -205,6 +197,36 @@ out:
> return err;
> }
>
> +void switch_task_namespaces(struct task_struct *p, struct nsproxy *new)
> +{
> + struct nsproxy *ns;
> +
> + might_sleep();
> +
> + ns = p->nsproxy;
> + if (ns == new)
> + return;
> +
> + if (new)
> + get_nsproxy(new);
> + rcu_assign_pointer(p->nsproxy, new);
> +
> + if (ns && atomic_dec_and_test(&ns->count)) {
> + /*
> + * wait for others to get what they want from this
> + * nsproxy. cannot release this nsproxy via the
> + * call_rcu() since put_mnt_ns will want to sleep
> + */
> + synchronize_rcu();
> + free_nsproxy(ns);
> + }
> +}
Also a comment above switch_task_namespaces() that it must be called
with task_lock held.
thanks,
-serge
> +
> +void exit_task_namespaces(struct task_struct *p)
> +{
> + switch_task_namespaces(p, NULL);
> +}
> +
> static int __init nsproxy_cache_init(void)
> {
> nsproxy_cachep = kmem_cache_create("nsproxy", sizeof(struct nsproxy),
_______________________________________________
Containers mailing list
Containers at lists.linux-foundation.org
https://lists.linux-foundation.org/mailman/listinfo/containers
More information about the Devel
mailing list