[Devel] [RFC][PATCH 3/5] Use pid namespace from struct pid_nrs list
sukadev at us.ibm.com
sukadev at us.ibm.com
Fri Mar 9 20:02:29 PST 2007
From: Sukadev Bhattiprolu <sukadev at us.ibm.com>
Subject: [RFC][PATCH 3/5] Use pid namespace from struct pid_nrs list
Stop using task->nsproxy->pid_ns. Use pid_namespace from pid->pid_nrs
list instead.
To simplify error handling, this patch moves processing of the CLONE_NEWPID
flag, currently in copy_namespaces()/copy_process(), to alloc_pid(), which
is where the process association with a pid namespace is established.
I.e., when cloning a new pid namespace, alloc_pid() allocates a new pid_nr
for both the parent and child namespaces.
Changelog:
- Move definition of set_pid_ns_child_reaper() into the previous
helper function patch.
- Minor changes to accommodate changes in underlying patches.
- Fix compile warnings about parent_pid_ns (Cedric Le Goater)
- [Badari Pulavarty's comments]: No need to allocate nsproxy when
only CLONE_NEWPID is set. And attach_pid_nr() only needs one
parameter.
- Add privilege check for clone(CLONE_NEWPID).
Signed-off-by: Sukadev Bhattiprolu <sukadev at us.ibm.com>
Cc: Cedric Le Goater <clg at fr.ibm.com>
Cc: Dave Hansen <haveblue at us.ibm.com>
Cc: Serge Hallyn <serue at us.ibm.com>
Cc: containers at lists.osdl.org
---
include/linux/pid.h | 2 -
include/linux/pid_namespace.h | 11 ++-------
kernel/fork.c | 12 ++++++---
kernel/nsproxy.c | 11 ---------
kernel/pid.c | 51 ++++++++++++++++++++++++++++++------------
5 files changed, 50 insertions(+), 37 deletions(-)
Index: lx26-20-mm2b/include/linux/pid.h
===================================================================
--- lx26-20-mm2b.orig/include/linux/pid.h 2007-03-09 19:00:11.000000000 -0800
+++ lx26-20-mm2b/include/linux/pid.h 2007-03-09 19:01:09.000000000 -0800
@@ -119,7 +119,7 @@ extern struct pid *find_ge_pid(int nr);
extern int attach_pid_nr(struct pid *pid, struct pid_nr *pid_nr);
extern void free_pid_nr(struct pid_nr *pid_nr);
extern struct pid_nr *alloc_pid_nr(struct pid_namespace *pid_ns);
-extern struct pid *alloc_pid(void);
+extern struct pid *alloc_pid(int clone_flags);
extern void FASTCALL(free_pid(struct pid *pid));
extern pid_t pid_nr(struct pid *pid);
Index: lx26-20-mm2b/kernel/fork.c
===================================================================
--- lx26-20-mm2b.orig/kernel/fork.c 2007-03-09 19:00:42.000000000 -0800
+++ lx26-20-mm2b/kernel/fork.c 2007-03-09 19:01:09.000000000 -0800
@@ -1144,6 +1144,8 @@ static struct task_struct *copy_process(
if (clone_flags & CLONE_THREAD)
p->tgid = current->tgid;
+ if ((retval = priv_check_pid_ns(clone_flags)))
+ goto bad_fork_cleanup_policy;
if ((retval = security_task_alloc(p)))
goto bad_fork_cleanup_policy;
if ((retval = audit_alloc(p)))
@@ -1169,6 +1171,9 @@ static struct task_struct *copy_process(
if (retval)
goto bad_fork_cleanup_namespaces;
+ /* We are now ready to set child reaper if we cloned pid ns */
+ set_pid_ns_child_reaper(clone_flags, pid, p);
+
p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL;
/*
* Clear TID on mm_release()?
@@ -1384,7 +1389,7 @@ long do_fork(unsigned long clone_flags,
int __user *child_tidptr)
{
struct task_struct *p;
- struct pid *pid = alloc_pid();
+ struct pid *pid = alloc_pid(clone_flags);
long nr;
if (!pid)
@@ -1671,7 +1676,7 @@ asmlinkage long sys_unshare(unsigned lon
if ((err = unshare_pid_ns(unshare_flags, &new_pid_nr)))
goto bad_unshare_cleanup_ipc;
- if (new_ns || new_uts || new_ipc || new_pid_nr) {
+ if (new_ns || new_uts || new_ipc) {
old_nsproxy = current->nsproxy;
new_nsproxy = dup_nsproxy(old_nsproxy);
if (!new_nsproxy) {
@@ -1730,8 +1735,7 @@ asmlinkage long sys_unshare(unsigned lon
}
if (new_pid_nr) {
- pid = task_pid_ns(current);
- set_task_pid_ns(current, new_pid_nr->pid_ns);
+ attach_pid_nr(task_pid(current), new_pid_nr);
new_pid_nr = NULL;
}
Index: lx26-20-mm2b/kernel/nsproxy.c
===================================================================
--- lx26-20-mm2b.orig/kernel/nsproxy.c 2007-03-09 19:00:14.000000000 -0800
+++ lx26-20-mm2b/kernel/nsproxy.c 2007-03-09 19:01:09.000000000 -0800
@@ -67,8 +67,6 @@ struct nsproxy *dup_nsproxy(struct nspro
get_uts_ns(ns->uts_ns);
if (ns->ipc_ns)
get_ipc_ns(ns->ipc_ns);
- if (ns->pid_ns)
- get_pid_ns(ns->pid_ns);
}
return ns;
@@ -90,7 +88,7 @@ int copy_nsproxy(int flags, struct task_
get_nsproxy(old_ns);
- ns_all = CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC | CLONE_NEWPID;
+ ns_all = CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC;
if (!(flags & ns_all))
return 0;
@@ -115,17 +113,10 @@ int copy_nsproxy(int flags, struct task_
if (err)
goto out_ipc;
- err = copy_pid_ns(flags, tsk);
- if (err)
- goto out_pid;
-
out:
put_nsproxy(old_ns);
return err;
-out_pid:
- if (new_ns->ipc_ns)
- put_ipc_ns(new_ns->ipc_ns);
out_ipc:
if (new_ns->uts_ns)
put_uts_ns(new_ns->uts_ns);
Index: lx26-20-mm2b/kernel/pid.c
===================================================================
--- lx26-20-mm2b.orig/kernel/pid.c 2007-03-09 19:00:42.000000000 -0800
+++ lx26-20-mm2b/kernel/pid.c 2007-03-09 19:01:09.000000000 -0800
@@ -221,8 +221,13 @@ fastcall void free_pid(struct pid *pid)
hlist_del_rcu(&pid->pid_chain);
spin_unlock_irqrestore(&pidmap_lock, flags);
- hlist_for_each_entry(pid_nr, pos, &pid->pid_nrs, node)
+ hlist_for_each_entry(pid_nr, pos, &pid->pid_nrs, node) {
free_pidmap(pid_nr->pid_ns, pid_nr->nr);
+
+ /* put the reference we got in kref_init() in clone_pid_ns() */
+ if (pid_nr->nr == 1)
+ put_pid_ns(pid_nr->pid_ns);
+ }
call_rcu(&pid->rcu, delayed_put_pid);
}
@@ -357,34 +362,48 @@ struct pid_namespace *pid_ns(struct pid
return ns;
}
-struct pid *alloc_pid(void)
+struct pid *alloc_pid(int flags)
{
struct pid *pid;
enum pid_type type;
- int nr = -1;
- struct pid_nr *pid_nr;
+ struct pid_nr *pid_nr[2] = { NULL, NULL};
+ struct pid_namespace *new_pid_ns = NULL;
pid = kmem_cache_alloc(pid_cachep, GFP_KERNEL);
if (!pid)
return NULL;
- nr = alloc_pidmap(task_pid_ns(current));
- if (nr < 0)
+ pid_nr[0] = alloc_pidmap_pid_nr(task_pid_ns(current));
+ if (!pid_nr[0] < 0)
goto out_free_pid;
- pid_nr = alloc_pid_nr(task_pid_ns(current));
- if (!pid_nr)
- goto out_free_pidmap;
-
+ if (flags & CLONE_NEWPID) {
+ new_pid_ns = clone_pid_ns();
+ if (!new_pid_ns)
+ goto out_free_pid_nr0;
+ /*
+ * For now, allocate a pid_nr only for the new pid namespace.
+ * Eventually we should allocate a pid_nr for each ancestor
+ * namespace. While this could cost us additional memory in
+ * deeply nested containers, it would allow us to see/signal
+ * all processes from init-pid-ns.
+ */
+ pid_nr[1] = alloc_pidmap_pid_nr(new_pid_ns);
+ if (!pid_nr[1])
+ goto out_free_pid_ns;
+ }
atomic_set(&pid->count, 1);
- pid->nr = pid_nr->nr = nr; /* pid->nr to be removed soon */
+ pid->nr = pid_nr[0]->nr; /* pid->nr to be removed soon */
for (type = 0; type < PIDTYPE_MAX; ++type)
INIT_HLIST_HEAD(&pid->tasks[type]);
spin_lock_init(&pid->lock);
INIT_HLIST_HEAD(&pid->pid_nrs);
- hlist_add_head_rcu(&pid_nr->node, &pid->pid_nrs);
+ hlist_add_head_rcu(&pid_nr[0]->node, &pid->pid_nrs);
+
+ if (pid_nr[1])
+ hlist_add_head_rcu(&pid_nr[1]->node, &pid->pid_nrs);
spin_lock_irq(&pidmap_lock);
hlist_add_head_rcu(&pid->pid_chain, &pid_hash[pid_hashfn(pid->nr)]);
@@ -392,11 +411,15 @@ struct pid *alloc_pid(void)
return pid;
-out_free_pidmap:
- free_pidmap(task_pid_ns(current), nr);
+out_free_pid_ns:
+ put_pid_ns(new_pid_ns);
+
+out_free_pid_nr0:
+ free_pidmap_pid_nr(pid_nr[0]);
out_free_pid:
kmem_cache_free(pid_cachep, pid);
+
return NULL;
}
Index: lx26-20-mm2b/include/linux/pid_namespace.h
===================================================================
--- lx26-20-mm2b.orig/include/linux/pid_namespace.h 2007-03-09 19:00:11.000000000 -0800
+++ lx26-20-mm2b/include/linux/pid_namespace.h 2007-03-09 19:01:09.000000000 -0800
@@ -33,6 +33,7 @@ extern int unshare_pid_ns(unsigned long
struct pid_nr **new_pid_nr);
extern int copy_pid_ns(int flags, struct task_struct *tsk);
extern void free_pid_ns(struct kref *kref);
+extern struct pid_namespace *pid_ns(struct pid * pid);
static inline void put_pid_ns(struct pid_namespace *ns)
{
@@ -41,18 +42,12 @@ static inline void put_pid_ns(struct pid
static inline struct pid_namespace *task_pid_ns(struct task_struct *tsk)
{
- return tsk->nsproxy->pid_ns;
-}
-
-static inline void set_task_pid_ns(struct task_struct *tsk,
- struct pid_namespace * ns)
-{
- tsk->nsproxy->pid_ns = ns
+ return pid_ns(task_pid(tsk));
}
static inline struct task_struct *child_reaper(struct task_struct *tsk)
{
- return init_pid_ns.child_reaper;
+ return task_pid_ns(tsk)->child_reaper;
}
#endif /* _LINUX_PID_NS_H */
_______________________________________________
Containers mailing list
Containers at lists.osdl.org
https://lists.osdl.org/mailman/listinfo/containers
More information about the Devel
mailing list