[Devel] [RFC][PATCH 08/16] Define/use pid->upid_list list.
sukadev at us.ibm.com
sukadev at us.ibm.com
Wed May 23 18:11:43 PDT 2007
Subject: Define/use pid->upid_list list.
From: Sukadev Bhattiprolu <sukadev at us.ibm.com>
With multiple pid namespaces, a process would be known by several pid_t
values, one in each pid namespace. To represent this, we introduce a
'struct upid' which associates a single pid_t value with a single pid
namespace.
We then replace the pid->nr field in 'struct pid' with a list of struct upid'
entries (referred to as the pid->upid_list list). This list represents the
multiple pid_t values of the process, one in each namespace.
The struct upid also replaces 'struct pid' in the pid_hash table to enable us
to find processes given a pid_t from any namespace (i.e we find 'struct upid'
for a given pid_t and from the 'struct upid', we find the 'struct pid' of the
process)
We finally reimplement find_pid() and pid_to_nr() to use pid->upid_list
and remove unused fields from 'struct pid'.
Changelog:
2.6.21-mm2-pidns3:
- 'struct upid' used to be called 'struct pid_nr' and a list of these
were hanging off of 'struct pid'. So, we renamed 'struct pid_nr'
and now hold them in a statically sized array in 'struct pid' since
the number of 'struct upid's for a process is known at process-
creation time.
2.6.21-rc3-mm2:
- [Eric Biederman] Combine all logical changes into one patch
- [Eric Biederman] Implement __pid_nr(pid_ns, pid) for use in procfs.
(now called pid_to_nr_in_ns()).
- [Serge Hallyn]: Remove (!pid_nr) check in free_pid_nr()
Signed-off-by: Cedric Le Goater <clg at fr.ibm.com>
Signed-off-by: Sukadev Bhattiprolu <sukadev at us.ibm.com>
---
fs/proc/array.c | 30 ++++--
fs/proc/base.c | 9 +-
include/linux/init_task.h | 13 ++
include/linux/pid.h | 35 +++++--
include/linux/pid_namespace.h | 12 ++
kernel/fork.c | 2
kernel/pid.c | 187 +++++++++++++++++++++++++++++++++++-------
7 files changed, 232 insertions(+), 56 deletions(-)
Index: lx26-21-mm2/include/linux/pid.h
===================================================================
--- lx26-21-mm2.orig/include/linux/pid.h 2007-05-22 17:06:48.000000000 -0700
+++ lx26-21-mm2/include/linux/pid.h 2007-05-22 17:06:54.000000000 -0700
@@ -16,6 +16,25 @@ enum pid_type
PIDTYPE_MAX
};
+struct pid_namespace;
+
+/*
+ * A struct upid holds a process identifier (or pid->nr) for a given
+ * pid namespace.
+ *
+ * A list of 'struct upid' entries is stored in the struct pid. This list
+ * is used to get the process identifier associated with the pid
+ * namespace it is being seen from.
+ */
+struct upid
+{
+ /* Try to keep pid_chain in the same cacheline as nr for find_pid */
+ struct hlist_node pid_chain; /* link hash collisions on pid_hash */
+ int nr; /* user space pid number */
+ struct pid_namespace *pid_ns; /* pid namespace in which nr is valid */
+ struct pid *pid; /* back to task's unique kernel pid */
+};
+
/*
* What is struct pid?
*
@@ -48,11 +67,10 @@ enum pid_type
struct pid
{
atomic_t count;
- /* Try to keep pid_chain in the same cacheline as nr for find_pid */
- int nr;
- struct hlist_node pid_chain;
/* lists of tasks that use this pid */
struct hlist_head tasks[PIDTYPE_MAX];
+ int num_upids;
+ struct upid *upid_list;
struct rcu_head rcu;
};
@@ -100,16 +118,11 @@ extern struct pid *FASTCALL(find_pid(int
extern struct pid *find_get_pid(int nr);
extern struct pid *find_ge_pid(int nr);
-extern struct pid *alloc_pid(enum copy_process_type);
+extern struct pid *dup_struct_pid(enum copy_process_type);
extern void FASTCALL(free_pid(struct pid *pid));
-static inline pid_t pid_to_nr(struct pid *pid)
-{
- pid_t nr = 0;
- if (pid)
- nr = pid->nr;
- return nr;
-}
+extern pid_t pid_to_nr_in_ns(struct pid_namespace *ns, struct pid *pid);
+extern pid_t pid_to_nr(struct pid *pid);
#define do_each_pid_task(pid, type, task) \
do { \
Index: lx26-21-mm2/include/linux/init_task.h
===================================================================
--- lx26-21-mm2.orig/include/linux/init_task.h 2007-05-22 17:06:48.000000000 -0700
+++ lx26-21-mm2/include/linux/init_task.h 2007-05-22 17:06:54.000000000 -0700
@@ -89,16 +89,23 @@ extern struct nsproxy init_nsproxy;
extern struct group_info init_groups;
-#define INIT_STRUCT_PID { \
- .count = ATOMIC_INIT(1), \
- .nr = 0, \
+#define INIT_STRUCT_UPID { \
+ .nr = 0, \
/* Don't put this struct pid in pid_hash */ \
.pid_chain = { .next = NULL, .pprev = NULL }, \
+ .pid_ns = &init_pid_ns, \
+ .pid = &init_struct_pid, \
+}
+
+#define INIT_STRUCT_PID { \
+ .count = ATOMIC_INIT(1), \
.tasks = { \
{ .first = &init_task.pids[PIDTYPE_PID].node }, \
{ .first = &init_task.pids[PIDTYPE_PGID].node }, \
{ .first = &init_task.pids[PIDTYPE_SID].node }, \
}, \
+ .num_upids = 1, \
+ .upid_list = &init_struct_upid, \
.rcu = RCU_HEAD_INIT, \
}
Index: lx26-21-mm2/kernel/pid.c
===================================================================
--- lx26-21-mm2.orig/kernel/pid.c 2007-05-22 17:06:48.000000000 -0700
+++ lx26-21-mm2/kernel/pid.c 2007-05-22 17:06:54.000000000 -0700
@@ -33,6 +33,7 @@
static struct hlist_head *pid_hash;
static int pidhash_shift;
static struct kmem_cache *pid_cachep;
+struct upid init_struct_upid = INIT_STRUCT_UPID;
struct pid init_struct_pid = INIT_STRUCT_PID;
int pid_max = PID_MAX_DEFAULT;
@@ -195,13 +196,104 @@ static int next_pidmap(struct pid_namesp
return -1;
}
+static void clear_upid(struct upid *upid)
+{
+ /* We can be called with write_lock_irq(&tasklist_lock) held */
+ unsigned long flags;
+
+ free_pidmap(upid->pid_ns, upid->nr);
+
+ spin_lock_irqsave(&pidmap_lock, flags);
+ hlist_del_rcu(&upid->pid_chain);
+ spin_unlock_irqrestore(&pidmap_lock, flags);
+
+ put_pid_ns(upid->pid_ns);
+}
+
+static int init_upid(struct upid *upid, struct pid *pid,
+ struct pid_namespace *pid_ns)
+{
+ int nr;
+
+ nr = alloc_pidmap(pid_ns);
+ if (nr < 0)
+ return nr;
+
+ upid->pid_ns = pid_ns;
+ get_pid_ns(pid_ns);
+ upid->nr = nr;
+
+ /*
+ * The struct pid and list of struct upid_list represent a process
+ * with multiple pid_t values, one in each pid namespace. The list
+ * of pid_t values of a process, represented by pid->upid_list list,
+ * never changes during the life of the process. As such, struct
+ * pid and its upid_list list maybe viewed as a single object i.e
+ * they are created/destroyed together. So we do not need a
+ * reference to struct pid here.
+ */
+ upid->pid = pid;
+
+ INIT_HLIST_NODE(&upid->pid_chain);
+ spin_lock_irq(&pidmap_lock);
+ hlist_add_head_rcu(&upid->pid_chain, &pid_hash[pid_hashfn(nr)]);
+ spin_unlock_irq(&pidmap_lock);
+
+ return 0;
+}
+
+/*
+ * Return the pid_t by which the process @pid is known in the pid
+ * namespace @ns.
+ *
+ * Return 0 if:
+ * - @pid is NULL (eg: procfs calls this for task_pgrp(init_task)
+ * which is NULL).
+ *
+ * - process does not have pid_t in the namespace @ns (eg: parent
+ * process of a child reaper does not exist in the child namespace.
+ * A getppid() call by the child reaper results in 0).
+ */
+pid_t pid_to_nr_in_ns(struct pid_namespace *ns, struct pid *pid)
+{
+ int i;
+ struct upid *upid;
+
+ if (!pid)
+ return 0;
+
+ upid = &pid->upid_list[0];
+ for (i = 0; i < pid->num_upids; i++, upid++) {
+ if (upid->pid_ns == ns)
+ return upid->nr;
+ }
+ return 0;
+}
+EXPORT_SYMBOL_GPL(pid_to_nr_in_ns);
+
+/*
+ * Return the pid_t by which the process @pid is known in the active
+ * pid namespace of the caller.
+ *
+ * pid_to_nr() cannot be static inline if task_active_pid_ns() is
+ * inline as it would cause a circular dependency between pid.h
+ * and pid_namespace.h.
+ */
+pid_t pid_to_nr(struct pid *pid)
+{
+ return pid_to_nr_in_ns(task_active_pid_ns(current), pid);
+}
+EXPORT_SYMBOL_GPL(pid_to_nr);
+
fastcall void put_pid(struct pid *pid)
{
if (!pid)
return;
+
if ((atomic_read(&pid->count) == 1) ||
- atomic_dec_and_test(&pid->count))
+ atomic_dec_and_test(&pid->count)) {
kmem_cache_free(pid_cachep, pid);
+ }
}
EXPORT_SYMBOL_GPL(put_pid);
@@ -213,66 +305,95 @@ static void delayed_put_pid(struct rcu_h
fastcall void free_pid(struct pid *pid)
{
- /* We can be called with write_lock_irq(&tasklist_lock) held */
- unsigned long flags;
+ int i;
+ struct upid *upid = &pid->upid_list[0];
/* check this here to keep copy_process() cleaner */
if (unlikely(pid == &init_struct_pid))
return;
- spin_lock_irqsave(&pidmap_lock, flags);
- hlist_del_rcu(&pid->pid_chain);
- spin_unlock_irqrestore(&pidmap_lock, flags);
+ /* clear any upids that we actually initialized */
+ for (i = 0; i < pid->num_upids; i++, upid++) {
+ if (upid->pid_ns)
+ clear_upid(upid);
+ else
+ break;
+ }
- free_pidmap(&init_pid_ns, pid->nr);
call_rcu(&pid->rcu, delayed_put_pid);
}
-struct pid *alloc_pid(enum copy_process_type copy_src)
+static struct pid *alloc_struct_pid(int num_upids)
{
struct pid *pid;
enum pid_type type;
- int nr = -1;
-
- /* check this here to keep copy_process() cleaner */
- if (unlikely(copy_src == COPY_IDLE_PROCESS))
- return &init_struct_pid;
+ struct upid *upid_list;
+ void *pid_end;
+ /* for now we only support one pid namespace */
+ BUG_ON(num_upids != 1);
pid = kmem_cache_alloc(pid_cachep, GFP_KERNEL);
if (!pid)
- goto out;
+ return NULL;
- nr = alloc_pidmap(task_active_pid_ns(current));
- if (nr < 0)
- goto out_free;
+ pid_end = (void *)pid + sizeof(struct pid);
+ pid->upid_list = (struct upid *)pid_end;
atomic_set(&pid->count, 1);
- pid->nr = nr;
+ pid->num_upids = num_upids;
+
for (type = 0; type < PIDTYPE_MAX; ++type)
INIT_HLIST_HEAD(&pid->tasks[type]);
- spin_lock_irq(&pidmap_lock);
- hlist_add_head_rcu(&pid->pid_chain, &pid_hash[pid_hashfn(pid->nr)]);
- spin_unlock_irq(&pidmap_lock);
+ return pid;
+}
+
+struct pid *dup_struct_pid(enum copy_process_type copy_src)
+{
+ int rc;
+ int i;
+ int num_upids;
+ struct pid *pid;
+ struct upid *upid;
+ struct upid *parent_upid;
+ struct pid *parent_pid = task_pid(current);
+
+ /* check this here to keep copy_process() cleaner */
+ if (unlikely(copy_src == COPY_IDLE_PROCESS))
+ return &init_struct_pid;
+
+ num_upids = parent_pid->num_upids;
+
+ pid = alloc_struct_pid(num_upids);
+ if (!pid)
+ return NULL;
+
+ upid = &pid->upid_list[0];
+ parent_upid = &parent_pid->upid_list[0];
+
+ for (i = 0; i < num_upids; i++, upid++, parent_upid++) {
+ rc = init_upid(upid, pid, parent_upid->pid_ns);
+ if (rc < 0)
+ goto out_free_pid;
+ }
-out:
return pid;
-out_free:
- kmem_cache_free(pid_cachep, pid);
- pid = NULL;
- goto out;
+out_free_pid:
+ free_pid(pid);
+ return NULL;
}
struct pid * fastcall find_pid(int nr)
{
struct hlist_node *elem;
- struct pid *pid;
+ struct upid *upid;
+ struct pid_namespace *ns = task_active_pid_ns(current);
- hlist_for_each_entry_rcu(pid, elem,
+ hlist_for_each_entry_rcu(upid, elem,
&pid_hash[pid_hashfn(nr)], pid_chain) {
- if (pid->nr == nr)
- return pid;
+ if ((upid->pid_ns == ns) && (upid->nr == nr))
+ return upid->pid;
}
return NULL;
}
@@ -436,10 +557,14 @@ void __init pidhash_init(void)
void __init pidmap_init(void)
{
+ int pid_elem_size;
+
init_pid_ns.pidmap[0].page = kzalloc(PAGE_SIZE, GFP_KERNEL);
/* Reserve PID 0. We never call free_pidmap(0) */
set_bit(0, init_pid_ns.pidmap[0].page);
atomic_dec(&init_pid_ns.pidmap[0].nr_free);
- pid_cachep = KMEM_CACHE(pid, SLAB_PANIC);
+ pid_elem_size = sizeof(struct pid) + sizeof(struct upid);
+ pid_cachep = kmem_cache_create("pid+1upid", pid1_elem_size, 0,
+ SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
}
Index: lx26-21-mm2/fs/proc/array.c
===================================================================
--- lx26-21-mm2.orig/fs/proc/array.c 2007-05-22 17:06:48.000000000 -0700
+++ lx26-21-mm2/fs/proc/array.c 2007-05-22 17:06:54.000000000 -0700
@@ -75,6 +75,7 @@
#include <linux/cpuset.h>
#include <linux/rcupdate.h>
#include <linux/delayacct.h>
+#include <linux/pid_namespace.h>
#include <asm/uaccess.h>
#include <asm/pgtable.h>
@@ -161,8 +162,18 @@ static inline char * task_state(struct t
struct group_info *group_info;
int g;
struct fdtable *fdt = NULL;
+ pid_t ppid = 0;
+ pid_t tracer_pid = 0;
+ /* TODO get pid_ns from proc mnt rather than current */
+ struct pid_namespace *ns = task_active_pid_ns(current);
rcu_read_lock();
+
+ if (pid_alive(p)) {
+ ppid = pid_to_nr_in_ns(ns, task_parent_tgid(p));
+ tracer_pid = pid_to_nr_in_ns(ns, task_tracer_pid(p));
+ }
+
buffer += sprintf(buffer,
"State:\t%s\n"
"SleepAVG:\t%lu%%\n"
@@ -174,9 +185,10 @@ static inline char * task_state(struct t
"Gid:\t%d\t%d\t%d\t%d\n",
get_task_state(p),
(p->sleep_avg/1024)*100/(1020000000/1024),
- p->tgid, p->pid,
- pid_alive(p) ? rcu_dereference(p->real_parent)->tgid : 0,
- pid_alive(p) && p->ptrace ? rcu_dereference(p->parent)->pid : 0,
+ pid_to_nr_in_ns(ns, task_tgid(p)),
+ pid_to_nr_in_ns(ns, task_pid(p)),
+ ppid,
+ tracer_pid,
p->uid, p->euid, p->suid, p->fsuid,
p->gid, p->egid, p->sgid, p->fsgid);
@@ -330,6 +342,8 @@ static int do_task_stat(struct task_stru
unsigned long rsslim = 0;
char tcomm[sizeof(task->comm)];
unsigned long flags;
+ /* TODO get pid_ns from proc mnt rather than current */
+ struct pid_namespace *ns = task_active_pid_ns(current);
state = *get_task_state(task);
vsize = eip = esp = 0;
@@ -351,7 +365,7 @@ static int do_task_stat(struct task_stru
struct signal_struct *sig = task->signal;
if (sig->tty) {
- tty_pgrp = pid_to_nr(sig->tty->pgrp);
+ tty_pgrp = pid_to_nr_in_ns(ns, sig->tty->pgrp);
tty_nr = new_encode_dev(tty_devnum(sig->tty));
}
@@ -381,9 +395,9 @@ static int do_task_stat(struct task_stru
stime = cputime_add(stime, sig->stime);
}
- sid = signal_session(sig);
- pgid = process_group(task);
- ppid = rcu_dereference(task->real_parent)->tgid;
+ sid = pid_to_nr_in_ns(ns, task_session(task));
+ pgid = pid_to_nr_in_ns(ns, task_pgrp(task));
+ ppid = pid_to_nr_in_ns(ns, task_parent_tgid(task));
unlock_task_sighand(task, &flags);
}
@@ -413,7 +427,7 @@ static int do_task_stat(struct task_stru
res = sprintf(buffer,"%d (%s) %c %d %d %d %d %d %u %lu \
%lu %lu %lu %lu %lu %ld %ld %ld %ld %d 0 %llu %lu %ld %lu %lu %lu %lu %lu \
%lu %lu %lu %lu %lu %lu %lu %lu %d %d %u %u %llu\n",
- task->pid,
+ pid_to_nr_in_ns(ns, task_pid(task)),
tcomm,
state,
ppid,
Index: lx26-21-mm2/fs/proc/base.c
===================================================================
--- lx26-21-mm2.orig/fs/proc/base.c 2007-05-22 17:06:48.000000000 -0700
+++ lx26-21-mm2/fs/proc/base.c 2007-05-22 17:06:54.000000000 -0700
@@ -72,6 +72,7 @@
#include <linux/audit.h>
#include <linux/poll.h>
#include <linux/nsproxy.h>
+#include <linux/pid_namespace.h>
#include <linux/oom.h>
#include "internal.h"
@@ -2136,13 +2137,15 @@ static struct task_struct *next_tgid(uns
{
struct task_struct *task;
struct pid *pid;
+ /* TODO get pid_ns from proc mnt rather than current */
+ struct pid_namespace *ns = task_active_pid_ns(current);
rcu_read_lock();
retry:
task = NULL;
pid = find_ge_pid(tgid);
if (pid) {
- tgid = pid->nr + 1;
+ tgid = pid_to_nr_in_ns(ns, pid) + 1;
task = pid_task(pid, PIDTYPE_PID);
/* What we to know is if the pid we have find is the
* pid of a thread_group_leader. Testing for task
@@ -2182,6 +2185,8 @@ int proc_pid_readdir(struct file * filp,
struct task_struct *reaper = get_proc_task(filp->f_path.dentry->d_inode);
struct task_struct *task;
int tgid;
+ /* TODO get pid_ns from proc mnt rather than current */
+ struct pid_namespace *ns = task_active_pid_ns(current);
if (!reaper)
goto out_no_task;
@@ -2196,7 +2201,7 @@ int proc_pid_readdir(struct file * filp,
for (task = next_tgid(tgid);
task;
put_task_struct(task), task = next_tgid(tgid + 1)) {
- tgid = task->pid;
+ tgid = pid_to_nr_in_ns(ns, task_pid(task));
filp->f_pos = tgid + TGID_OFFSET;
if (proc_pid_fill_cache(filp, dirent, filldir, task, tgid) < 0) {
put_task_struct(task);
Index: lx26-21-mm2/include/linux/pid_namespace.h
===================================================================
--- lx26-21-mm2.orig/include/linux/pid_namespace.h 2007-05-22 17:06:48.000000000 -0700
+++ lx26-21-mm2/include/linux/pid_namespace.h 2007-05-22 17:06:54.000000000 -0700
@@ -15,6 +15,18 @@ struct pidmap {
#define PIDMAP_ENTRIES ((PID_MAX_LIMIT + 8*PAGE_SIZE - 1)/PAGE_SIZE/8)
+/*
+ * Some properties/terminology for pid namespaces:
+ *
+ * Processes currently exist only in (or belong only to) the init_pid_ns.
+ * When we introduce the ability to clone the pid namespace, a process
+ * would exist in several namespaces. Off the many namespaces that the
+ * process can exist, one namespace is special and we refer to it as the
+ * 'active pid namespace' of the process.
+ *
+ * For now, we have only one pid namespace - init_pid_ns which is the
+ * 'active pid namespace' for all processes in the system.
+ */
struct pid_namespace {
struct kref kref;
struct pidmap pidmap[PIDMAP_ENTRIES];
Index: lx26-21-mm2/kernel/fork.c
===================================================================
--- lx26-21-mm2.orig/kernel/fork.c 2007-05-22 17:06:48.000000000 -0700
+++ lx26-21-mm2/kernel/fork.c 2007-05-22 17:06:54.000000000 -0700
@@ -1026,7 +1026,7 @@ static struct task_struct *copy_process(
if (p->binfmt && !try_module_get(p->binfmt->module))
goto bad_fork_cleanup_put_domain;
- pid = alloc_pid(copy_src);
+ pid = dup_struct_pid(copy_src);
if (!pid)
goto bad_fork_put_binfmt_module;
_______________________________________________
Containers mailing list
Containers at lists.linux-foundation.org
https://lists.linux-foundation.org/mailman/listinfo/containers
More information about the Devel
mailing list