[Devel] [PATCH 17/17] Pid-NS(V3) Introduce proc_mnt for pid_ns
sukadev at us.ibm.com
sukadev at us.ibm.com
Sat Jun 16 16:05:55 PDT 2007
From: sukadev at linux.vnet.ibm.com
Subject: [PATCH 17/17] Pid-NS(V3) Introduce proc_mnt for pid_ns
The following patch completes the removal of the global proc_mnt.
It fetches the mnt on which to do dentry invalidations from the
pid_namespace in which the task appears.
For now, there is only one pid namespace in mainline so this is
straightforward. In the -lxc tree we'll have to do something
more complex. The proc_flush_task() code takes a task, and
needs to be able to find the corresponding proc superblocks on
which that task's /proc/<pid> directories could appear. We
can tell in which pid namespaces a task appears, so I put a
pointer from the pid namespace to the corresponding proc_mnt.
/proc currently has some special code to make sure that the root
directory gets set up correctly. It uses the proc_mnt variable in order
to find its way to the root inode.
Changelog:
2.6.22-rc4-mm2-pidns1:
- Call proc_fill_super once per pid namespace
- Call proc_flush_task() before detaching a task's 'struct pid'.
- Get a reference to pid namespace when mounting/remounting /proc.
Put this reference when unmounting.
Signed-off-by: Dave Hansen <haveblue at us.ibm.com>
Signed-off-by: Sukadev Bhattiprolu <sukadev at us.ibm.com>
---
fs/proc/base.c | 30 ++++++++++++++++++-
fs/proc/inode.c | 12 ++++++-
fs/proc/root.c | 65 ++++++++++++++++++++++++++++++------------
include/linux/pid_namespace.h | 1
include/linux/proc_fs.h | 1
kernel/exit.c | 2 -
6 files changed, 87 insertions(+), 24 deletions(-)
Index: lx26-22-rc4-mm2/fs/proc/base.c
===================================================================
--- lx26-22-rc4-mm2.orig/fs/proc/base.c 2007-06-16 02:37:15.000000000 -0700
+++ lx26-22-rc4-mm2/fs/proc/base.c 2007-06-16 04:15:23.000000000 -0700
@@ -2164,9 +2164,11 @@ static const struct inode_operations pro
};
/**
- * proc_flush_task - Remove dcache entries for @task from the /proc dcache.
+ * proc_flush_task_from_pid_ns - Remove dcache entries for @task
+ * from the /proc dcache.
*
* @task: task that should be flushed.
+ * @pid_ns: pid_namespace in which that task appears
*
* Looks in the dcache for
* /proc/@pid
@@ -2184,11 +2186,20 @@ static const struct inode_operations pro
* that no dcache entries will exist at process exit time it
* just makes it very unlikely that any will persist.
*/
-void proc_flush_task(struct task_struct *task)
+static void proc_flush_task_from_pid_ns(struct task_struct *task,
+ struct pid_namespace* pid_ns)
{
struct dentry *dentry, *leader, *dir;
char buf[PROC_NUMBUF];
struct qstr name;
+ struct vfsmount *proc_mnt = pid_ns->proc_mnt;
+
+ /*
+ * It is possible that no /procs have been instantiated
+ * for this particular pid namespace.
+ */
+ if (!proc_mnt || !proc_mnt->mnt_root)
+ return;
name.name = buf;
name.len = snprintf(buf, sizeof(buf), "%d", task->pid);
@@ -2230,6 +2241,21 @@ out:
return;
}
+void proc_flush_task(struct task_struct *task)
+{
+ int i;
+ struct pid *pid;
+ struct upid* upid;
+
+ pid = task_pid(task);
+ if (!pid)
+ return;
+
+ upid = &pid->upid_list[0];
+ for (i = 0; i < pid->num_upids; i++, upid++)
+ proc_flush_task_from_pid_ns(task, upid->pid_ns);
+}
+
static struct dentry *proc_pid_instantiate(struct inode *dir,
struct dentry * dentry,
struct task_struct *task, const void *ptr)
Index: lx26-22-rc4-mm2/fs/proc/inode.c
===================================================================
--- lx26-22-rc4-mm2.orig/fs/proc/inode.c 2007-06-16 02:37:15.000000000 -0700
+++ lx26-22-rc4-mm2/fs/proc/inode.c 2007-06-16 04:15:23.000000000 -0700
@@ -6,6 +6,7 @@
#include <linux/time.h>
#include <linux/proc_fs.h>
+#include <linux/hardirq.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/string.h>
@@ -19,6 +20,7 @@
#include <asm/system.h>
#include <asm/uaccess.h>
+#include <linux/pid_namespace.h>
#include "internal.h"
@@ -75,8 +77,6 @@ static void proc_delete_inode(struct ino
clear_inode(inode);
}
-struct vfsmount *proc_mnt;
-
static void proc_read_inode(struct inode * inode)
{
inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
@@ -431,6 +431,8 @@ out_mod:
int proc_fill_super(struct super_block *s, void *data, int silent)
{
+ struct pid_namespace *pid_ns = data;
+ struct proc_inode *ei;
struct inode * root_inode;
s->s_flags |= MS_NODIRATIME | MS_NOSUID | MS_NOEXEC;
@@ -439,6 +441,7 @@ int proc_fill_super(struct super_block *
s->s_magic = PROC_SUPER_MAGIC;
s->s_op = &proc_sops;
s->s_time_gran = 1;
+ s->s_fs_info = get_pid_ns(pid_ns);
de_get(&proc_root);
root_inode = proc_get_inode(s, PROC_ROOT_INO, &proc_root);
@@ -449,6 +452,11 @@ int proc_fill_super(struct super_block *
s->s_root = d_alloc_root(root_inode);
if (!s->s_root)
goto out_no_root;
+ /* Seed the root directory with a pid so it doesn't need
+ * to be special in base.c.
+ */
+ ei = PROC_I(root_inode);
+ ei->pid = find_get_pid(1);
return 0;
out_no_root:
Index: lx26-22-rc4-mm2/fs/proc/root.c
===================================================================
--- lx26-22-rc4-mm2.orig/fs/proc/root.c 2007-06-16 04:15:23.000000000 -0700
+++ lx26-22-rc4-mm2/fs/proc/root.c 2007-06-16 04:15:23.000000000 -0700
@@ -12,38 +12,73 @@
#include <linux/time.h>
#include <linux/proc_fs.h>
#include <linux/stat.h>
+#include <linux/hardirq.h>
#include <linux/init.h>
#include <linux/sched.h>
#include <linux/module.h>
#include <linux/bitops.h>
#include <linux/smp_lock.h>
#include <linux/mount.h>
+#include <linux/pid_namespace.h>
#include "internal.h"
struct proc_dir_entry *proc_net, *proc_net_stat, *proc_bus, *proc_root_fs, *proc_root_driver;
+static int proc_test_sb(struct super_block *s, void *data)
+{
+ struct pid_namespace *pid_ns = data;
+ if (s->s_fs_info == pid_ns)
+ return 1;
+ return 0;
+}
+
static int proc_get_sb(struct file_system_type *fs_type,
int flags, const char *dev_name, void *data, struct vfsmount *mnt)
{
- if (proc_mnt) {
- /* Seed the root directory with a pid so it doesn't need
- * to be special in base.c. I would do this earlier but
- * the only task alive when /proc is mounted the first time
- * is the init_task and it doesn't have any pids.
- */
- struct proc_inode *ei;
- ei = PROC_I(proc_mnt->mnt_sb->s_root->d_inode);
- if (!ei->pid)
- ei->pid = find_get_pid(1);
+ int error;
+ struct super_block *s;
+ struct pid_namespace *pid_ns;
+
+ /*
+ * We can eventually derive this out of whatever mount
+ * arguments the user supplies, but just take it from
+ * current for now.
+ */
+ pid_ns = task_active_pid_ns(current);
+
+ s = sget(fs_type, proc_test_sb, set_anon_super, pid_ns);
+ if (IS_ERR(s))
+ return PTR_ERR(s);
+
+ if (!s->s_root) {
+ error = proc_fill_super(s, pid_ns, 0);
+ if (error) {
+ deactivate_super(s);
+ return error;
+ }
}
- return get_sb_single(fs_type, flags, data, proc_fill_super, mnt);
+
+ if (!pid_ns->proc_mnt)
+ pid_ns->proc_mnt = mnt;
+
+ do_remount_sb(s, flags, data, 0);
+ return simple_set_mnt(mnt, s);
+}
+
+static void proc_kill_sb(struct super_block *s)
+{
+ struct pid_namespace *pid_ns = s->s_fs_info;
+
+ pid_ns->proc_mnt = NULL;
+ put_pid_ns(pid_ns);
+ kill_anon_super(s);
}
static struct file_system_type proc_fs_type = {
.name = "proc",
.get_sb = proc_get_sb,
- .kill_sb = kill_anon_super,
+ .kill_sb = proc_kill_sb,
.fs_supers = LIST_HEAD_INIT(proc_fs_type.fs_supers),
};
@@ -55,12 +90,6 @@ void __init proc_root_init(void)
err = register_filesystem(&proc_fs_type);
if (err)
return;
- proc_mnt = kern_mount(&proc_fs_type);
- err = PTR_ERR(proc_mnt);
- if (IS_ERR(proc_mnt)) {
- unregister_filesystem(&proc_fs_type);
- return;
- }
proc_misc_init();
proc_net = proc_mkdir("net", NULL);
proc_net_stat = proc_mkdir("net/stat", NULL);
Index: lx26-22-rc4-mm2/include/linux/pid_namespace.h
===================================================================
--- lx26-22-rc4-mm2.orig/include/linux/pid_namespace.h 2007-06-16 04:15:23.000000000 -0700
+++ lx26-22-rc4-mm2/include/linux/pid_namespace.h 2007-06-16 04:15:23.000000000 -0700
@@ -28,6 +28,7 @@ struct pid_namespace {
int last_pid;
struct task_struct *child_reaper;
atomic_t terminating;
+ struct vfsmount *proc_mnt;
};
extern struct pid_namespace init_pid_ns;
Index: lx26-22-rc4-mm2/include/linux/proc_fs.h
===================================================================
--- lx26-22-rc4-mm2.orig/include/linux/proc_fs.h 2007-06-16 02:37:15.000000000 -0700
+++ lx26-22-rc4-mm2/include/linux/proc_fs.h 2007-06-16 04:15:23.000000000 -0700
@@ -125,7 +125,6 @@ extern struct proc_dir_entry *create_pro
struct proc_dir_entry *parent);
extern void remove_proc_entry(const char *name, struct proc_dir_entry *parent);
-extern struct vfsmount *proc_mnt;
extern int proc_fill_super(struct super_block *,void *,int);
extern struct inode *proc_get_inode(struct super_block *, unsigned int, struct proc_dir_entry *);
Index: lx26-22-rc4-mm2/kernel/exit.c
===================================================================
--- lx26-22-rc4-mm2.orig/kernel/exit.c 2007-06-16 04:15:23.000000000 -0700
+++ lx26-22-rc4-mm2/kernel/exit.c 2007-06-16 04:15:23.000000000 -0700
@@ -157,6 +157,7 @@ void release_task(struct task_struct * p
struct task_struct *leader;
int zap_leader;
repeat:
+ proc_flush_task(p);
atomic_dec(&p->user->processes);
write_lock_irq(&tasklist_lock);
ptrace_unlink(p);
@@ -185,7 +186,6 @@ repeat:
}
write_unlock_irq(&tasklist_lock);
- proc_flush_task(p);
release_thread(p);
call_rcu(&p->rcu, delayed_put_task_struct);
--
_______________________________________________
Containers mailing list
Containers at lists.linux-foundation.org
https://lists.linux-foundation.org/mailman/listinfo/containers
More information about the Devel
mailing list