[Devel] [PATCH 14/15] Destroy pid namespace on init's death

Pavel Emelyanov xemul at openvz.org
Thu Jul 26 07:57:39 PDT 2007


From: Sukadev Bhattiprolu <sukadev at us.ibm.com>

Terminate all processes in a namespace when the reaper of the namespace
is exiting. We do this by walking the pidmap of the namespace and sending
SIGKILL to all processes.

Signed-off-by: Sukadev Bhattiprolu <sukadev at us.ibm.com>
Acked-by: Pavel Emelyanov <xemul at openvz.org>

---

 include/linux/pid.h  |    1 +
 include/linux/wait.h |    4 ++++
 kernel/exit.c        |   10 ++++++----
 kernel/pid.c         |   38 ++++++++++++++++++++++++++++++++++++++
 4 files changed, 49 insertions(+), 4 deletions(-)

diff -upr linux-2.6.23-rc1-mm1.orig/include/linux/pid.h linux-2.6.23-rc1-mm1-7/include/linux/pid.h
--- linux-2.6.23-rc1-mm1.orig/include/linux/pid.h	2007-07-26 16:34:45.000000000 +0400
+++ linux-2.6.23-rc1-mm1-7/include/linux/pid.h	2007-07-26 16:36:37.000000000 +0400
@@ -83,6 +92,7 @@ extern void FASTCALL(detach_pid(struct t
 
 extern struct pid *alloc_pid(struct pid_namespace *ns);
 extern void FASTCALL(free_pid(struct pid *pid));
+extern void zap_pid_ns_processes(struct pid_namespace *pid_ns);
 
 /*
  * the helpers to get the pid's id seen from different namespaces
diff -upr linux-2.6.23-rc1-mm1.orig/include/linux/wait.h linux-2.6.23-rc1-mm1-7/include/linux/wait.h
--- linux-2.6.23-rc1-mm1.orig/include/linux/wait.h	2007-07-26 16:34:45.000000000 +0400
+++ linux-2.6.23-rc1-mm1-7/include/linux/wait.h	2007-07-26 16:36:36.000000000 +0400
@@ -22,9 +22,13 @@
 #include <linux/list.h>
 #include <linux/stddef.h>
 #include <linux/spinlock.h>
+#include <linux/resource.h>
+#include <asm/siginfo.h>
 #include <asm/system.h>
 #include <asm/current.h>
 
+long do_wait(pid_t pid, int options, struct siginfo __user *infop,
+		int __user *stat_addr, struct rusage __user *ru);
 typedef struct __wait_queue wait_queue_t;
 typedef int (*wait_queue_func_t)(wait_queue_t *wait, unsigned mode, int sync, void *key);
 int default_wake_function(wait_queue_t *wait, unsigned mode, int sync, void *key);
diff -upr linux-2.6.23-rc1-mm1.orig/kernel/exit.c linux-2.6.23-rc1-mm1-7/kernel/exit.c
--- linux-2.6.23-rc1-mm1.orig/kernel/exit.c	2007-07-26 16:34:45.000000000 +0400
+++ linux-2.6.23-rc1-mm1-7/kernel/exit.c	2007-07-26 16:36:37.000000000 +0400
@@ -895,6 +915,7 @@ fastcall NORET_TYPE void do_exit(long co
 {
 	struct task_struct *tsk = current;
 	int group_dead;
+	struct pid_namespace *pid_ns = tsk->nsproxy->pid_ns;
 
 	profile_task_exit(tsk);
 
@@ -905,9 +926,10 @@ fastcall NORET_TYPE void do_exit(long co
 	if (unlikely(!tsk->pid))
 		panic("Attempted to kill the idle task!");
 	if (unlikely(tsk == task_child_reaper(tsk))) {
-		if (task_active_pid_ns(tsk) != &init_pid_ns)
-			task_active_pid_ns(tsk)->child_reaper =
-					init_pid_ns.child_reaper;
+		if (pid_ns != &init_pid_ns) {
+			zap_pid_ns_processes(pid_ns);
+			pid_ns->child_reaper = init_pid_ns.child_reaper;
+		}
 		else
 			panic("Attempted to kill init!");
 	}
@@ -1518,7 +1548,7 @@ static inline int my_ptrace_child(struct
 	return (p->parent != p->real_parent);
 }
 
-static long do_wait(pid_t pid, int options, struct siginfo __user *infop,
+long do_wait(pid_t pid, int options, struct siginfo __user *infop,
 		    int __user *stat_addr, struct rusage __user *ru)
 {
 	DECLARE_WAITQUEUE(wait, current);
diff -upr linux-2.6.23-rc1-mm1.orig/kernel/pid.c linux-2.6.23-rc1-mm1-7/kernel/pid.c
--- linux-2.6.23-rc1-mm1.orig/kernel/pid.c	2007-07-26 16:34:45.000000000 +0400
+++ linux-2.6.23-rc1-mm1-7/kernel/pid.c	2007-07-26 16:36:37.000000000 +0400
@@ -428,6 +520,44 @@ err_alloc:
 	return new_ns;
 }
 
+void zap_pid_ns_processes(struct pid_namespace *pid_ns)
+{
+	int i;
+	int nr;
+	int nfree;
+	int options = WNOHANG|WEXITED|__WALL;
+
+repeat:
+	/*
+	 * We know pid == 1 is terminating. Find remaining pid_ts
+	 * in the namespace, signal them and then wait for them
+	 * exit.
+	 */
+	nr = next_pidmap(pid_ns, 1);
+	while (nr > 0) {
+		kill_proc_info(SIGKILL, SEND_SIG_PRIV, nr);
+		nr = next_pidmap(pid_ns, nr);
+	}
+
+	nr = next_pidmap(pid_ns, 1);
+	while (nr > 0) {
+		do_wait(nr, options, NULL, NULL, NULL);
+		nr = next_pidmap(pid_ns, nr);
+	}
+
+	nfree = 0;
+	for (i = 0; i < PIDMAP_ENTRIES; i++)
+		nfree += atomic_read(&pid_ns->pidmap[i].nr_free);
+
+	/*
+	 * If pidmap has entries for processes other than 0 and 1, retry.
+	 */
+	if (nfree < (BITS_PER_PAGE * PIDMAP_ENTRIES - 2))
+		goto repeat;
+
+	return;
+}
+
 static void do_free_pid_ns(struct work_struct *w)
 {
 	struct pid_namespace *ns, *parent;




More information about the Devel mailing list