[Devel] [PATCH RHEL7 COMMIT] ms/pidns: expose task pid_ns_for_children to userspace
Konstantin Khorenko
khorenko at virtuozzo.com
Fri May 12 04:46:37 PDT 2017
The commit is pushed to "branch-rh7-3.10.0-514.16.1.vz7.32.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh7-3.10.0-514.16.1.vz7.32.1
------>
commit accbb8b5adaa51e92d645ae1a70598fbd1fbafdc
Author: Kirill Tkhai <ktkhai at virtuozzo.com>
Date: Fri May 12 15:46:37 2017 +0400
ms/pidns: expose task pid_ns_for_children to userspace
ms commit: eaa0d190bfe1ed891b814a52712dcd852554cb08
pid_ns_for_children set by a task is known only to the task itself, and
it's impossible to identify it from outside.
It's a big problem for checkpoint/restore software like CRIU, because it
can't correctly handle tasks that do setns(CLONE_NEWPID) in the process of
their work.
This patch solves the problem by exposing pid_ns_for_children in the task's
/proc/<pid>/ns directory in the standard way, under the name "pid_for_children":
~# ls /proc/5531/ns -l | grep pid
lrwxrwxrwx 1 root root 0 Jan 14 16:38 pid -> pid:[4026531836]
lrwxrwxrwx 1 root root 0 Jan 14 16:38 pid_for_children -> pid:[4026532286]
Link: http://lkml.kernel.org/r/149201123914.6007.2187327078064239572.stgit@localhost.localdomain
Signed-off-by: Kirill Tkhai <ktkhai at virtuozzo.com>
Cc: Andrei Vagin <avagin at virtuozzo.com>
Cc: Andreas Gruenbacher <agruenba at redhat.com>
Cc: Kees Cook <keescook at chromium.org>
Cc: Michael Kerrisk <mtk.manpages at googlemail.com>
Cc: Al Viro <viro at zeniv.linux.org.uk>
Cc: Oleg Nesterov <oleg at redhat.com>
Cc: Paul Moore <paul at paul-moore.com>
Cc: Eric Biederman <ebiederm at xmission.com>
Cc: Andy Lutomirski <luto at amacapital.net>
Cc: Ingo Molnar <mingo at kernel.org>
Cc: Serge Hallyn <serge at hallyn.com>
Signed-off-by: Andrew Morton <akpm at linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds at linux-foundation.org>
https://jira.sw.ru/browse/PSBM-58669
Signed-off-by: Kirill Tkhai <ktkhai at virtuozzo.com>
---
fs/proc/namespaces.c | 1 +
include/linux/proc_ns.h | 1 +
kernel/pid_namespace.c | 33 +++++++++++++++++++++++++++++++++
3 files changed, 35 insertions(+)
diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c
index e70c2d3..e317033 100644
--- a/fs/proc/namespaces.c
+++ b/fs/proc/namespaces.c
@@ -27,6 +27,7 @@ static const struct proc_ns_operations *ns_entries[] = {
#endif
#ifdef CONFIG_PID_NS
&pidns_operations,
+ &pidns_for_children_operations,
#endif
#ifdef CONFIG_USER_NS
&userns_operations,
diff --git a/include/linux/proc_ns.h b/include/linux/proc_ns.h
index b8dc2bc..8deba57 100644
--- a/include/linux/proc_ns.h
+++ b/include/linux/proc_ns.h
@@ -27,6 +27,7 @@ extern const struct proc_ns_operations netns_operations;
extern const struct proc_ns_operations utsns_operations;
extern const struct proc_ns_operations ipcns_operations;
extern const struct proc_ns_operations pidns_operations;
+extern const struct proc_ns_operations pidns_for_children_operations;
extern const struct proc_ns_operations userns_operations;
extern const struct proc_ns_operations mntns_operations;
diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c
index b7cf629..594167d 100644
--- a/kernel/pid_namespace.c
+++ b/kernel/pid_namespace.c
@@ -336,6 +336,29 @@ static void *pidns_get(struct task_struct *task)
return ns;
}
+static void *pidns_for_children_get(struct task_struct *task) /* Return @task's nsproxy pid namespace with a reference held, or NULL. */
+{
+ struct pid_namespace *ns = NULL;
+
+ task_lock(task); /* task->nsproxy is read under the task lock */
+ if (task->nsproxy) {
+ ns = task->nsproxy->pid_ns;
+ get_pid_ns(ns); /* reference is released below, or later through the .put callback (pidns_put) */
+ }
+ task_unlock(task);
+
+ if (ns) {
+ read_lock(&tasklist_lock);
+ if (!ns->child_reaper) { /* no child reaper set (presumably no init task in this ns yet - confirm): report no namespace */
+ put_pid_ns(ns);
+ ns = NULL;
+ }
+ read_unlock(&tasklist_lock);
+ }
+
+ return ns; /* NULL if the task has no nsproxy or the namespace has no child reaper */
+}
+
static void pidns_put(void *ns)
{
put_pid_ns(ns);
@@ -387,6 +410,16 @@ const struct proc_ns_operations pidns_operations = {
.inum = pidns_inum,
};
+const struct proc_ns_operations pidns_for_children_operations = { /* proc ns operations for the "pid_for_children" entry */
+ .name = "pid_for_children",
+ .real_ns_name = "pid", /* presumably the prefix shown in the link target, e.g. pid:[4026532286] - confirm */
+ .type = CLONE_NEWPID,
+ .get = pidns_for_children_get, /* takes the namespace from task->nsproxy */
+ .put = pidns_put, /* drops the reference with put_pid_ns() */
+ .install = pidns_install,
+ .inum = pidns_inum, /* same inum callback as pidns_operations */
+};
+
static __init int pid_namespaces_init(void)
{
pid_ns_cachep = KMEM_CACHE(pid_namespace, SLAB_PANIC);
More information about the Devel
mailing list