[Devel] [PATCH RHEL7 COMMIT] ms/seccomp: add ptrace options for suspend/resume

Konstantin Khorenko khorenko at virtuozzo.com
Wed Oct 28 08:44:43 PDT 2015


The commit is pushed to "branch-rh7-3.10.0-229.7.2.vz7.9.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh7-3.10.0-229.7.2.vz7.9.3
------>
commit 410466e45a9eff9d2e2580c732d8d7629e796c3f
Author: Andrew Vagin <avagin at openvz.org>
Date:   Wed Oct 28 19:44:43 2015 +0400

    ms/seccomp: add ptrace options for suspend/resume
    
    ML: 13c4a90119d28cfcb6b5bdd820c233b86c2b0237
    
    https://jira.sw.ru/browse/PSBM-39517
    
    This patch is the first step in enabling checkpoint/restore of processes
    with seccomp enabled.
    
    One of the things CRIU does while dumping tasks is inject code into them
    via ptrace to collect information that is only available to the process
    itself. However, if we are in a seccomp mode where these processes are
    prohibited from making these syscalls, then what CRIU does kills the task.
    
    This patch adds a new ptrace option, PTRACE_O_SUSPEND_SECCOMP, that enables
    a task from the init user namespace which has CAP_SYS_ADMIN and no seccomp
    filters to disable (and re-enable) seccomp filters for another task so that
    they can be successfully dumped (and restored). We restrict the set of
    processes that can disable seccomp through ptrace because although today
    ptrace can be used to bypass seccomp, there is some discussion of closing
    this loophole in the future and we would like this patch to not depend on
    that behavior and be future proofed for when it is removed.
    
    Note that seccomp can be suspended before any filters are actually
    installed; this behavior is useful on criu restore, so that we can suspend
    seccomp, restore the filters, unmap our restore code from the restored
    process' address space, and then resume the task by detaching and have the
    filters resumed as well.
    
    v2 changes:
    
    * require that the tracer have no seccomp filters installed
    * drop TIF_NOTSC manipulation from the patch
    * change from ptrace command to a ptrace option and use this ptrace option
      as the flag to check. This means that as soon as the tracer
      detaches/dies, seccomp is re-enabled and, as a corollary, that one cannot
      disable seccomp across PTRACE_ATTACHs.
    
    v3 changes:
    
    * get rid of various #ifdefs everywhere
    * report more sensible errors when PTRACE_O_SUSPEND_SECCOMP is incorrectly
      used
    
    v4 changes:
    
    * get rid of may_suspend_seccomp() in favor of a capable() check in ptrace
      directly
    
    v5 changes:
    
    * check that seccomp is not enabled (or suspended) on the tracer
    
    Signed-off-by: Tycho Andersen <tycho.andersen at canonical.com>
    CC: Will Drewry <wad at chromium.org>
    CC: Roland McGrath <roland at hack.frob.com>
    CC: Pavel Emelyanov <xemul at parallels.com>
    CC: Serge E. Hallyn <serge.hallyn at ubuntu.com>
    Acked-by: Oleg Nesterov <oleg at redhat.com>
    Acked-by: Andy Lutomirski <luto at amacapital.net>
    [kees: access seccomp.mode through seccomp_mode() instead]
    Signed-off-by: Kees Cook <keescook at chromium.org>
    Signed-off-by: Andrew Vagin <avagin at openvz.org>
    
    Acked-by: Cyrill Gorcunov <gorcunov at openvz.org>
---
 include/linux/ptrace.h      |  1 +
 include/uapi/linux/ptrace.h |  6 ++++--
 kernel/ptrace.c             | 13 +++++++++++++
 kernel/seccomp.c            |  4 ++++
 4 files changed, 22 insertions(+), 2 deletions(-)

diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h
index 822d877..5e0c65d 100644
--- a/include/linux/ptrace.h
+++ b/include/linux/ptrace.h
@@ -33,6 +33,7 @@
 #define PT_TRACE_SECCOMP	PT_EVENT_FLAG(PTRACE_EVENT_SECCOMP)
 
 #define PT_EXITKILL		(PTRACE_O_EXITKILL << PT_OPT_FLAG_SHIFT)
+#define PT_SUSPEND_SECCOMP	(PTRACE_O_SUSPEND_SECCOMP << PT_OPT_FLAG_SHIFT)
 
 /* single stepping state bits (used on ARM and PA-RISC) */
 #define PT_SINGLESTEP_BIT	31
diff --git a/include/uapi/linux/ptrace.h b/include/uapi/linux/ptrace.h
index cf1019e..a7a6979 100644
--- a/include/uapi/linux/ptrace.h
+++ b/include/uapi/linux/ptrace.h
@@ -89,9 +89,11 @@ struct ptrace_peeksiginfo_args {
 #define PTRACE_O_TRACESECCOMP	(1 << PTRACE_EVENT_SECCOMP)
 
 /* eventless options */
-#define PTRACE_O_EXITKILL	(1 << 20)
+#define PTRACE_O_EXITKILL		(1 << 20)
+#define PTRACE_O_SUSPEND_SECCOMP	(1 << 21)
 
-#define PTRACE_O_MASK		(0x000000ff | PTRACE_O_EXITKILL)
+#define PTRACE_O_MASK		(\
+	0x000000ff | PTRACE_O_EXITKILL | PTRACE_O_SUSPEND_SECCOMP)
 
 #include <asm/ptrace.h>
 
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 7359678..953ef28 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -592,6 +592,19 @@ static int ptrace_setoptions(struct task_struct *child, unsigned long data)
 	if (data & ~(unsigned long)PTRACE_O_MASK)
 		return -EINVAL;
 
+	if (unlikely(data & PTRACE_O_SUSPEND_SECCOMP)) {
+		if (!config_enabled(CONFIG_CHECKPOINT_RESTORE) ||
+		    !config_enabled(CONFIG_SECCOMP))
+			return -EINVAL;
+
+		if (!capable(CAP_SYS_ADMIN))
+			return -EPERM;
+
+		if (seccomp_mode(&current->seccomp) != SECCOMP_MODE_DISABLED ||
+		    current->ptrace & PT_SUSPEND_SECCOMP)
+			return -EPERM;
+	}
+
 	/* Avoid intermediate state when all opts are cleared */
 	flags = child->ptrace;
 	flags &= ~(PTRACE_O_MASK << PT_OPT_FLAG_SHIFT);
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index eda2da3..c1bac82 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -381,6 +381,10 @@ int __secure_computing(int this_syscall)
 	int *syscall;
 	u32 ret;
 
+	if (config_enabled(CONFIG_CHECKPOINT_RESTORE) &&
+	    unlikely(current->ptrace & PT_SUSPEND_SECCOMP))
+		return 0;
+
 	switch (mode) {
 	case SECCOMP_MODE_STRICT:
 		syscall = mode1_syscalls;



More information about the Devel mailing list