[CRIU] [PATCH 1/3] seccomp: add initial support for SECCOMP_MODE_STRICT
Pavel Emelyanov
xemul at parallels.com
Tue Jun 2 05:40:24 PDT 2015
Cool :) And can you elaborate on why exactly the filtered mode cannot be
supported?
More comments inline.
> diff --git a/cr-dump.c b/cr-dump.c
> index f865967..72533f1 100644
> --- a/cr-dump.c
> +++ b/cr-dump.c
> @@ -19,6 +19,8 @@
> #include <sched.h>
> #include <sys/resource.h>
>
> +#include <linux/seccomp.h>
> +
> #include "protobuf.h"
> #include "protobuf/fdinfo.pb-c.h"
> #include "protobuf/fs.pb-c.h"
> @@ -551,6 +553,22 @@ err:
> return ret;
> }
>
> +static int get_seccomp_state(pid_t pid, TaskCoreEntry *tc)
> +{
> + struct proc_status_creds cr;
> + int ret;
> +
> + ret = parse_pid_status(pid, &cr);
> + if (ret < 0)
> + return -1;
Going to read proc files slows things down. Can we take the status
parse result from the seize_task() here?
> + if (cr.seccomp_mode != SECCOMP_MODE_DISABLED) {
> + tc->has_seccomp_mode = true;
> + tc->seccomp_mode = cr.seccomp_mode;
> + }
> + return 0;
> +}
> +
> static DECLARE_KCMP_TREE(vm_tree, KCMP_VM);
> static DECLARE_KCMP_TREE(fs_tree, KCMP_FS);
> static DECLARE_KCMP_TREE(files_tree, KCMP_FILES);
> @@ -672,6 +690,10 @@ static int dump_task_core_all(struct pstree_item *item,
> if (ret < 0)
> goto err;
>
> + ret = get_seccomp_state(pid, core->tc);
> + if (ret < 0)
> + goto err;
> +
> strncpy((char *)core->tc->comm, stat->comm, TASK_COMM_LEN);
> core->tc->flags = stat->flags;
> core->tc->task_state = item->state;
> diff --git a/cr-restore.c b/cr-restore.c
> index aa00dc2..ea9d8d2 100644
> --- a/cr-restore.c
> +++ b/cr-restore.c
> @@ -24,6 +24,8 @@
>
> #include <sys/sendfile.h>
>
> +#include <linux/seccomp.h>
> +
> #include "ptrace.h"
> #include "compiler.h"
> #include "asm/types.h"
> @@ -756,6 +758,36 @@ static int collect_helper_pids()
> return 0;
> }
>
> +/*
> + * Prepare the seccomp state for the process. Note that seccomp is restored in
> + * suspended mode, and then unsuspended after the restorer blob completes in
> + * finalize_restore().
> + */
I'm not sure I get this. The prctl below will just turn the unsuspended strict mode
on. The ptrace-s in fork_with_pid() will try to suspend one, but this looks racy.
What prevents the prctl from being called _before_ the mentioned ptrace-s?
> +static int prepare_seccomp(pid_t pid, CoreEntry *core)
> +{
> + if (core->tc->seccomp_mode == SECCOMP_MODE_DISABLED)
> + return 0;
> +
> + pr_info("restoring seccomp mode %d for %d\n", core->tc->seccomp_mode, pid);
> +
> + switch (core->tc->seccomp_mode) {
> + case SECCOMP_MODE_STRICT:
> + if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, 0, 0, 0)) {
> + pr_perror("setting seccomp failed");
> + return -1;
> + }
> + break;
> + case SECCOMP_MODE_FILTER:
> + pr_err("seccomp mode 2 not supported\n");
> + return -1;
> + default:
> + pr_err("unknown seccomp mode %d\n", core->tc->seccomp_mode);
> + return -1;
> + }
> +
> + return 0;
> +}
> +
> static int open_cores(int pid, CoreEntry *leader_core)
> {
> int i, tpid;
> @@ -826,6 +858,9 @@ static int restore_one_alive_task(int pid, CoreEntry *core)
> if (collect_helper_pids() < 0)
> return -1;
>
> + if (prepare_seccomp(pid, core) < 0)
> + return -1;
> +
> if (inherit_fd_fini() < 0)
> return -1;
>
> @@ -1135,13 +1170,57 @@ static inline int fork_with_pid(struct pstree_item *item)
> goto err_unlock;
> }
>
> -
> if (item == root_item) {
> item->pid.real = ret;
> pr_debug("PID: real %d virt %d\n",
> item->pid.real, item->pid.virt);
> }
>
> + if (ca.core->tc->seccomp_mode != SECCOMP_MODE_DISABLED) {
> +#ifdef CONFIG_HAS_SUSPEND_SECCOMP
Can we avoid $ifdef-s in the middle of the code, please?
> + int status;
> +
> + /*
> + * Since the process is going to restore seccomp state, we
> + * need to temporarily suspend seccomp before we restore this
> + * state, otherwise subsequent restore syscalls might be
> + * blocked. seccomp is resumed at the end in finalize_restore()
> + */
> + if (ptrace(PTRACE_ATTACH, ret, NULL, NULL) < 0) {
> + pr_perror("ptrace attach failed");
> + kill(ret, SIGKILL);
> + goto err_unlock;
> + }
> +
> + if (waitpid(ret, &status, 0) < 0) {
> + pr_perror("waidpid failed");
> + kill(ret, SIGKILL);
> + goto err_unlock;
> + }
> +
> + if (!WIFSTOPPED(status)) {
> + pr_err("not stopped after ptrace attach?\n");
> + kill(ret, SIGKILL);
> + goto err_unlock;
> + }
> +
> + if (ptrace(PTRACE_SUSPEND_SECCOMP, ret, NULL, 1) < 0) {
> + pr_perror("seccomp suspend failed %d", ret);
> + kill(ret, SIGKILL);
> + goto err_unlock;
> + }
> +
> + if (ptrace(PTRACE_DETACH, ret, NULL, NULL) < 0) {
> + pr_perror("ptrace detach failed");
> + kill(ret, SIGKILL);
> + goto err_unlock;
> + }
> +#else
> + pr_err("trying to restore seccomp on a kernel without seccomp suspend\n");
> + goto err_unlock;
> +#endif
> + }
> +
> if (opts.pidfile && root_item == item) {
> int pid;
>
> @@ -1690,6 +1769,11 @@ detach:
> break;
> }
>
> +#ifdef CONFIG_HAS_SUSPEND_SECCOMP
> + if (ptrace(PTRACE_SUSPEND_SECCOMP, pid, NULL, 0) < 0)
> + pr_perror("failed resuming seccomp");
> +#endif
> +
> if (ptrace(PTRACE_DETACH, pid, NULL, 0))
> pr_perror("Unable to execute %d", pid);
> }
> diff --git a/include/prctl.h b/include/prctl.h
> index 441135e..bfede51 100644
> --- a/include/prctl.h
> +++ b/include/prctl.h
> @@ -9,6 +9,9 @@
> #ifndef PR_GET_NAME
> # define PR_GET_NAME 16
> #endif
> +#ifndef PR_SET_SECCOMP
> +# define PR_SET_SECCOMP 22
> +#endif
> #ifndef PR_CAPBSET_READ
> # define PR_CAPBSET_READ 23
> #endif
> diff --git a/include/proc_parse.h b/include/proc_parse.h
> index ebb5351..3daf9a2 100644
> --- a/include/proc_parse.h
> +++ b/include/proc_parse.h
> @@ -84,6 +84,8 @@ struct proc_status_creds {
>
> char state;
> int ppid;
> +
> + int seccomp_mode;
> };
>
> struct mount_info;
> diff --git a/include/ptrace.h b/include/ptrace.h
> index 0d89788..2803afb 100644
> --- a/include/ptrace.h
> +++ b/include/ptrace.h
> @@ -11,6 +11,10 @@
> # define PTRACE_SEIZE 0x4206
> #endif
>
> +#ifndef PTRACE_SUSPEND_SECCOMP
> +# define PTRACE_SUSPEND_SECCOMP 10
> +#endif
> +
> #ifndef PTRACE_INTERRUPT
> # define PTRACE_INTERRUPT 0x4207
> #endif
> diff --git a/pie/restorer.c b/pie/restorer.c
> index 8713c6a..c07988d 100644
> --- a/pie/restorer.c
> +++ b/pie/restorer.c
> @@ -40,6 +40,18 @@
> #define PR_SET_PDEATHSIG 1
> #endif
>
> +#ifndef SECCOMP_MODE_DISABLED
> +#define SECCOMP_MODE_DISABLED 0
> +#endif
> +
> +#ifndef SECCOMP_MODE_STRICT
> +#define SECCOMP_MODE_STRICT 1
> +#endif
> +
> +#ifndef SECCOMP_MODE_FILTER
> +#define SECCOMP_MODE_FILTER 2
> +#endif
> +
> #define sys_prctl_safe(opcode, val1, val2, val3) \
> ({ \
> long __ret = sys_prctl(opcode, val1, val2, val3, 0); \
> diff --git a/proc_parse.c b/proc_parse.c
> index f5c870e..8ebd2d0 100644
> --- a/proc_parse.c
> +++ b/proc_parse.c
> @@ -9,6 +9,7 @@
> #include <string.h>
> #include <ctype.h>
> #include <linux/fs.h>
> +#include <linux/seccomp.h>
>
> #include "asm/types.h"
> #include "list.h"
> @@ -763,7 +764,7 @@ int parse_pid_status(pid_t pid, struct proc_status_creds *cr)
> if (bfdopenr(&f))
> return -1;
>
> - while (done < 8) {
> + while (done < 9) {
> str = breadline(&f);
> if (str == NULL)
> break;
> @@ -824,9 +825,22 @@ int parse_pid_status(pid_t pid, struct proc_status_creds *cr)
>
> done++;
> }
> +
> + if (!strncmp(str, "Seccomp:", 8)) {
> + if (sscanf(str + 9, "%d", &cr->seccomp_mode) != 1) {
> + goto err_parse;
> + }
> +
> + if (cr->seccomp_mode == SECCOMP_MODE_FILTER) {
> + pr_err("SECCOMP_MODE_FILTER not currently supported\n");
> + goto err_parse;
> + }
> +
> + done++;
> + }
> }
>
> - if (done == 8)
> + if (done == 9)
> ret = 0;
>
> err_parse:
> diff --git a/protobuf/core.proto b/protobuf/core.proto
> index 9f70da9..bbccee5 100644
> --- a/protobuf/core.proto
> +++ b/protobuf/core.proto
> @@ -26,6 +26,8 @@ message task_core_entry {
> optional uint32 cg_set = 9;
>
> optional signal_queue_entry signals_s = 10;
> +
> + optional int32 seccomp_mode = 11;
Should this rather be a protobuf enum?
> }
>
> message task_kobj_ids_entry {
> diff --git a/ptrace.c b/ptrace.c
> index be6b67b..6f7bfbe 100644
> --- a/ptrace.c
> +++ b/ptrace.c
> @@ -8,11 +8,14 @@
> #include <limits.h>
> #include <signal.h>
>
> +#include <sys/ptrace.h>
> #include <sys/types.h>
> #include <sys/time.h>
> #include <sys/resource.h>
> #include <sys/wait.h>
>
> +#include <linux/seccomp.h>
> +
> #include "compiler.h"
> #include "asm/types.h"
> #include "util.h"
> @@ -30,9 +33,20 @@ int unseize_task(pid_t pid, int orig_st, int st)
> else if (st == TASK_STOPPED) {
> if (orig_st == TASK_ALIVE)
> kill(pid, SIGSTOP);
> - } else if (st == TASK_ALIVE)
> - /* do nothing */ ;
> - else
> + } else if (st == TASK_ALIVE) {
> +
> +#ifdef CONFIG_HAS_SUSPEND_SECCOMP
> + /*
> + * we can always do this, it is a no-op if the task did not have any
> + * seccomp mode enabled
> + */
> + if (ptrace(PTRACE_SUSPEND_SECCOMP, pid, NULL, 0) < 0) {
> + pr_perror("failed resuming seccomp");
> + return -1;
> + }
> +#endif
I guess we should _always_ restore the seccomp it, even if the
target state is TASK_STOPPED. Actually, if in the kernel we make
seccomp suspend turn off automatically upon ptrace detach, this
hunk can be just dropped. I think that doing auto-seccomp-resume
in the kernel is the correct way to go.
> +
> + } else
> pr_err("Unknown final state %d\n", st);
>
> return ptrace(PTRACE_DETACH, pid, NULL, NULL);
> @@ -125,6 +139,19 @@ try_again:
> goto err;
> }
>
> + if (cr.seccomp_mode != SECCOMP_MODE_DISABLED) {
> +#ifdef CONFIG_HAS_SUSPEND_SECCOMP
> + if (ptrace(PTRACE_SUSPEND_SECCOMP, pid, NULL, 1) < 0) {
> + pr_perror("suspending seccomp failed");
> + goto err_stop;
> + }
> +#else
> + pr_err("seccomp enabled and seccomp suspending not supported\n");
> + goto err_stop;
> +#endif
> + }
> +
> +
I'm not sure this is the correct place for seccomp suspend as if
we get the non _STOP event (below) we call PTRACE_CONT and continue
running task w/o seccomp turned on.
> if (SI_EVENT(si.si_code) != PTRACE_EVENT_STOP) {
> /*
> * Kernel notifies us about the task being seized received some
> diff --git a/scripts/feature-tests.mak b/scripts/feature-tests.mak
> index 519eb52..4c6f0d1 100644
> --- a/scripts/feature-tests.mak
> +++ b/scripts/feature-tests.mak
> @@ -92,3 +92,14 @@ int main(int argc, char *argv[], char *envp[])
> }
>
> endef
> +
> +define PTRACE_SUSPEND_SECCOMP_TEST
> +
> +#include <linux/ptrace.h>
> +
> +int main(void)
> +{
> + return PTRACE_SUSPEND_SECCOMP;
> +}
> +
> +endef
>
More information about the CRIU
mailing list