[CRIU] [PATCH 1/3] seccomp: add initial support for SECCOMP_MODE_STRICT

Pavel Emelyanov xemul at parallels.com
Tue Jun 2 05:40:24 PDT 2015


Cool :) And can you elaborate on why exactly the filtered mode cannot be
supported?

More comments inline.

> diff --git a/cr-dump.c b/cr-dump.c
> index f865967..72533f1 100644
> --- a/cr-dump.c
> +++ b/cr-dump.c
> @@ -19,6 +19,8 @@
>  #include <sched.h>
>  #include <sys/resource.h>
>  
> +#include <linux/seccomp.h>
> +
>  #include "protobuf.h"
>  #include "protobuf/fdinfo.pb-c.h"
>  #include "protobuf/fs.pb-c.h"
> @@ -551,6 +553,22 @@ err:
>  	return ret;
>  }
>  
> +static int get_seccomp_state(pid_t pid, TaskCoreEntry *tc)
> +{
> +	struct proc_status_creds cr;
> +	int ret;
> +
> +	ret = parse_pid_status(pid, &cr);
> +	if (ret < 0)
> +		return -1;

Going to read proc files slows things down. Can we take the status
parse result from the seize_task() here?

> +	if (cr.seccomp_mode != SECCOMP_MODE_DISABLED) {
> +		tc->has_seccomp_mode = true;
> +		tc->seccomp_mode = cr.seccomp_mode;
> +	}
> +	return 0;
> +}
> +
>  static DECLARE_KCMP_TREE(vm_tree, KCMP_VM);
>  static DECLARE_KCMP_TREE(fs_tree, KCMP_FS);
>  static DECLARE_KCMP_TREE(files_tree, KCMP_FILES);
> @@ -672,6 +690,10 @@ static int dump_task_core_all(struct pstree_item *item,
>  	if (ret < 0)
>  		goto err;
>  
> +	ret = get_seccomp_state(pid, core->tc);
> +	if (ret < 0)
> +		goto err;
> +
>  	strncpy((char *)core->tc->comm, stat->comm, TASK_COMM_LEN);
>  	core->tc->flags = stat->flags;
>  	core->tc->task_state = item->state;
> diff --git a/cr-restore.c b/cr-restore.c
> index aa00dc2..ea9d8d2 100644
> --- a/cr-restore.c
> +++ b/cr-restore.c
> @@ -24,6 +24,8 @@
>  
>  #include <sys/sendfile.h>
>  
> +#include <linux/seccomp.h>
> +
>  #include "ptrace.h"
>  #include "compiler.h"
>  #include "asm/types.h"
> @@ -756,6 +758,36 @@ static int collect_helper_pids()
>  	return 0;
>  }
>  
> +/*
> + * Prepare the seccomp state for the process. Note that seccomp is restored in
> + * suspended mode, and then unsuspended after the restorer blob completes in
> + * finalize_restore().
> + */

I'm not sure I get this. The prctl below will just turn the unsuspended strict mode
on. The ptrace-s in fork_with_pid() will try to suspend one, but this looks racy.
What prevents the prctl from being called _before_ the mentioned ptrace-s?

> +static int prepare_seccomp(pid_t pid, CoreEntry *core)
> +{
> +	if (core->tc->seccomp_mode == SECCOMP_MODE_DISABLED)
> +		return 0;
> +
> +	pr_info("restoring seccomp mode %d for %d\n", core->tc->seccomp_mode, pid);
> +
> +	switch (core->tc->seccomp_mode) {
> +	case SECCOMP_MODE_STRICT:
> +		if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, 0, 0, 0)) {
> +			pr_perror("setting seccomp failed");
> +			return -1;
> +		}
> +		break;
> +	case SECCOMP_MODE_FILTER:
> +		pr_err("seccomp mode 2 not supported\n");
> +		return -1;
> +	default:
> +		pr_err("unknown seccomp mode %d\n", core->tc->seccomp_mode);
> +		return -1;
> +	}
> +
> +	return 0;
> +}
> +
>  static int open_cores(int pid, CoreEntry *leader_core)
>  {
>  	int i, tpid;
> @@ -826,6 +858,9 @@ static int restore_one_alive_task(int pid, CoreEntry *core)
>  	if (collect_helper_pids() < 0)
>  		return -1;
>  
> +	if (prepare_seccomp(pid, core) < 0)
> +		return -1;
> +
>  	if (inherit_fd_fini() < 0)
>  		return -1;
>  
> @@ -1135,13 +1170,57 @@ static inline int fork_with_pid(struct pstree_item *item)
>  		goto err_unlock;
>  	}
>  
> -
>  	if (item == root_item) {
>  		item->pid.real = ret;
>  		pr_debug("PID: real %d virt %d\n",
>  				item->pid.real, item->pid.virt);
>  	}
>  
> +	if (ca.core->tc->seccomp_mode != SECCOMP_MODE_DISABLED) {
> +#ifdef CONFIG_HAS_SUSPEND_SECCOMP

Can we avoid $ifdef-s in the middle of the code, please?

> +		int status;
> +
> +		/*
> +		 * Since the process is going to restore seccomp state, we
> +		 * need to temporarily suspend seccomp before we restore this
> +		 * state, otherwise subsequent restore syscalls might be
> +		 * blocked. seccomp is resumed at the end in finalize_restore()
> +		 */
> +		if (ptrace(PTRACE_ATTACH, ret, NULL, NULL) < 0) {
> +			pr_perror("ptrace attach failed");
> +			kill(ret, SIGKILL);
> +			goto err_unlock;
> +		}
> +
> +		if (waitpid(ret, &status, 0) < 0) {
> +			pr_perror("waidpid failed");
> +			kill(ret, SIGKILL);
> +			goto err_unlock;
> +		}
> +
> +		if (!WIFSTOPPED(status)) {
> +			pr_err("not stopped after ptrace attach?\n");
> +			kill(ret, SIGKILL);
> +			goto err_unlock;
> +		}
> +
> +		if (ptrace(PTRACE_SUSPEND_SECCOMP, ret, NULL, 1) < 0) {
> +			pr_perror("seccomp suspend failed %d", ret);
> +			kill(ret, SIGKILL);
> +			goto err_unlock;
> +		}
> +
> +		if (ptrace(PTRACE_DETACH, ret, NULL, NULL) < 0) {
> +			pr_perror("ptrace detach failed");
> +			kill(ret, SIGKILL);
> +			goto err_unlock;
> +		}
> +#else
> +		pr_err("trying to restore seccomp on a kernel without seccomp suspend\n");
> +		goto err_unlock;
> +#endif
> +	}
> +
>  	if (opts.pidfile && root_item == item) {
>  		int pid;
>  
> @@ -1690,6 +1769,11 @@ detach:
>  				break;
>  			}
>  
> +#ifdef CONFIG_HAS_SUSPEND_SECCOMP
> +			if (ptrace(PTRACE_SUSPEND_SECCOMP, pid, NULL, 0) < 0)
> +				pr_perror("failed resuming seccomp");
> +#endif
> +
>  			if (ptrace(PTRACE_DETACH, pid, NULL, 0))
>  				pr_perror("Unable to execute %d", pid);
>  		}
> diff --git a/include/prctl.h b/include/prctl.h
> index 441135e..bfede51 100644
> --- a/include/prctl.h
> +++ b/include/prctl.h
> @@ -9,6 +9,9 @@
>  #ifndef PR_GET_NAME
>  # define PR_GET_NAME		16
>  #endif
> +#ifndef PR_SET_SECCOMP
> +# define PR_SET_SECCOMP		22
> +#endif
>  #ifndef PR_CAPBSET_READ
>  # define PR_CAPBSET_READ	23
>  #endif
> diff --git a/include/proc_parse.h b/include/proc_parse.h
> index ebb5351..3daf9a2 100644
> --- a/include/proc_parse.h
> +++ b/include/proc_parse.h
> @@ -84,6 +84,8 @@ struct proc_status_creds {
>  
>  	char			state;
>  	int			ppid;
> +
> +	int			seccomp_mode;
>  };
>  
>  struct mount_info;
> diff --git a/include/ptrace.h b/include/ptrace.h
> index 0d89788..2803afb 100644
> --- a/include/ptrace.h
> +++ b/include/ptrace.h
> @@ -11,6 +11,10 @@
>  # define PTRACE_SEIZE		0x4206
>  #endif
>  
> +#ifndef PTRACE_SUSPEND_SECCOMP
> +# define PTRACE_SUSPEND_SECCOMP     10
> +#endif
> +
>  #ifndef PTRACE_INTERRUPT
>  # define PTRACE_INTERRUPT	0x4207
>  #endif
> diff --git a/pie/restorer.c b/pie/restorer.c
> index 8713c6a..c07988d 100644
> --- a/pie/restorer.c
> +++ b/pie/restorer.c
> @@ -40,6 +40,18 @@
>  #define PR_SET_PDEATHSIG 1
>  #endif
>  
> +#ifndef SECCOMP_MODE_DISABLED
> +#define SECCOMP_MODE_DISABLED 0
> +#endif
> +
> +#ifndef SECCOMP_MODE_STRICT
> +#define SECCOMP_MODE_STRICT 1
> +#endif
> +
> +#ifndef SECCOMP_MODE_FILTER
> +#define SECCOMP_MODE_FILTER 2
> +#endif
> +
>  #define sys_prctl_safe(opcode, val1, val2, val3)			\
>  	({								\
>  		long __ret = sys_prctl(opcode, val1, val2, val3, 0);	\
> diff --git a/proc_parse.c b/proc_parse.c
> index f5c870e..8ebd2d0 100644
> --- a/proc_parse.c
> +++ b/proc_parse.c
> @@ -9,6 +9,7 @@
>  #include <string.h>
>  #include <ctype.h>
>  #include <linux/fs.h>
> +#include <linux/seccomp.h>
>  
>  #include "asm/types.h"
>  #include "list.h"
> @@ -763,7 +764,7 @@ int parse_pid_status(pid_t pid, struct proc_status_creds *cr)
>  	if (bfdopenr(&f))
>  		return -1;
>  
> -	while (done < 8) {
> +	while (done < 9) {
>  		str = breadline(&f);
>  		if (str == NULL)
>  			break;
> @@ -824,9 +825,22 @@ int parse_pid_status(pid_t pid, struct proc_status_creds *cr)
>  
>  			done++;
>  		}
> +
> +		if (!strncmp(str, "Seccomp:", 8)) {
> +			if (sscanf(str + 9, "%d", &cr->seccomp_mode) != 1) {
> +				goto err_parse;
> +			}
> +
> +			if (cr->seccomp_mode == SECCOMP_MODE_FILTER) {
> +				pr_err("SECCOMP_MODE_FILTER not currently supported\n");
> +				goto err_parse;
> +			}
> +
> +			done++;
> +		}
>  	}
>  
> -	if (done == 8)
> +	if (done == 9)
>  		ret = 0;
>  
>  err_parse:
> diff --git a/protobuf/core.proto b/protobuf/core.proto
> index 9f70da9..bbccee5 100644
> --- a/protobuf/core.proto
> +++ b/protobuf/core.proto
> @@ -26,6 +26,8 @@ message task_core_entry {
>  	optional uint32			cg_set		= 9;
>  
>  	optional signal_queue_entry	signals_s	= 10;
> +
> +	optional int32			seccomp_mode	= 11;

Should this rather be a protobuf enum?

>  }
>  
>  message task_kobj_ids_entry {
> diff --git a/ptrace.c b/ptrace.c
> index be6b67b..6f7bfbe 100644
> --- a/ptrace.c
> +++ b/ptrace.c
> @@ -8,11 +8,14 @@
>  #include <limits.h>
>  #include <signal.h>
>  
> +#include <sys/ptrace.h>
>  #include <sys/types.h>
>  #include <sys/time.h>
>  #include <sys/resource.h>
>  #include <sys/wait.h>
>  
> +#include <linux/seccomp.h>
> +
>  #include "compiler.h"
>  #include "asm/types.h"
>  #include "util.h"
> @@ -30,9 +33,20 @@ int unseize_task(pid_t pid, int orig_st, int st)
>  	else if (st == TASK_STOPPED) {
>  		if (orig_st == TASK_ALIVE)
>  			kill(pid, SIGSTOP);
> -	} else if (st == TASK_ALIVE)
> -		/* do nothing */ ;
> -	else
> +	} else if (st == TASK_ALIVE) {
> +
> +#ifdef CONFIG_HAS_SUSPEND_SECCOMP
> +		/*
> +		 * we can always do this, it is a no-op if the task did not have any
> +		 * seccomp mode enabled
> +		 */
> +		if (ptrace(PTRACE_SUSPEND_SECCOMP, pid, NULL, 0) < 0) {
> +			pr_perror("failed resuming seccomp");
> +			return -1;
> +		}
> +#endif

I guess we should _always_ restore the seccomp it, even if the
target state is TASK_STOPPED. Actually, if in the kernel we make
seccomp suspend turn off automatically upon ptrace detach, this
hunk can be just dropped. I think that doing auto-seccomp-resume
in the kernel is the correct way to go.

> +
> +	} else
>  		pr_err("Unknown final state %d\n", st);
>  
>  	return ptrace(PTRACE_DETACH, pid, NULL, NULL);
> @@ -125,6 +139,19 @@ try_again:
>  		goto err;
>  	}
>  
> +	if (cr.seccomp_mode != SECCOMP_MODE_DISABLED) {
> +#ifdef CONFIG_HAS_SUSPEND_SECCOMP
> +		if (ptrace(PTRACE_SUSPEND_SECCOMP, pid, NULL, 1) < 0) {
> +			pr_perror("suspending seccomp failed");
> +			goto err_stop;
> +		}
> +#else
> +		pr_err("seccomp enabled and seccomp suspending not supported\n");
> +		goto err_stop;
> +#endif
> +	}
> +
> +

I'm not sure this is the correct place for seccomp suspend as if
we get the non _STOP event (below) we call PTRACE_CONT and continue
running task w/o seccomp turned on.

>  	if (SI_EVENT(si.si_code) != PTRACE_EVENT_STOP) {
>  		/*
>  		 * Kernel notifies us about the task being seized received some
> diff --git a/scripts/feature-tests.mak b/scripts/feature-tests.mak
> index 519eb52..4c6f0d1 100644
> --- a/scripts/feature-tests.mak
> +++ b/scripts/feature-tests.mak
> @@ -92,3 +92,14 @@ int main(int argc, char *argv[], char *envp[])
>  }
>  
>  endef
> +
> +define PTRACE_SUSPEND_SECCOMP_TEST
> +
> +#include <linux/ptrace.h>
> +
> +int main(void)
> +{
> +	return PTRACE_SUSPEND_SECCOMP;
> +}
> +
> +endef
> 



More information about the CRIU mailing list