[CRIU] [PATCH 1/5] seccomp: add support for SECCOMP_MODE_FILTER

Tycho Andersen tycho.andersen at canonical.com
Mon Nov 16 21:17:45 PST 2015


This commit adds basic support for dumping and restoring seccomp filters
via the new ptrace interface. There are two current known limitations with
this approach:

1. This approach doesn't support restoring tasks which first do a seccomp()
   and then a setuid(); the test elaborates on this. I don't think it is
   tough to do, but it is not done yet.

2. Filters are compared via memcmp(), so two tasks which have the same
   parent task and install byte-for-byte identical filters will have those
   filters considered to be the "same". Since we force all tasks to have
   the same creds (including seccomp filters) right now, this isn't a
   problem.

The approach used here is very similar to the cgroup approach: the actual
filters are stored in a seccomp.img, and each task has an id that points to
the part of the filter tree it needs to restore. This keeps us from dumping
the same filter multiple times, since filters are inherited on fork.

v2:
 * remove unused seccomp_filters field from struct rst_info
 * rework memory layout for passing filters to restorer blob
 * add a sanity check when finding inherited filters

Signed-off-by: Tycho Andersen <tycho.andersen at canonical.com>
---
 Makefile.crtools        |   1 +
 cr-dump.c               |  16 ++-
 cr-restore.c            |  15 ++-
 image-desc.c            |   1 +
 include/image-desc.h    |   1 +
 include/magic.h         |   1 +
 include/proc_parse.h    |   8 ++
 include/protobuf-desc.h |   5 +-
 include/pstree.h        |   2 +-
 include/ptrace.h        |   4 +
 include/restorer.h      |   4 +
 include/seccomp.h       |   8 ++
 pie/restorer.c          |  35 +++++--
 proc_parse.c            |   9 +-
 protobuf-desc.c         |   1 +
 protobuf/Makefile       |   1 +
 protobuf/core.proto     |   1 +
 protobuf/seccomp.proto  |   8 ++
 pstree.c                |  16 +++
 pycriu/images/images.py |   1 +
 seccomp.c               | 270 ++++++++++++++++++++++++++++++++++++++++++++++++
 21 files changed, 389 insertions(+), 19 deletions(-)
 create mode 100644 protobuf/seccomp.proto
 create mode 100644 seccomp.c

diff --git a/Makefile.crtools b/Makefile.crtools
index 847b11d..254a7a2 100644
--- a/Makefile.crtools
+++ b/Makefile.crtools
@@ -79,6 +79,7 @@ obj-y	+= seize.o
 obj-y	+= fault-injection.o
 obj-y	+= pie/util-fd.o
 obj-y	+= pie/util.o
+obj-y	+= seccomp.o
 
 ifneq ($(MAKECMDGOALS),clean)
 incdeps := y
diff --git a/cr-dump.c b/cr-dump.c
index 58c8470..6ee84b4 100644
--- a/cr-dump.c
+++ b/cr-dump.c
@@ -671,6 +671,7 @@ static int dump_task_core_all(struct pstree_item *item,
 	CoreEntry *core = item->core[0];
 	pid_t pid = item->pid.real;
 	int ret = -1;
+	struct proc_status_creds *creds;
 
 	pr_info("\n");
 	pr_info("Dumping core (pid: %d)\n", pid);
@@ -680,10 +681,16 @@ static int dump_task_core_all(struct pstree_item *item,
 	if (ret < 0)
 		goto err;
 
-	if (dmpi(item)->pi_creds->seccomp_mode != SECCOMP_MODE_DISABLED) {
-		pr_info("got seccomp mode %d for %d\n", dmpi(item)->pi_creds->seccomp_mode, item->pid.virt);
+	creds = dmpi(item)->pi_creds;
+	if (creds->seccomp_mode != SECCOMP_MODE_DISABLED) {
+		pr_info("got seccomp mode %d for %d\n", creds->seccomp_mode, item->pid.virt);
 		core->tc->has_seccomp_mode = true;
-		core->tc->seccomp_mode = dmpi(item)->pi_creds->seccomp_mode;
+		core->tc->seccomp_mode = creds->seccomp_mode;
+
+		if (creds->seccomp_mode == SECCOMP_MODE_FILTER) {
+			core->tc->has_seccomp_filter = true;
+			core->tc->seccomp_filter = creds->last_filter;
+		}
 	}
 
 	strlcpy((char *)core->tc->comm, stat->comm, TASK_COMM_LEN);
@@ -1519,6 +1526,9 @@ int cr_dump_tasks(pid_t pid)
 	if (!glob_imgset)
 		goto err;
 
+	if (collect_seccomp_filters() < 0)
+		goto err;
+
 	for_each_pstree_item(item) {
 		if (dump_one_task(item))
 			goto err;
diff --git a/cr-restore.c b/cr-restore.c
index c132588..5753978 100644
--- a/cr-restore.c
+++ b/cr-restore.c
@@ -193,6 +193,9 @@ static int root_prepare_shared(void)
 	if (prepare_remaps())
 		return -1;
 
+	if (prepare_seccomp_filters())
+		return -1;
+
 	for (i = 0; i < ARRAY_SIZE(cinfos); i++) {
 		ret = collect_image(cinfos[i]);
 		if (ret)
@@ -1096,6 +1099,7 @@ static inline int fork_with_pid(struct pstree_item *item)
 
 		item->state = ca.core->tc->task_state;
 		rsti(item)->cg_set = ca.core->tc->cg_set;
+
 		rsti(item)->has_seccomp = ca.core->tc->seccomp_mode != SECCOMP_MODE_DISABLED;
 
 		if (item->state == TASK_DEAD)
@@ -2730,6 +2734,9 @@ static int sigreturn_restore(pid_t pid, CoreEntry *core)
 	int lsm_profile_len = 0;
 	unsigned long lsm_pos = 0;
 
+	int n_seccomp_filters = 0;
+	unsigned long seccomp_filter_pos = 0;
+
 	struct vm_area_list self_vmas;
 	struct vm_area_list *vmas = &rsti(current)->vmas;
 	int i;
@@ -2836,6 +2843,10 @@ static int sigreturn_restore(pid_t pid, CoreEntry *core)
 
 	}
 
+	if (seccomp_filters_get_rst_pos(core, &n_seccomp_filters, &seccomp_filter_pos) < 0)
+		goto err;
+
+
 	rst_mem_size = rst_mem_lock();
 	restore_bootstrap_len = restorer_len + args_len + rst_mem_size;
 
@@ -2958,10 +2969,12 @@ static int sigreturn_restore(pid_t pid, CoreEntry *core)
 	remap_array(rlims,	  rlims_nr, rlims_cpos);
 	remap_array(helpers,	  n_helpers, helpers_pos);
 	remap_array(zombies,	  n_zombies, zombies_pos);
+	remap_array(seccomp_filters,	n_seccomp_filters, seccomp_filter_pos);
 
 #undef remap_array
 
-	task_args->seccomp_mode = core->tc->seccomp_mode;
+	if (core->tc->has_seccomp_mode)
+		task_args->seccomp_mode = core->tc->seccomp_mode;
 
 	if (lsm)
 		task_args->creds.lsm_profile = rst_mem_remap_ptr(lsm_pos, RM_PRIVATE);
diff --git a/image-desc.c b/image-desc.c
index d5cc132..a07eb88 100644
--- a/image-desc.c
+++ b/image-desc.c
@@ -91,6 +91,7 @@ struct cr_fd_desc_tmpl imgset_template[CR_FD_MAX] = {
 	FD_ENTRY(CGROUP,	"cgroup"),
 	FD_ENTRY(TIMERFD,	"timerfd"),
 	FD_ENTRY(CPUINFO,	"cpuinfo"),
+	FD_ENTRY(SECCOMP,	"seccomp"),
 	FD_ENTRY(USERNS,	"userns-%d"),
 
 	[CR_FD_STATS] = {
diff --git a/include/image-desc.h b/include/image-desc.h
index cb45b20..fe1289f 100644
--- a/include/image-desc.h
+++ b/include/image-desc.h
@@ -76,6 +76,7 @@ enum {
 	CR_FD_CGROUP,
 	CR_FD_TIMERFD,
 	CR_FD_FILE_LOCKS,
+	CR_FD_SECCOMP,
 	_CR_FD_GLOB_TO,
 
 	CR_FD_TMPFS_IMG,
diff --git a/include/magic.h b/include/magic.h
index 2af614b..c899a45 100644
--- a/include/magic.h
+++ b/include/magic.h
@@ -89,6 +89,7 @@
 #define TIMERFD_MAGIC		0x50493712 /* Korocha */
 #define CPUINFO_MAGIC		0x61404013 /* Nyandoma */
 #define USERNS_MAGIC		0x55474906 /* Kazan */
+#define SECCOMP_MAGIC		0x64413049 /* Kostomuksha */
 
 #define IFADDR_MAGIC		RAW_IMAGE_MAGIC
 #define ROUTE_MAGIC		RAW_IMAGE_MAGIC
diff --git a/include/proc_parse.h b/include/proc_parse.h
index 92fc477..4617fe6 100644
--- a/include/proc_parse.h
+++ b/include/proc_parse.h
@@ -13,6 +13,7 @@
 #include "protobuf/signalfd.pb-c.h"
 #include "protobuf/fsnotify.pb-c.h"
 #include "protobuf/timerfd.pb-c.h"
+#include "protobuf/seccomp.pb-c.h"
 
 #define PROC_TASK_COMM_LEN	32
 #define PROC_TASK_COMM_LEN_FMT	"(%31s"
@@ -72,6 +73,12 @@ struct proc_pid_stat {
 	int			exit_code;
 };
 
+struct seccomp_info {
+	SeccompFilter filter;		/* protobuf message; filter.filter holds the raw filter bytes */
+	int id;				/* slot of this entry in seccomp.c's filters[] array */
+	struct seccomp_info *prev;	/* neighbouring entry on the filter chain, NULL at its end */
+};
+
 #define PROC_CAP_SIZE	2
 
 struct proc_status_creds {
@@ -87,6 +94,7 @@ struct proc_status_creds {
 	int			ppid;
 
 	int			seccomp_mode;
+	u32			last_filter;
 };
 
 bool proc_status_creds_eq(struct proc_status_creds *o1, struct proc_status_creds *o2);
diff --git a/include/protobuf-desc.h b/include/protobuf-desc.h
index ab7e4f2..cc78208 100644
--- a/include/protobuf-desc.h
+++ b/include/protobuf-desc.h
@@ -51,6 +51,7 @@ enum {
 	PB_TUNFILE,
 	PB_IRMAP_CACHE,
 	PB_CGROUP,
+	PB_SECCOMP,
 	PB_TIMERFD,
 	PB_CPUINFO,
 	PB_USERNS,
@@ -59,8 +60,8 @@ enum {
 	/* PB_AUTOGEN_STOP */
 
 	PB_PAGEMAP_HEAD,
-	PB_IDS,		/* 50 */
-	PB_SIGACT,
+	PB_IDS,
+	PB_SIGACT,		/* 50 */
 	PB_NETDEV,
 	PB_REMAP_FPATH,
 	PB_SK_QUEUES,
diff --git a/include/pstree.h b/include/pstree.h
index a09e956..0c91471 100644
--- a/include/pstree.h
+++ b/include/pstree.h
@@ -44,7 +44,6 @@ struct dmp_info {
 	 * threads. Dumping tasks with different creds is not supported.
 	 */
 	struct proc_status_creds *pi_creds;
-
 };
 
 static inline struct dmp_info *dmpi(struct pstree_item *i)
@@ -94,4 +93,5 @@ extern void pstree_free_cores(struct pstree_item *item);
 
 extern int collect_pstree_ids(void);
 
+extern int preorder_pstree_traversal(struct pstree_item *item, int (*f)(struct pstree_item *));
 #endif /* __CR_PSTREE_H__ */
diff --git a/include/ptrace.h b/include/ptrace.h
index 079ad63..4ed8357 100644
--- a/include/ptrace.h
+++ b/include/ptrace.h
@@ -47,6 +47,10 @@ struct ptrace_peeksiginfo_args {
 #define PTRACE_GETSIGMASK	0x420a
 #define PTRACE_SETSIGMASK	0x420b
 
+#ifndef PTRACE_SECCOMP_GET_FILTER
+#define PTRACE_SECCOMP_GET_FILTER	0x420c
+#endif
+
 #define PTRACE_SEIZE_DEVEL	0x80000000
 
 #define PTRACE_EVENT_FORK	1
diff --git a/include/restorer.h b/include/restorer.h
index afcaf68..74be81d 100644
--- a/include/restorer.h
+++ b/include/restorer.h
@@ -137,6 +137,10 @@ struct task_restore_args {
 
 	pid_t				*zombies;
 	unsigned int			zombies_n;
+
+	struct sock_fprog		*seccomp_filters;
+	unsigned int			seccomp_filters_n;
+
 	/* * * * * * * * * * * * * * * * * * * * */
 
 	unsigned long			task_size;
diff --git a/include/seccomp.h b/include/seccomp.h
index 017dcd4..f87584f 100644
--- a/include/seccomp.h
+++ b/include/seccomp.h
@@ -1,6 +1,11 @@
 #ifndef __CR_SECCOMP_H__
 #define __CR_SECCOMP_H__
 
+#include <linux/seccomp.h>
+#include <linux/filter.h>
+
+#include "protobuf/core.pb-c.h"
+
 #ifndef SECCOMP_MODE_DISABLED
 #define SECCOMP_MODE_DISABLED 0
 #endif
@@ -13,4 +18,7 @@
 #define SECCOMP_MODE_FILTER 2
 #endif
 
+extern int collect_seccomp_filters(void);
+extern int prepare_seccomp_filters(void);
+extern int seccomp_filters_get_rst_pos(CoreEntry *item, int *count, unsigned long *pos);
 #endif
diff --git a/pie/restorer.c b/pie/restorer.c
index 26494f9..4665c5d 100644
--- a/pie/restorer.c
+++ b/pie/restorer.c
@@ -334,17 +334,39 @@ static int restore_signals(siginfo_t *ptr, int nr, bool group)
 	return 0;
 }
 
-static void restore_seccomp(int seccomp_mode)
+static void restore_seccomp(struct task_restore_args *args)
 {
-	switch (seccomp_mode) {
+	switch (args->seccomp_mode) {
 	case SECCOMP_MODE_DISABLED:
 		return;
 	case SECCOMP_MODE_STRICT:
 		if (sys_prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, 0, 0, 0))
 			goto die;
 		return;
-	case SECCOMP_MODE_FILTER:
-		goto die;
+	case SECCOMP_MODE_FILTER: {
+		int i;
+		void *filter_data;
+
+		filter_data = &args->seccomp_filters[args->seccomp_filters_n];
+
+		for (i = 0; i < args->seccomp_filters_n; i++) {
+			struct sock_fprog *fprog = &args->seccomp_filters[i];
+
+			fprog->filter = filter_data;
+
+			/* We always TSYNC here, since we require that the
+			 * creds for all threads be the same; this means we
+			 * don't have to restore_seccomp() in threads, and that
+			 * future TSYNC behavior will be correct.
+			 */
+			if (sys_seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC, (char *) fprog) < 0)
+				goto die;
+
+			filter_data += fprog->len * sizeof(struct sock_filter);
+		}
+
+		return;
+	}
 	default:
 		goto die;
 	}
@@ -443,9 +465,8 @@ long __export_restore_thread(struct thread_restore_args *args)
 		pr_info("Restoring seccomp mode %d for %ld\n", args->ta->seccomp_mode, sys_getpid());
 
 	restore_finish_stage(CR_STATE_RESTORE_CREDS);
-	futex_dec_and_wake(&thread_inprogress);
 
-	restore_seccomp(args->ta->seccomp_mode);
+	futex_dec_and_wake(&thread_inprogress);
 
 	new_sp = (long)rt_sigframe + SIGFRAME_OFFSET;
 	rst_sigreturn(new_sp);
@@ -1283,7 +1304,7 @@ long __export_restore_task(struct task_restore_args *args)
 
 	restore_posix_timers(args);
 
-	restore_seccomp(args->seccomp_mode);
+	restore_seccomp(args);
 
 	sys_munmap(args->rst_mem, args->rst_mem_size);
 
diff --git a/proc_parse.c b/proc_parse.c
index 4f45cc3..bce5117 100644
--- a/proc_parse.c
+++ b/proc_parse.c
@@ -835,11 +835,6 @@ int parse_pid_status(pid_t pid, struct proc_status_creds *cr)
 				goto err_parse;
 			}
 
-			if (cr->seccomp_mode == SECCOMP_MODE_FILTER) {
-				pr_err("SECCOMP_MODE_FILTER not currently supported\n");
-				goto err_parse;
-			}
-
 			done++;
 		}
 	}
@@ -2142,6 +2137,10 @@ int aufs_parse(struct mount_info *new)
 
 bool proc_status_creds_eq(struct proc_status_creds *o1, struct proc_status_creds *o2)
 {
+	/* FIXME: this is a little too strict, we should do semantic comparison
+	 * of seccomp filters instead of forcing them to be exactly identical.
+	 * It's not unsafe, though, so let's be lazy for now.
+	 */
 	return memcmp(o1, o2, sizeof(struct proc_status_creds)) == 0;
 }
 
diff --git a/protobuf-desc.c b/protobuf-desc.c
index 873fd3b..ed56e9c 100644
--- a/protobuf-desc.c
+++ b/protobuf-desc.c
@@ -61,6 +61,7 @@
 #include "protobuf/timerfd.pb-c.h"
 #include "protobuf/cpuinfo.pb-c.h"
 #include "protobuf/userns.pb-c.h"
+#include "protobuf/seccomp.pb-c.h"
 
 struct cr_pb_message_desc cr_pb_descs[PB_MAX];
 
diff --git a/protobuf/Makefile b/protobuf/Makefile
index 0b11852..86cc90e 100644
--- a/protobuf/Makefile
+++ b/protobuf/Makefile
@@ -55,6 +55,7 @@ proto-obj-y	+= cgroup.o
 proto-obj-y	+= userns.o
 proto-obj-y	+= google/protobuf/descriptor.o # To make protoc-c happy and compile opts.proto
 proto-obj-y	+= opts.o
+proto-obj-y	+= seccomp.o
 
 CFLAGS		+= -I$(obj)/
 
diff --git a/protobuf/core.proto b/protobuf/core.proto
index fd78f5c..94322c0 100644
--- a/protobuf/core.proto
+++ b/protobuf/core.proto
@@ -37,6 +37,7 @@ message task_core_entry {
 	optional signal_queue_entry	signals_s	= 10;
 
 	optional seccomp_mode		seccomp_mode	= 11;
+	optional uint32			seccomp_filter	= 12;
 }
 
 message task_kobj_ids_entry {
diff --git a/protobuf/seccomp.proto b/protobuf/seccomp.proto
new file mode 100644
index 0000000..03b248c
--- /dev/null
+++ b/protobuf/seccomp.proto
@@ -0,0 +1,8 @@
+message seccomp_filter {
+	required bytes		filter			= 1;	// raw copy of the struct sock_filter array
+	optional uint32		prev			= 2;	// id of the chained filter; absent at the end of a chain
+}
+
+message seccomp_entry {
+	repeated seccomp_filter		seccomp_filters	= 1;	// every dumped filter, stored at the index given by its id
+}
diff --git a/pstree.c b/pstree.c
index 18e5a8e..be0548a 100644
--- a/pstree.c
+++ b/pstree.c
@@ -218,6 +218,22 @@ struct pstree_item *pstree_item_next(struct pstree_item *item)
 	return NULL;
 }
 
+/* Call f() on @item, then on each subtree below it (parents first); returns -1 as soon as a call is < 0 */
+int preorder_pstree_traversal(struct pstree_item *item, int (*f)(struct pstree_item *))
+{
+	struct pstree_item *child;
+	int ret = f(item);
+
+	if (ret < 0)
+		return -1;
+	list_for_each_entry(child, &item->children, sibling) {
+		ret = preorder_pstree_traversal(child, f);
+		if (ret < 0)
+			return -1;
+	}
+	return 0;
+}
+
 int dump_pstree(struct pstree_item *root_item)
 {
 	struct pstree_item *item = root_item;
diff --git a/pycriu/images/images.py b/pycriu/images/images.py
index 8d90917..bb4b948 100644
--- a/pycriu/images/images.py
+++ b/pycriu/images/images.py
@@ -305,6 +305,7 @@ handlers = {
 	'IPCNS_MSG'		: entry_handler(ipc_msg_entry),
 	'NETNS'			: entry_handler(netns_entry),
 	'USERNS'		: entry_handler(userns_entry),
+	'SECCOMP'		: entry_handler(seccomp_entry),
 	}
 
 def __rhandler(f):
diff --git a/seccomp.c b/seccomp.c
new file mode 100644
index 0000000..50f6a70
--- /dev/null
+++ b/seccomp.c
@@ -0,0 +1,270 @@
+#include <linux/filter.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "config.h"
+#include "imgset.h"
+#include "kcmp.h"
+#include "pstree.h"
+#include "ptrace.h"
+#include "proc_parse.h"
+#include "seccomp.h"
+#include "servicefd.h"
+#include "util.h"
+#include "rst-malloc.h"
+
+#include "protobuf.h"
+#include "protobuf/seccomp.pb-c.h"
+
+/* dump-side state, populated while collect_seccomp_filters() walks the process tree */
+static int next_filter_id = 0;			/* number of slots of filters[] in use */
+static struct seccomp_info **filters = NULL;	/* collected filters, indexed by their id */
+
+/*
+ * Look for an already collected filter on @parent's chain that equals
+ * @filter.
+ *
+ * The walk starts at filters[last_filter] of @parent's creds and follows
+ * ->prev. Returns the matching seccomp_info, or NULL when nothing has been
+ * collected yet or no entry on the chain matches.
+ *
+ * NOTE(review): the caller in this file passes the same value as @len that
+ * it later multiplies by sizeof(struct sock_filter) before storing it in
+ * filter.filter.len, i.e. @len looks like an element count while the stored
+ * length is a byte count. The length comparison below therefore mixes the
+ * two units and the memcmp() covers only @len bytes. Confirm the intended
+ * unit; note that the caller's BUG_ON(!found) sanity check starts to matter
+ * as soon as this function can actually return a match.
+ */
+static struct seccomp_info *find_inherited(struct pstree_item *parent,
+					   struct sock_filter *filter, int len)
+{
+	struct seccomp_info *info;
+
+	/* if we have no filters yet, this one has no parent */
+	if (!filters)
+		return NULL;
+
+	/* @parent is dereferenced from here on */
+	for (info = filters[dmpi(parent)->pi_creds->last_filter]; info; info = info->prev) {
+
+		if (len != info->filter.filter.len)
+			continue;
+		if (!memcmp(filter, info->filter.filter.data, len))
+			return info;
+	}
+
+	return NULL;
+}
+
+/*
+ * Collect the seccomp filters of one task and register them in filters[].
+ *
+ * Dead tasks and tasks that are not in SECCOMP_MODE_FILTER are skipped.
+ * Otherwise filters are fetched one at a time with
+ * PTRACE_SECCOMP_GET_FILTER (index 0, 1, ...) until the request fails with
+ * ENOENT. Each fetched filter is either matched against the parent's chain
+ * by find_inherited() or copied into a newly allocated seccomp_info that is
+ * linked in through ->prev. On success the id of the chain head is stored in
+ * the task's pi_creds->last_filter.
+ *
+ * Returns 0 on success (including "nothing to do") and -1 on failure; on
+ * failure the chain built so far is freed again.
+ */
+static int collect_filter_for_pstree(struct pstree_item *item)
+{
+	struct seccomp_info *infos = NULL, *cursor;
+	int info_count, i, ret = -1;
+	struct sock_filter buf[BPF_MAXINSNS];
+	void *m;
+
+	if (item->state == TASK_DEAD ||
+	    dmpi(item)->pi_creds->seccomp_mode != SECCOMP_MODE_FILTER)
+		return 0;
+
+	for (i = 0; true; i++) {
+		int len;
+		struct seccomp_info *info, *inherited = NULL;
+
+		len = ptrace(PTRACE_SECCOMP_GET_FILTER, item->pid.real, i, buf);
+		if (len < 0) {
+			if (errno == ENOENT) {
+				/* end of the search */
+				BUG_ON(i == 0);
+				goto save_infos;
+			} else if (errno == EINVAL) {
+				pr_err("dumping seccomp infos not supported\n");
+				goto out;
+			} else {
+				pr_perror("couldn't dump seccomp filter");
+				goto out;
+			}
+		}
+
+		inherited = find_inherited(item->parent, buf, len);
+		if (inherited) {
+			bool found = false;
+
+			/* Small sanity check: if infos is already populated,
+			 * we should have inherited that filter too. */
+			for (cursor = infos; cursor; cursor = cursor->prev) {
+				if (inherited->prev == cursor) {
+					found = true;
+					break;
+				}
+			}
+
+			BUG_ON(!found);
+
+			infos = inherited;
+			continue;
+		}
+
+		info = xmalloc(sizeof(*info));
+		if (!info)
+			goto out;
+		seccomp_filter__init(&info->filter);
+
+		/* the stored length is in bytes, len counts sock_filter elements */
+		info->filter.filter.len = len * sizeof(struct sock_filter);
+		info->filter.filter.data = xmalloc(info->filter.filter.len);
+		if (!info->filter.filter.data) {
+			/* info is not linked into infos yet, so the cleanup
+			 * at out: would never reach it. */
+			xfree(info);
+			goto out;
+		}
+
+		memcpy(info->filter.filter.data, buf, info->filter.filter.len);
+
+		info->prev = infos;
+		infos = info;
+	}
+
+save_infos:
+	info_count = i;
+
+	/* keep the old array valid if growing it fails */
+	m = xrealloc(filters, sizeof(*filters) * (next_filter_id + info_count));
+	if (!m)
+		goto out;
+	filters = m;
+
+	/* hand out ids from the top down, so the chain head gets the highest one */
+	for (cursor = infos, i = info_count + next_filter_id - 1;
+	     i >= next_filter_id; i--) {
+		BUG_ON(!cursor);
+		cursor->id = i;
+		filters[i] = cursor;
+		cursor = cursor->prev;
+	}
+
+	next_filter_id += info_count;
+
+	dmpi(item)->pi_creds->last_filter = infos->id;
+
+	/* Don't free the part of the tree we just successfully acquired */
+	infos = NULL;
+	ret = 0;
+out:
+	while (infos) {
+		struct seccomp_info *freeme = infos;
+		infos = infos->prev;
+		xfree(freeme->filter.filter.data);
+		xfree(freeme);
+	}
+
+	return ret;
+}
+
+/*
+ * Write every collected filter into the seccomp image as one SeccompEntry
+ * and release the dump-side bookkeeping.
+ *
+ * Each filter is placed in the slot given by its id; a filter with a ->prev
+ * records that entry's id in the optional prev field.
+ *
+ * Returns 0 when no filter was collected, -1 if the pointer array cannot be
+ * allocated, and otherwise the result of pb_write_one(). The collected
+ * filters are freed on every path that got past the "nothing collected"
+ * check.
+ */
+static int dump_seccomp_filters(void)
+{
+	SeccompEntry se = SECCOMP_ENTRY__INIT;
+	int ret = -1, i;
+
+	/* Nothing was collected, so there is nothing to write. */
+	if (next_filter_id == 0)
+		return 0;
+
+	se.seccomp_filters = xzalloc(sizeof(*se.seccomp_filters) * next_filter_id);
+	if (!se.seccomp_filters)
+		goto free_filters;
+
+	se.n_seccomp_filters = next_filter_id;
+
+	for (i = 0; i < next_filter_id; i++) {
+		SeccompFilter *sf;
+		struct seccomp_info *cur = filters[i];
+
+		sf = se.seccomp_filters[cur->id] = &cur->filter;
+		if (cur->prev) {
+			sf->has_prev = true;
+			sf->prev = cur->prev->id;
+		}
+	}
+
+	ret = pb_write_one(img_from_set(glob_imgset, CR_FD_SECCOMP), &se, PB_SECCOMP);
+
+	/* only the pointer array is ours; the messages live in filters[] */
+	xfree(se.seccomp_filters);
+
+free_filters:
+	for (i = 0; i < next_filter_id; i++) {
+		struct seccomp_info *freeme = filters[i];
+
+		xfree(freeme->filter.filter.data);
+		xfree(freeme);
+	}
+	xfree(filters);
+
+	/* don't leave the globals describing memory that is gone */
+	filters = NULL;
+	next_filter_id = 0;
+
+	return ret;
+}
+
+/*
+ * Dump-side entry point: gather the filters of every task in the tree
+ * (parents before children) and then write them out.
+ * Returns 0 on success, -1 if either step fails.
+ */
+int collect_seccomp_filters(void)
+{
+	int ret;
+
+	ret = preorder_pstree_traversal(root_item, collect_filter_for_pstree);
+	if (ret < 0)
+		return -1;
+
+	return dump_seccomp_filters() ? -1 : 0;
+}
+
+/* Populated on restore by prepare_seccomp_filters */
+static SeccompEntry *se;
+
+/*
+ * Load the seccomp image into the file-scope se.
+ *
+ * Returns -1 when the image cannot be opened or reading it fails, and 0
+ * otherwise. se is left untouched when the image holds no entry.
+ *
+ * NOTE(review): this relies on pb_read_one_eof() returning a negative value
+ * on error and 0 at end of file, which is what the previous combined
+ * "ret <= 0 means no filters" check suggests -- confirm against its
+ * definition.
+ */
+int prepare_seccomp_filters(void)
+{
+	struct cr_img *img;
+	int ret;
+
+	img = open_image(CR_FD_SECCOMP, O_RSTR);
+	if (!img)
+		return -1;
+
+	ret = pb_read_one_eof(img, &se, PB_SECCOMP);
+	close_image(img);
+	if (ret < 0)
+		return -1; /* a failed read must not look like "no filters" */
+	if (ret == 0)
+		return 0; /* there were no filters */
+
+	BUG_ON(!se);
+
+	return 0;
+}
+
+/*
+ * Lay out the filter chain of one task in restorer memory.
+ *
+ * Starting at the filter id recorded in @core, the chain is followed through
+ * the optional prev ids. One RM_PRIVATE rst memory area is allocated that
+ * holds an array of struct sock_fprog (one per filter, only ->len filled in)
+ * immediately followed by the raw filter bytes in the same order. ->filter
+ * is not set here; restore_seccomp() in pie/restorer.c points it at the
+ * trailing data before installing each program.
+ *
+ * @core:  core entry of the task being restored
+ * @count: out, number of filters on the chain (0 when the task has none)
+ * @pos:   out, rst memory position of the area; only written when the task
+ *         has a filter
+ *
+ * Returns 0 on success and -1 on failure. Once a chain has been looked up
+ * the unpacked image in se is freed, so a task with filters can call this
+ * only once per process.
+ */
+int seccomp_filters_get_rst_pos(CoreEntry *core, int *count, unsigned long *pos)
+{
+	SeccompFilter *sf = NULL;
+	struct sock_fprog *arr = NULL;
+	void *filter_data = NULL;
+	int ret = -1, i;
+	size_t filter_size = 0;
+
+	*count = 0;
+	if (!core->tc->has_seccomp_filter)
+		return 0;
+
+	/* the task refers to a filter, so the image must have been loaded */
+	if (!se) {
+		pr_err("seccomp filter %u requested, but no filters were loaded\n",
+		       core->tc->seccomp_filter);
+		return -1;
+	}
+
+	*pos = rst_mem_cpos(RM_PRIVATE);
+
+	/* valid ids are 0 .. n_seccomp_filters - 1 */
+	BUG_ON(core->tc->seccomp_filter >= se->n_seccomp_filters);
+	sf = se->seccomp_filters[core->tc->seccomp_filter];
+
+	/* first pass: count the chain and add up the size of its data */
+	while (1) {
+		(*count)++;
+
+		filter_size += sf->filter.len;
+
+		if (!sf->has_prev)
+			break;
+
+		BUG_ON(sf->prev >= se->n_seccomp_filters);
+		sf = se->seccomp_filters[sf->prev];
+	}
+
+	arr = rst_mem_alloc(sizeof(struct sock_fprog) * (*count) + filter_size, RM_PRIVATE);
+	if (!arr)
+		goto out;
+
+	/* second pass: headers go into arr[], the data right behind the array */
+	filter_data = &arr[*count];
+	sf = se->seccomp_filters[core->tc->seccomp_filter];
+	for (i = 0; i < *count; i++) {
+		struct sock_fprog *fprog = &arr[i];
+
+		BUG_ON(sf->filter.len % sizeof(struct sock_filter));
+		fprog->len = sf->filter.len / sizeof(struct sock_filter);
+
+		memcpy(filter_data, sf->filter.data, sf->filter.len);
+
+		filter_data += sf->filter.len;
+
+		/* the last entry of the chain has no prev to follow */
+		if (sf->has_prev)
+			sf = se->seccomp_filters[sf->prev];
+	}
+
+	ret = 0;
+
+out:
+	seccomp_entry__free_unpacked(se, NULL);
+	/* make a repeated call fail cleanly instead of touching freed memory */
+	se = NULL;
+	return ret;
+}
-- 
2.5.0



More information about the CRIU mailing list