[CRIU] [PATCH 15/28] seccomp: Add support of per thread filters on dump

Cyrill Gorcunov gorcunov at gmail.com
Wed Mar 21 00:43:00 MSK 2018


From: Cyrill Gorcunov <gorcunov at virtuozzo.com>

Signed-off-by: Cyrill Gorcunov <gorcunov at virtuozzo.com>
---
 criu/cr-dump.c         |   2 +-
 criu/include/seccomp.h |  40 +++++---
 criu/seccomp.c         | 243 +++++++++++++++++++++----------------------------
 3 files changed, 134 insertions(+), 151 deletions(-)

diff --git a/criu/cr-dump.c b/criu/cr-dump.c
index eecfa310ed47..7b6f89e4e181 100644
--- a/criu/cr-dump.c
+++ b/criu/cr-dump.c
@@ -1930,7 +1930,7 @@ int cr_dump_tasks(pid_t pid)
 	if (!glob_imgset)
 		goto err;
 
-	if (collect_seccomp_filters() < 0)
+	if (seccomp_collect_dump_filters() < 0)
 		goto err;
 
 	/* Errors handled later in detect_pid_reuse */
diff --git a/criu/include/seccomp.h b/criu/include/seccomp.h
index 96320f863fff..ff0465b8266f 100644
--- a/criu/include/seccomp.h
+++ b/criu/include/seccomp.h
@@ -30,11 +30,35 @@
 struct pstree_item;
 struct rb_node;
 
+/*
+ * Seccomp filters are bound to @current->seccomp.filter
+ * in the kernel, i.e. they are per-thread structures.
+ *
+ * If a filter is assigned then every subsequent call
+ * to fork() copies this @current->seccomp.filter
+ * pointer into the child process.
+ *
+ * A thread group can share a filter if the filter
+ * is assigned with SECCOMP_FILTER_FLAG_TSYNC on a group
+ * which has no filters yet.
+ *
+ * To detect identical filters we have to use memcmp because
+ * we don't have access to @current->seccomp.filter pointers.
+ * FIXME: Provide kcmp mode for that.
+ */
+struct seccomp_filter_chain {
+	struct seccomp_filter_chain	*prev;
+	SeccompFilter			filter;
+};
+
 struct seccomp_entry {
-	struct rb_node		node;
-	pid_t			tid_real;
-	size_t			last_filter;
-	unsigned int		mode;
+	struct rb_node			node;
+	pid_t				tid_real;
+	size_t				last_filter;
+	unsigned int			mode;
+
+	struct seccomp_filter_chain	*chain;
+	size_t				nr_chains;
 };
 
 extern struct seccomp_entry *seccomp_lookup(pid_t tid_real, bool create, bool mandatory);
@@ -42,14 +66,8 @@ extern struct seccomp_entry *seccomp_lookup(pid_t tid_real, bool create, bool ma
 extern int seccomp_collect_entry(pid_t tid_real, unsigned int mode);
 extern void seccomp_free_entries(void);
 extern int seccomp_dump_thread(pid_t tid_real, ThreadCoreEntry *thread_core);
+extern int seccomp_collect_dump_filters(void);
 
-struct seccomp_info {
-	struct seccomp_info	*prev;
-	int			id;
-	SeccompFilter		filter;
-};
-
-extern int collect_seccomp_filters(void);
 extern int prepare_seccomp_filters(void);
 struct task_restore_args;
 extern int seccomp_filters_get_rst_pos(CoreEntry *item, struct task_restore_args *);
diff --git a/criu/seccomp.c b/criu/seccomp.c
index 7d39bcc69808..0f0866e0c4a5 100644
--- a/criu/seccomp.c
+++ b/criu/seccomp.c
@@ -75,6 +75,21 @@ int seccomp_collect_entry(pid_t tid_real, unsigned int mode)
 	return 0;
 }
 
+static void seccomp_free_chain(struct seccomp_entry *entry)
+{
+	struct seccomp_filter_chain *chain, *prev;
+
+	for (chain = entry->chain; chain; chain = prev) {
+		prev = chain->prev;
+
+		xfree(chain->filter.filter.data);
+		xfree(chain);
+	}
+
+	entry->nr_chains = 0;
+	entry->chain = NULL;
+}
+
 void seccomp_free_entries(void)
 {
 	struct seccomp_entry *entry;
@@ -108,209 +123,159 @@ int seccomp_dump_thread(pid_t tid_real, ThreadCoreEntry *thread_core)
 	return 0;
 }
 
-/* populated on dump during collect_seccomp_filters() */
-static int next_filter_id = 0;
-static struct seccomp_info **filters = NULL;
-
-static struct seccomp_info *find_inherited(int last_filter, struct sock_filter *filter,
-					   int len, struct seccomp_metadata *meta)
-{
-	struct seccomp_info *info;
-
-	/* if we have no filters yet, this one has no parent */
-	if (!filters)
-		return NULL;
-
-	for (info = filters[last_filter]; info; info = info->prev) {
-
-		if (len != info->filter.filter.len)
-			continue;
-		if (!!meta ^ !!info->filter.has_flags)
-			continue;
-		if (info->filter.has_flags && meta) {
-			if (info->filter.flags != meta->flags)
-				continue;
-		}
-		if (!memcmp(filter, info->filter.filter.data, len))
-			return info;
-	}
-
-	return NULL;
-}
-
-static int collect_filter_for_pstree(struct pstree_item *item)
+static int collect_filter(struct seccomp_entry *entry)
 {
 	struct seccomp_metadata meta_buf, *meta = &meta_buf;
-	struct seccomp_info *infos = NULL, *cursor;
-	struct seccomp_entry *entry, *entry_parent;
-	int info_count, i, ret = -1;
+	struct seccomp_filter_chain *chain, *prev;
 	struct sock_filter buf[BPF_MAXINSNS];
-	void *m;
-
-	if (item->pid->state == TASK_DEAD)
-		return 0;
+	size_t pos;
+	int len;
 
-	entry = seccomp_find_entry(item->pid->real);
-	if (!entry)
-		return -1;
 	if (entry->mode != SECCOMP_MODE_FILTER)
 		return 0;
 
-	for (i = 0; true; i++) {
-		int len;
-		struct seccomp_info *info, *inherited = NULL;
-
-		len = ptrace(PTRACE_SECCOMP_GET_FILTER, item->pid->real, i, buf);
+	for (pos = 0; true; pos++) {
+		len = ptrace(PTRACE_SECCOMP_GET_FILTER, entry->tid_real, pos, buf);
 		if (len < 0) {
 			if (errno == ENOENT) {
-				/* end of the search */
-				BUG_ON(i == 0);
-				goto save_infos;
-			} else if (errno == EINVAL) {
-				pr_err("dumping seccomp infos not supported\n");
-				goto out;
+				break;
 			} else {
-				pr_perror("couldn't dump seccomp filter");
-				goto out;
+				pr_perror("Can't fetch filter on tid_real %d pos %zu",
+					  entry->tid_real, pos);
+				return -1;
 			}
 		}
 
 		if (!meta)
 			meta = &meta_buf;
 
-		if (ptrace(PTRACE_SECCOMP_GET_METADATA, item->pid->real, i, meta) < 0) {
+		if (ptrace(PTRACE_SECCOMP_GET_METADATA, entry->tid_real, pos, meta) < 0) {
 			if (errno == EIO) {
 				meta = NULL;
 			} else {
-				pr_perror("couldn't fetch seccomp metadata: pid %d pos %d",
-					  item->pid->real, i);
-				goto out;
+				pr_perror("Can't fetch seccomp metadata on tid_real %d pos %zu",
+					  entry->tid_real, pos);
+				return -1;
 			}
 		}
 
-		entry_parent = seccomp_find_entry(item->parent->pid->real);
-		if (!entry_parent)
-			goto out;
-		inherited = find_inherited(entry_parent->last_filter, buf, len, meta);
-		if (inherited) {
-			bool found = false;
-
-			/* Small sanity check: if infos is already populated,
-			 * we should have inherited that filter too. */
-			for (cursor = infos; cursor; cursor = cursor->prev) {
-				if (inherited->prev== cursor) {
-					found = true;
-					break;
-				}
-			}
+		chain = xzalloc(sizeof(*chain));
+		if (!chain)
+			return -1;
 
-			BUG_ON(!found);
+		seccomp_filter__init(&chain->filter);
 
-			infos = inherited;
-			continue;
+		chain->filter.filter.len = len * sizeof(struct sock_filter);
+		chain->filter.filter.data = xmalloc(chain->filter.filter.len);
+		if (!chain->filter.filter.data) {
+			xfree(chain);
+			return -1;
 		}
 
-		info = xmalloc(sizeof(*info));
-		if (!info)
-			goto out;
-		seccomp_filter__init(&info->filter);
+		memcpy(chain->filter.filter.data, buf, chain->filter.filter.len);
 
 		if (meta) {
-			info->filter.has_flags = true;
-			info->filter.flags = meta->flags;
+			chain->filter.has_flags = true;
+			chain->filter.flags = meta->flags;
 		}
 
-		info->filter.filter.len = len * sizeof(struct sock_filter);
-		info->filter.filter.data = xmalloc(info->filter.filter.len);
-		if (!info->filter.filter.data) {
-			xfree(info);
-			goto out;
-		}
+		prev = entry->chain, entry->chain = chain, chain->prev = prev;
+		entry->nr_chains++;
+	}
 
-		memcpy(info->filter.filter.data, buf, info->filter.filter.len);
+	return 0;
+}
 
-		info->prev = infos;
-		infos = info;
-	}
+static int collect_filters(struct pstree_item *item)
+{
+	struct seccomp_entry *parent, *leader, *entry;
+	size_t i;
 
-save_infos:
-	info_count = i;
+	if (item->pid->state == TASK_DEAD)
+		return 0;
 
-	m = xrealloc(filters, sizeof(*filters) * (next_filter_id + info_count));
-	if (!m)
-		goto out;
-	filters = m;
-
-	for (cursor = infos, i = info_count + next_filter_id - 1;
-	     i >= next_filter_id; i--) {
-		BUG_ON(!cursor);
-		cursor->id = i;
-		filters[i] = cursor;
-		cursor = cursor->prev;
+	parent = item->parent ? seccomp_find_entry(item->parent->pid->real) : NULL;
+	if (!parent && item->parent) {
+		pr_err("Can't collect filter on parent tid_real %d\n",
+		       item->parent->pid->real);
+		return -1;
+	}
+	leader = seccomp_find_entry(item->pid->real);
+	if (!leader) {
+		pr_err("Can't collect filter on leader tid_real %d\n",
+		       item->pid->real);
+		return -1;
 	}
 
-	next_filter_id += info_count;
-
-	entry->last_filter = infos->id;
+	for (i = 0; i < item->nr_threads; i++) {	/* filters are collected per thread */
+		entry = seccomp_find_entry(item->threads[i]->real);
+		if (!entry) {
+			pr_err("Can't collect filter on tid_real %d\n",
+			       item->threads[i]->real);
+			return -1;
+		}
 
-	/* Don't free the part of the tree we just successfully acquired */
-	infos = NULL;
-	ret = 0;
-out:
-	while (infos) {
-		struct seccomp_info *freeme = infos;
-		infos = infos->prev;
-		xfree(freeme->filter.filter.data);
-		xfree(freeme);
+		if (collect_filter(entry))
+			return -1;
 	}
 
-	return ret;
+	return 0;
 }
 
 static int dump_seccomp_filters(void)
 {
 	SeccompEntry se = SECCOMP_ENTRY__INIT;
-	int ret = -1, i;
+	struct seccomp_filter_chain *chain;
+	struct seccomp_entry *entry;
+	size_t last_filter = 0, nr_chains = 0;
+	struct rb_node *node;
+	int ret;
 
-	/* If we didn't collect any filters, don't create a seccomp image at all. */
-	if (next_filter_id == 0)
-		return 0;
+	for (node = rb_first(&seccomp_tid_rb_root); node; node = rb_next(node)) {
+		entry = rb_entry(node, struct seccomp_entry, node);
+		nr_chains += entry->nr_chains;
+	}
 
-	se.seccomp_filters = xzalloc(sizeof(*se.seccomp_filters) * next_filter_id);
+	se.n_seccomp_filters = nr_chains;
+	se.seccomp_filters = xmalloc(sizeof(*se.seccomp_filters) * nr_chains);
 	if (!se.seccomp_filters)
 		return -1;
 
-	se.n_seccomp_filters = next_filter_id;
+	for (node = rb_first(&seccomp_tid_rb_root); node; node = rb_next(node)) {
+		entry = rb_entry(node, struct seccomp_entry, node);
+
+		if (!entry->nr_chains)
+			continue;
 
-	for (i = 0; i < next_filter_id; i++) {
-		SeccompFilter *sf;
-		struct seccomp_info *cur = filters[i];
+		for (chain = entry->chain; chain; chain = chain->prev) {
+			BUG_ON(last_filter >= nr_chains);
 
-		sf = se.seccomp_filters[cur->id] = &cur->filter;
-		if (cur->prev) {
-			sf->has_prev = true;
-			sf->prev = cur->prev->id;
+			se.seccomp_filters[last_filter] = &chain->filter;
+			if (chain != entry->chain) {
+				chain->filter.has_prev = true;
+				chain->filter.prev = last_filter - 1;
+			}
+			last_filter++;
 		}
+
+		entry->last_filter = last_filter - 1;
 	}
 
 	ret = pb_write_one(img_from_set(glob_imgset, CR_FD_SECCOMP), &se, PB_SECCOMP);
 
 	xfree(se.seccomp_filters);
 
-	for (i = 0; i < next_filter_id; i++) {
-		struct seccomp_info *freeme = filters[i];
-
-		xfree(freeme->filter.filter.data);
-		xfree(freeme);
+	for (node = rb_first(&seccomp_tid_rb_root); node; node = rb_next(node)) {
+		entry = rb_entry(node, struct seccomp_entry, node);
+		seccomp_free_chain(entry);
 	}
-	xfree(filters);
 
 	return ret;
 }
 
-int collect_seccomp_filters(void)
+int seccomp_collect_dump_filters(void)
 {
-	if (preorder_pstree_traversal(root_item, collect_filter_for_pstree) < 0)
+	if (preorder_pstree_traversal(root_item, collect_filters) < 0)
 		return -1;
 
 	if (dump_seccomp_filters())
-- 
2.14.3



More information about the CRIU mailing list