[CRIU] [PATCH 15/28] seccomp: Add support of per thread filters on dump
Cyrill Gorcunov
gorcunov at gmail.com
Wed Mar 21 00:43:00 MSK 2018
From: Cyrill Gorcunov <gorcunov at virtuozzo.com>
Signed-off-by: Cyrill Gorcunov <gorcunov at virtuozzo.com>
---
criu/cr-dump.c | 2 +-
criu/include/seccomp.h | 40 +++++---
criu/seccomp.c | 243 +++++++++++++++++++++----------------------------
3 files changed, 134 insertions(+), 151 deletions(-)
diff --git a/criu/cr-dump.c b/criu/cr-dump.c
index eecfa310ed47..7b6f89e4e181 100644
--- a/criu/cr-dump.c
+++ b/criu/cr-dump.c
@@ -1930,7 +1930,7 @@ int cr_dump_tasks(pid_t pid)
if (!glob_imgset)
goto err;
- if (collect_seccomp_filters() < 0)
+ if (seccomp_collect_dump_filters() < 0)
goto err;
/* Errors handled later in detect_pid_reuse */
diff --git a/criu/include/seccomp.h b/criu/include/seccomp.h
index 96320f863fff..ff0465b8266f 100644
--- a/criu/include/seccomp.h
+++ b/criu/include/seccomp.h
@@ -30,11 +30,35 @@
struct pstree_item;
struct rb_node;
+/*
+ * seccomp filters are bound to @current->seccomp.filter
+ * in the kernel, i.e. they are per-thread structures.
+ *
+ * If a filter is assigned then every subsequent call
+ * to fork() makes a copy of this @current->seccomp.filter
+ * pointer into the child process.
+ *
+ * The thread group can share a filter if the filter
+ * is assigned with SECCOMP_FILTER_FLAG_TSYNC on a group
+ * which has no filters yet.
+ *
+ * To find identity we have to use memcmp because we
+ * don't have access to @current->seccomp.filter pointers.
+ * FIXME: Provide kcmp mode for that.
+ *
+ * Each element below holds one filter fetched from a thread;
+ * elements belonging to the same thread are linked through @prev.
+ */
+struct seccomp_filter_chain {
+ struct seccomp_filter_chain *prev; /* element collected just before this one; list walks stop at NULL */
+ SeccompFilter filter; /* filter program and flags, referenced directly when the image entry is written */
+};
+
struct seccomp_entry { /* per-thread seccomp state gathered during dump */
- struct rb_node node;
- pid_t tid_real;
- size_t last_filter;
- unsigned int mode;
+ struct rb_node node; /* links the entry into seccomp_tid_rb_root */
+ pid_t tid_real; /* thread id handed to ptrace() when fetching filters */
+ size_t last_filter; /* index of this thread's last filter in the image array, set by dump_seccomp_filters() */
+ unsigned int mode; /* seccomp mode; filters are fetched only for SECCOMP_MODE_FILTER */
+
+ struct seccomp_filter_chain *chain; /* most recently collected filter, older ones reachable via ->prev */
+ size_t nr_chains; /* number of elements linked on @chain */
};
extern struct seccomp_entry *seccomp_lookup(pid_t tid_real, bool create, bool mandatory);
@@ -42,14 +66,8 @@ extern struct seccomp_entry *seccomp_lookup(pid_t tid_real, bool create, bool ma
extern int seccomp_collect_entry(pid_t tid_real, unsigned int mode);
extern void seccomp_free_entries(void);
extern int seccomp_dump_thread(pid_t tid_real, ThreadCoreEntry *thread_core);
+extern int seccomp_collect_dump_filters(void);
-struct seccomp_info {
- struct seccomp_info *prev;
- int id;
- SeccompFilter filter;
-};
-
-extern int collect_seccomp_filters(void);
extern int prepare_seccomp_filters(void);
struct task_restore_args;
extern int seccomp_filters_get_rst_pos(CoreEntry *item, struct task_restore_args *);
diff --git a/criu/seccomp.c b/criu/seccomp.c
index 7d39bcc69808..0f0866e0c4a5 100644
--- a/criu/seccomp.c
+++ b/criu/seccomp.c
@@ -75,6 +75,21 @@ int seccomp_collect_entry(pid_t tid_real, unsigned int mode)
return 0;
}
+/*
+ * Release every filter element linked on @entry together with
+ * its program buffer, then reset the entry to the "no filters"
+ * state (NULL chain, zero counter).
+ */
+static void seccomp_free_chain(struct seccomp_entry *entry)
+{
+ struct seccomp_filter_chain *cur = entry->chain;
+
+ while (cur) {
+ /* Remember the link before the element itself goes away. */
+ struct seccomp_filter_chain *older = cur->prev;
+
+ xfree(cur->filter.filter.data);
+ xfree(cur);
+ cur = older;
+ }
+
+ entry->chain = NULL;
+ entry->nr_chains = 0;
+}
+
void seccomp_free_entries(void)
{
struct seccomp_entry *entry;
@@ -108,209 +123,159 @@ int seccomp_dump_thread(pid_t tid_real, ThreadCoreEntry *thread_core)
return 0;
}
-/* populated on dump during collect_seccomp_filters() */
-static int next_filter_id = 0;
-static struct seccomp_info **filters = NULL;
-
-static struct seccomp_info *find_inherited(int last_filter, struct sock_filter *filter,
- int len, struct seccomp_metadata *meta)
-{
- struct seccomp_info *info;
-
- /* if we have no filters yet, this one has no parent */
- if (!filters)
- return NULL;
-
- for (info = filters[last_filter]; info; info = info->prev) {
-
- if (len != info->filter.filter.len)
- continue;
- if (!!meta ^ !!info->filter.has_flags)
- continue;
- if (info->filter.has_flags && meta) {
- if (info->filter.flags != meta->flags)
- continue;
- }
- if (!memcmp(filter, info->filter.filter.data, len))
- return info;
- }
-
- return NULL;
-}
-
-static int collect_filter_for_pstree(struct pstree_item *item)
+/*
+ * Fetch all seccomp filters installed on the thread described
+ * by @entry and link them onto @entry->chain.
+ *
+ * Filters are requested by position, starting from 0, until the
+ * kernel answers ENOENT. Each fetched filter is pushed on the head
+ * of the chain, so the head ends up being the filter with the
+ * highest position.
+ *
+ * Returns 0 on success (including threads which are not in
+ * SECCOMP_MODE_FILTER) and -1 on error. On error the elements
+ * linked so far stay on @entry, they are not released here.
+ */
+static int collect_filter(struct seccomp_entry *entry)
{
struct seccomp_metadata meta_buf, *meta = &meta_buf;
- struct seccomp_info *infos = NULL, *cursor;
- struct seccomp_entry *entry, *entry_parent;
- int info_count, i, ret = -1;
+ struct seccomp_filter_chain *chain, *prev;
struct sock_filter buf[BPF_MAXINSNS];
- void *m;
-
- if (item->pid->state == TASK_DEAD)
- return 0;
+ size_t pos;
+ int len;
- entry = seccomp_find_entry(item->pid->real);
- if (!entry)
- return -1;
if (entry->mode != SECCOMP_MODE_FILTER)
return 0;
- for (i = 0; true; i++) {
- int len;
- struct seccomp_info *info, *inherited = NULL;
-
- len = ptrace(PTRACE_SECCOMP_GET_FILTER, item->pid->real, i, buf);
+ for (pos = 0; true; pos++) {
+ /* On success @len is the number of BPF instructions stored in @buf. */
+ len = ptrace(PTRACE_SECCOMP_GET_FILTER, entry->tid_real, pos, buf);
if (len < 0) {
if (errno == ENOENT) {
- /* end of the search */
- BUG_ON(i == 0);
- goto save_infos;
- } else if (errno == EINVAL) {
- pr_err("dumping seccomp infos not supported\n");
- goto out;
+ /* No filter at this position: everything is fetched. */
+ break;
} else {
- pr_perror("couldn't dump seccomp filter");
- goto out;
+ pr_perror("Can't fetch filter on tid_real %d pos %zu",
+ entry->tid_real, pos);
+ return -1;
}
}
if (!meta)
meta = &meta_buf;
- if (ptrace(PTRACE_SECCOMP_GET_METADATA, item->pid->real, i, meta) < 0) {
+ /*
+ * Per ptrace(2) the filter index travels in ->filter_off
+ * while the address argument carries the structure size.
+ */
+ meta->filter_off = pos;
+ if (ptrace(PTRACE_SECCOMP_GET_METADATA, entry->tid_real, sizeof(*meta), meta) < 0) {
if (errno == EIO) {
+ /* Request is unknown to the kernel, go on without flags. */
meta = NULL;
} else {
- pr_perror("couldn't fetch seccomp metadata: pid %d pos %d",
- item->pid->real, i);
- goto out;
+ pr_perror("Can't fetch seccomp metadata on tid_real %d pos %zu",
+ entry->tid_real, pos);
+ return -1;
}
}
- entry_parent = seccomp_find_entry(item->parent->pid->real);
- if (!entry_parent)
- goto out;
- inherited = find_inherited(entry_parent->last_filter, buf, len, meta);
- if (inherited) {
- bool found = false;
-
- /* Small sanity check: if infos is already populated,
- * we should have inherited that filter too. */
- for (cursor = infos; cursor; cursor = cursor->prev) {
- if (inherited->prev== cursor) {
- found = true;
- break;
- }
- }
+ chain = xzalloc(sizeof(*chain));
+ if (!chain)
+ return -1;
- BUG_ON(!found);
+ seccomp_filter__init(&chain->filter);
- infos = inherited;
- continue;
+ chain->filter.filter.len = len * sizeof(struct sock_filter);
+ chain->filter.filter.data = xmalloc(chain->filter.filter.len);
+ if (!chain->filter.filter.data) {
+ xfree(chain);
+ return -1;
}
- info = xmalloc(sizeof(*info));
- if (!info)
- goto out;
- seccomp_filter__init(&info->filter);
+ memcpy(chain->filter.filter.data, buf, chain->filter.filter.len);
if (meta) {
- info->filter.has_flags = true;
- info->filter.flags = meta->flags;
+ chain->filter.has_flags = true;
+ chain->filter.flags = meta->flags;
}
- info->filter.filter.len = len * sizeof(struct sock_filter);
- info->filter.filter.data = xmalloc(info->filter.filter.len);
- if (!info->filter.filter.data) {
- xfree(info);
- goto out;
- }
+ /* Push the new element on the head of the per-thread list. */
+ prev = entry->chain, entry->chain = chain, chain->prev = prev;
+ entry->nr_chains++;
+ }
- memcpy(info->filter.filter.data, buf, info->filter.filter.len);
+ return 0;
+}
- info->prev = infos;
- infos = info;
- }
+/*
+ * Process tree traversal callback: collect seccomp filters from
+ * every thread of @item.
+ *
+ * Dead tasks are skipped. The parent (when there is one) and the
+ * leader must already have seccomp entries, otherwise the dump is
+ * failed. Each thread is then looked up on its own and passed to
+ * collect_filter().
+ *
+ * Returns 0 on success, -1 on error.
+ */
+static int collect_filters(struct pstree_item *item)
+{
+ struct seccomp_entry *parent, *leader, *entry;
+ size_t i;
-save_infos:
- info_count = i;
+ if (item->pid->state == TASK_DEAD)
+ return 0;
- m = xrealloc(filters, sizeof(*filters) * (next_filter_id + info_count));
- if (!m)
- goto out;
- filters = m;
-
- for (cursor = infos, i = info_count + next_filter_id - 1;
- i >= next_filter_id; i--) {
- BUG_ON(!cursor);
- cursor->id = i;
- filters[i] = cursor;
- cursor = cursor->prev;
+ parent = item->parent ? seccomp_find_entry(item->parent->pid->real) : NULL;
+ if (!parent && item->parent) {
+ pr_err("Can't collect filter on parent tid_real %d\n",
+ item->parent->pid->real);
+ return -1;
+ }
+ leader = seccomp_find_entry(item->pid->real);
+ if (!leader) {
+ pr_err("Can't collect filter on leader tid_real %d\n",
+ item->pid->real);
+ return -1;
}
- next_filter_id += info_count;
-
- entry->last_filter = infos->id;
+ for (i = 0; i < item->nr_threads; i++) {
+ entry = seccomp_find_entry(item->threads[i]->real);
+ /* Check the thread's own entry and report the thread's own tid. */
+ if (!entry) {
+ pr_err("Can't collect filter on tid_real %d\n",
+ item->threads[i]->real);
+ return -1;
+ }
- /* Don't free the part of the tree we just successfully acquired */
- infos = NULL;
- ret = 0;
-out:
- while (infos) {
- struct seccomp_info *freeme = infos;
- infos = infos->prev;
- xfree(freeme->filter.filter.data);
- xfree(freeme);
+ if (collect_filter(entry))
+ return -1;
}
- return ret;
+ return 0;
}
static int dump_seccomp_filters(void)
{
SeccompEntry se = SECCOMP_ENTRY__INIT;
- int ret = -1, i;
+ struct seccomp_filter_chain *chain;
+ struct seccomp_entry *entry;
+ size_t last_filter = 0, nr_chains = 0;
+ struct rb_node *node;
+ int ret;
- /* If we didn't collect any filters, don't create a seccomp image at all. */
- if (next_filter_id == 0)
- return 0;
+ for (node = rb_first(&seccomp_tid_rb_root); node; node = rb_next(node)) {
+ entry = rb_entry(node, struct seccomp_entry, node);
+ nr_chains += entry->nr_chains;
+ }
- se.seccomp_filters = xzalloc(sizeof(*se.seccomp_filters) * next_filter_id);
+ se.n_seccomp_filters = nr_chains;
+ se.seccomp_filters = xmalloc(sizeof(*se.seccomp_filters) * nr_chains);
if (!se.seccomp_filters)
return -1;
- se.n_seccomp_filters = next_filter_id;
+ for (node = rb_first(&seccomp_tid_rb_root); node; node = rb_next(node)) {
+ entry = rb_entry(node, struct seccomp_entry, node);
+
+ if (!entry->nr_chains)
+ continue;
- for (i = 0; i < next_filter_id; i++) {
- SeccompFilter *sf;
- struct seccomp_info *cur = filters[i];
+ for (chain = entry->chain; chain; chain = chain->prev) {
+ BUG_ON(last_filter >= nr_chains);
- sf = se.seccomp_filters[cur->id] = &cur->filter;
- if (cur->prev) {
- sf->has_prev = true;
- sf->prev = cur->prev->id;
+ se.seccomp_filters[last_filter] = &chain->filter;
+ if (chain != entry->chain) {
+ chain->filter.has_prev = true;
+ chain->filter.prev = last_filter - 1;
+ }
+ last_filter++;
}
+
+ entry->last_filter = last_filter - 1;
}
ret = pb_write_one(img_from_set(glob_imgset, CR_FD_SECCOMP), &se, PB_SECCOMP);
xfree(se.seccomp_filters);
- for (i = 0; i < next_filter_id; i++) {
- struct seccomp_info *freeme = filters[i];
-
- xfree(freeme->filter.filter.data);
- xfree(freeme);
+ for (node = rb_first(&seccomp_tid_rb_root); node; node = rb_next(node)) {
+ entry = rb_entry(node, struct seccomp_entry, node);
+ seccomp_free_chain(entry);
}
- xfree(filters);
return ret;
}
-int collect_seccomp_filters(void)
+int seccomp_collect_dump_filters(void)
{
- if (preorder_pstree_traversal(root_item, collect_filter_for_pstree) < 0)
+ if (preorder_pstree_traversal(root_item, collect_filters) < 0)
return -1;
if (dump_seccomp_filters())
--
2.14.3
More information about the CRIU
mailing list