[CRIU] [PATCH 3/7] seccomp: add support for SECCOMP_MODE_FILTER

Tycho Andersen tycho.andersen at canonical.com
Wed Nov 11 08:18:33 PST 2015


On Wed, Nov 11, 2015 at 04:43:49PM +0300, Pavel Emelyanov wrote:
> I have no comments to fix, but still there are things I don't understand :)
> So I have not comments, but several questions inline.
> 
> > diff --git a/include/rst_info.h b/include/rst_info.h
> > index b72e5d0..9d06621 100644
> > --- a/include/rst_info.h
> > +++ b/include/rst_info.h
> > @@ -64,6 +64,7 @@ struct rst_info {
> >  	 * restorer blob.
> >  	 */
> >  	bool			has_seccomp;
> > +	struct sock_fprog	*seccomp_filters;
> 
> This field seems to be unused, doesn't it?

Yes, probably left over from when I ported it over from some other
version of the kernel API, thanks.

> >  
> >  	void			*breakpoint;
> >  };
> 
> > diff --git a/seccomp.c b/seccomp.c
> > new file mode 100644
> > index 0000000..930feb9
> > --- /dev/null
> > +++ b/seccomp.c
> > @@ -0,0 +1,258 @@
> > +#include <linux/filter.h>
> > +#include <sys/types.h>
> > +#include <sys/wait.h>
> > +#include <unistd.h>
> > +
> > +#include "config.h"
> > +#include "imgset.h"
> > +#include "kcmp.h"
> > +#include "pstree.h"
> > +#include "ptrace.h"
> > +#include "proc_parse.h"
> > +#include "seccomp.h"
> > +#include "servicefd.h"
> > +#include "util.h"
> > +#include "rst-malloc.h"
> > +
> > +#include "protobuf.h"
> > +#include "protobuf/seccomp.pb-c.h"
> > +
> > +/* populated on dump during collect_seccomp_filters() */
> > +static int next_filter_id = 0;
> > +static struct seccomp_info **filters = NULL;
> > +
> > +static struct seccomp_info *find_inherited(struct pstree_item *parent,
> > +					   struct sock_filter *filter, int len)
> > +{
> > +	struct seccomp_info *info;
> > +
> > +	/* if we have no filters yet, this one has no parent */
> > +	if (!filters)
> > +		return NULL;
> > +
> > +	for (info = filters[dmpi(parent)->pi_creds->last_filter]; info; info = info->prev) {
> > +
> > +		if (len != info->filter.filter.len)
> > +			continue;
> > +		if (!memcmp(filter, info->filter.filter.data, len))
> > +			return info;
> > +	}
> > +
> > +	return NULL;
> > +}
> > +
> > +static int collect_filter_for_pstree(struct pstree_item *item)
> > +{
> > +	struct seccomp_info *infos = NULL, *cursor;
> > +	int info_count, i, ret = -1;
> > +	struct sock_filter buf[BPF_MAXINSNS];
> > +	void *m;
> > +
> > +	if (item->state == TASK_DEAD ||
> > +	    dmpi(item)->pi_creds->seccomp_mode != SECCOMP_MODE_FILTER)
> > +		return 0;
> > +
> > +	for (i = 0; true; i++) {
> > +		int len;
> > +		struct seccomp_info *info, *inherited = NULL;
> > +
> > +		len = ptrace(PTRACE_SECCOMP_GET_FILTER, item->pid.real, i, buf);
> > +		if (len < 0) {
> > +			if (errno == ENOENT) {
> > +				/* end of the search */
> > +				BUG_ON(i == 0);
> > +				goto save_infos;
> > +			} else if (errno == EINVAL) {
> > +				pr_err("dumping seccomp infos not supported\n");
> > +				goto out;
> > +			} else {
> > +				pr_perror("couldn't dump seccomp filter");
> > +				goto out;
> > +			}
> > +		}
> > +
> > +		inherited = find_inherited(item->parent, buf, len);
> > +		if (inherited) {
> > +			infos = inherited;
> 
> Can it happen that infos is not NULL here? Is it bad that we just overwrite the value?

Good question. Since we're walking "down" the filter tree, it can
happen, but all the previous filters *should* be inherited too, so we
will have done no allocation on them as they were all grabbed from
some other process. So, it should be safe as long as there are no bugs
:)

That said, it should be easy enough to add some instrumentation here
so that we can enforce this with at least a BUG_ON. I'll resend a
version with that.

> > +			continue;
> > +		}
> > +
> > +		info = xmalloc(sizeof(*info));
> > +		if (!info)
> > +			goto out;
> > +		seccomp_filter__init(&info->filter);
> > +
> > +		info->filter.filter.len = len * sizeof(struct sock_filter);
> > +		info->filter.filter.data = xmalloc(info->filter.filter.len);
> > +		if (!info->filter.filter.data)
> > +			goto out;
> > +
> > +		memcpy(info->filter.filter.data, buf, info->filter.filter.len);
> > +
> > +		info->prev = infos;
> > +		infos = info;
> > +	}
> > +
> > +save_infos:
> > +	info_count = i;
> > +
> > +	m = xrealloc(filters, sizeof(*filters) * (next_filter_id + info_count));
> 
> Why do we allocate that many memory? Part of the filters where "inherited".
> Can you describe what kind of data structure is built behind the filters
> pointer? It's some tree, but I don't get how it exactly looks and how pstree
> items point inside it.

Yeah, this is a little strange. There are two sets of pointers to the
filters. The first is the "tree" style from pstree, used for resolving
inheritance relationships. The second set is this one, which is just a
big array with everything. This is used for dumping (and freeing) the
actual filters.

We could ditch the tree style ones from pstree and just use the big
array (and keep array ids in pstree), but it seemed a little nicer to
me this way. But I could be convinced otherwise :)

> > +	if (!m)
> > +		goto out;
> > +	filters = m;
> > +
> > +	for (cursor = infos, i = info_count + next_filter_id - 1;
> > +	     i >= next_filter_id; i--) {
> > +		BUG_ON(!cursor);
> > +		cursor->id = i;
> > +		filters[i] = cursor;
> > +		cursor = cursor->prev;
> > +	}
> > +
> > +	next_filter_id += info_count;
> > +
> > +	dmpi(item)->pi_creds->last_filter = infos->id;
> > +
> > +	/* Don't free the part of the tree we just successfully acquired */
> > +	infos = NULL;
> > +	ret = 0;
> > +out:
> > +	while (infos) {
> > +		struct seccomp_info *freeme = infos;
> > +		infos = infos->prev;
> > +		xfree(freeme->filter.filter.data);
> > +		xfree(freeme);
> > +	}
> > +
> > +	return ret;
> > +}
> > +
> > +static int dump_seccomp_filters(void)
> > +{
> > +	SeccompEntry se = SECCOMP_ENTRY__INIT;
> > +	int ret = -1, i;
> > +
> > +	/* If we didn't collect any filters, don't create a seccomp image at all. */
> > +	if (next_filter_id == 0)
> > +		return 0;
> > +
> > +	se.seccomp_filters = xzalloc(sizeof(*se.seccomp_filters) * next_filter_id);
> > +	if (!se.seccomp_filters)
> > +		return -1;
> > +
> > +	se.n_seccomp_filters = next_filter_id;
> > +
> > +	for (i = 0; i < next_filter_id; i++) {
> > +		SeccompFilter *sf;
> > +		struct seccomp_info *cur = filters[i];
> > +
> > +		sf = se.seccomp_filters[cur->id] = &cur->filter;
> > +		if (cur->prev) {
> > +			sf->has_prev = true;
> > +			sf->prev = cur->prev->id;
> > +		}
> > +	}
> > +
> > +	ret = pb_write_one(img_from_set(glob_imgset, CR_FD_SECCOMP), &se, PB_SECCOMP);
> > +
> > +	xfree(se.seccomp_filters);
> > +
> > +	for (i = 0; i < next_filter_id; i++) {
> > +		struct seccomp_info *freeme = filters[i];
> > +
> > +		xfree(freeme->filter.filter.data);
> > +		xfree(freeme);
> > +	}
> > +	xfree(filters);
> > +
> > +	return ret;
> > +}
> > +
> > +int collect_seccomp_filters(void)
> > +{
> > +	if (preorder_pstree_traversal(root_item, collect_filter_for_pstree) < 0)
> > +		return -1;
> > +
> > +	if (dump_seccomp_filters())
> > +		return -1;
> > +
> > +	return 0;
> > +}
> > +
> > +/* Populated on restore by prepare_seccomp_filters */
> > +static SeccompEntry *se;
> > +
> > +int prepare_seccomp_filters(void)
> > +{
> > +	struct cr_img *img;
> > +	int ret;
> > +
> > +	img = open_image(CR_FD_SECCOMP, O_RSTR);
> > +	if (!img)
> > +		return -1;
> > +
> > +	ret = pb_read_one_eof(img, &se, PB_SECCOMP);
> > +	close_image(img);
> > +	if (ret <= 0)
> > +		return 0; /* there were no filters */
> > +
> > +	BUG_ON(!se);
> > +
> > +	return 0;
> > +}
> > +
> > +int seccomp_filters_get_rst_pos(CoreEntry *core, int *count, unsigned long *pos)
> > +{
> > +	SeccompFilter *sf = NULL;
> > +	struct sock_fprog *arr = NULL;
> > +	int ret = -1, i;
> > +
> > +	if (!core->tc->has_seccomp_filter) {
> > +		*count = 0;
> > +		return 0;
> > +	}
> > +
> > +	*count = 0;
> > +	*pos = rst_mem_cpos(RM_PRIVATE);
> > +
> > +	BUG_ON(core->tc->seccomp_filter > se->n_seccomp_filters);
> > +	sf = se->seccomp_filters[core->tc->seccomp_filter];
> > +
> > +	while (1) {
> > +		(*count)++;
> > +
> > +		if (!sf->has_prev)
> > +			break;
> > +
> > +		sf = se->seccomp_filters[sf->prev];
> > +	}
> > +
> > +	arr = rst_mem_alloc(sizeof(*arr) * (*count), RM_PRIVATE);
> > +	if (!arr)
> > +		goto out;
> > +
> > +	sf = se->seccomp_filters[core->tc->seccomp_filter];
> > +	for (i = 0; i < *count; i++) {
> > +		struct sock_fprog *fprog = &arr[i];
> > +		// XXX: this is a bit fugly; is there a better way?
> 
> I didn't get a better way for what you were looking for :)

The problem here is that we want to send a struct sock_fprog to the
restorer blob, which itself has a pointer to the actual instruction
array. So we want to pass a double pointer to the restorer blob. I
could do something like pack the second array onto the end of the
struct sock_fprog, but it is nice to be able to refer to the first
array without having to do math by hand.

The solution here is to just save the rst_mem_alloc value into the
struct where the second level pointer goes, and then remap the result
after we remap the struct. That feels sort of broken, though :). But
maybe not.

Tycho


More information about the CRIU mailing list