[CRIU] [PATCH] cr-super: Initial commit

Andrew Vagin avagin at odin.com
Wed Sep 16 04:55:09 PDT 2015


On Wed, Sep 16, 2015 at 12:57:43PM +0300, Cyrill Gorcunov wrote:
> Running CRIU with root privilegues is required because of kernel's
> restrictions: in particular manipulating /proc/$pid/map_files/
> is guarded with CAP_SYS_ADMIN (which I plan to drop off in future
> but can't right now).
> 
> Still it is being found that when we run CRIU in service mode handling
> RPC requests introduces problems with security context
> 
> * CVE-2015-5228
> https://bugzilla.redhat.com/show_bug.cgi?id=1255782
> 
> * CVE-2015-5231
> https://bugzilla.redhat.com/show_bug.cgi?id=1256728
> 
> And here is an idea how to cork up such kind of problems:
> we introduce a small helper tool named 'cr-super' which
> should have suid bit set with proper owner and CRIU would
> run it when need to fetch information about map_files
> entries.
> 
> Basically, a client opens a socketpair and pass one into
> cr-super as an argument, in turn cr-super fetches commands
> from the socket, fill the information needed into the buffer
> and send it back to the client.
> 
> In this early commit I didn't add test for client uid/gid
> but need to check the client belong to say "criu" group
> which would be allowed to talk to cr-super.

In addition, we need to check that we are able to attache to a process
by ptrace.

I think for that we need to drop CAP_SYS_PTRACE from the effective set,
try to call PTRACE_SEIZE and if this operation was success, we can read
map_files.

> 
> Also, need to modify CRIU itself to start using this
> helper on demand (if criu itself is running under root
> than we don't need to use this service).
> 
> Thus it's an early draft, take a look, comments are welcome.
> 
> Signed-off-by: Cyrill Gorcunov <gorcunov at openvz.org>
> ---
>  Makefile         |  14 ++-
>  super/Makefile   |   1 +
>  super/cr-super.h |  59 +++++++++++
>  super/main.c     | 318 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
>  4 files changed, 390 insertions(+), 2 deletions(-)
>  create mode 100644 super/Makefile
>  create mode 100644 super/cr-super.h
>  create mode 100644 super/main.c
> 
> diff --git a/Makefile b/Makefile
> index fdc6830f4e68..a36841270fab 100644
> --- a/Makefile
> +++ b/Makefile
> @@ -187,13 +187,14 @@ build-crtools := -r -R -f scripts/Makefile.build makefile=Makefile.crtools obj
>  PROGRAM		:= criu
>  
>  .PHONY: all zdtm test rebuild clean distclean tags cscope	\
> -	docs help pie protobuf $(ARCH_DIR) clean-built lib crit
> +	docs help pie protobuf $(ARCH_DIR) clean-built lib crit	\
> +	super cr-super
>  
>  ifeq ($(GCOV),1)
>  %.o $(PROGRAM): override CFLAGS += --coverage
>  endif
>  
> -all: config pie $(VERSION_HEADER) $(CRIU-LIB)
> +all: config pie super cr-super $(VERSION_HEADER) $(CRIU-LIB)
>  	$(Q) $(MAKE) $(PROGRAM)
>  	$(Q) $(MAKE) crit
>  
> @@ -248,6 +249,15 @@ $(PROGRAM): $(SYSCALL-LIB) $(ARCH-LIB) $(PROGRAM-BUILTINS)
>  	$(E) "  LINK    " $@
>  	$(Q) $(CC) $(CFLAGS) $^ $(LIBS) $(LDFLAGS) $(GMONLDOPT) -rdynamic -o $@
>  
> +super/%:: $(VERSION_HEADER) config built-in.o
> +	$(Q) $(MAKE) $(build)=super $@
> +super: $(VERSION_HEADER) config built-in.o
> +	$(Q) $(MAKE) $(build)=super all
> +
> +cr-super: super/built-in.o
> +	$(E) "  LINK    " $@
> +	$(Q) $(CC) $(CFLAGS) $^ $(LDFLAGS) $(GMONLDOPT) -rdynamic -o $@
> +
>  crit:
>  	$(Q) $(MAKE) -C pycriu all
>  
> diff --git a/super/Makefile b/super/Makefile
> new file mode 100644
> index 000000000000..b666967fd570
> --- /dev/null
> +++ b/super/Makefile
> @@ -0,0 +1 @@
> +obj-y += main.o
> diff --git a/super/cr-super.h b/super/cr-super.h
> new file mode 100644
> index 000000000000..a5724a552a29
> --- /dev/null
> +++ b/super/cr-super.h
> @@ -0,0 +1,59 @@
> +#ifndef __CR_SUPER_H__
> +#define __CR_SUPER_H__
> +
> +#include <limits.h>
> +
> +enum {
> +	SUPER_RSP_OK				= 1,
> +	SUPER_RSP_ERR				= 2,
> +
> +	SUPER_REQ_EXIT				= 3,
> +
> +	SUPER_REQ_MFD_OPEN_DIR			= 4,

Why do we need to have a separate command to open a directory?

We can cache a directory desriptor and open a directory only
if prev_pid isn't equal to req->pid.

> +	SUPER_REQ_MFD_FILL_STAT			= 5,
> +	SUPER_REQ_MFD_FILL_LINK			= 6,
> +	SUPER_REQ_MFD_FILL_MNT_ID		= 7,

Why we can't have one only one command?

> +
> +	SUPER_REQ_MAX
> +};
> +
> +typedef struct {
> +	unsigned short				req_type;
> +	unsigned short				rsp_type;
> +	unsigned short				rsp_err;
> +	unsigned short				__pad;
> +} super_req_hdr_t;
> +
> +typedef struct {
> +	super_req_hdr_t				hdr;
> +	pid_t					pid;
> +} super_req_mfd_open_dir_t;
> +
> +typedef struct {
> +	super_req_hdr_t				hdr;
> +	/* IN */
> +	unsigned long				vma_start;
> +	unsigned long				vma_end;
> +	/* OUT */
> +	char					path[PATH_MAX];
> +} super_req_mfd_fill_link_t;
> +
> +typedef struct {
> +	super_req_hdr_t				hdr;
> +	/* IN */
> +	unsigned long				vma_start;
> +	unsigned long				vma_end;
> +	/* OUT */
> +	struct stat				st;
> +} super_req_mfd_fill_stat_t;
> +
> +typedef struct {
> +	super_req_hdr_t				hdr;
> +	/* IN */
> +	unsigned long				vma_start;
> +	unsigned long				vma_end;
> +	/* OUT */
> +	unsigned int				mnt_id;
> +} super_req_mfd_fill_mnt_id_t;
> +
> +#endif /* __CR_SUPER_H__ */
> diff --git a/super/main.c b/super/main.c
> new file mode 100644
> index 000000000000..b4f82e2d067c
> --- /dev/null
> +++ b/super/main.c
> @@ -0,0 +1,318 @@
> +#ifndef _GNU_SOURCE
> +#define _GNU_SOURCE
> +#endif
> +
> +#include <stdio.h>
> +#include <stdlib.h>
> +#include <limits.h>
> +#include <unistd.h>
> +#include <errno.h>
> +#include <getopt.h>
> +#include <string.h>
> +#include <ctype.h>
> +#include <stdarg.h>
> +#include <dirent.h>
> +#include <fcntl.h>
> +
> +#include <sys/types.h>
> +#include <sys/socket.h>
> +#include <sys/un.h>
> +#include <sys/wait.h>
> +#include <sys/stat.h>
> +
> +#include "criu-log.h"
> +#include "xmalloc.h"
> +
> +#include "super/cr-super.h"
> +
> +#undef LOG_PREFIX
> +#define LOG_PREFIX "spr: "
> +
> +static int mfd_dir_fd = -1;
> +
> +#define __RSP_OK(__r)								\
> +	do {									\
> +		((super_req_hdr_t *)__r)->rsp_type = SUPER_RSP_OK;		\
> +		((super_req_hdr_t *)__r)->rsp_err = 0;				\
> +	} while (0)
> +
> +#define __RSP_ERR(__r, __err)							\
> +	do {									\
> +		((super_req_hdr_t *)__r)->rsp_type = SUPER_RSP_ERR;		\
> +		((super_req_hdr_t *)__r)->rsp_err = (__err);			\
> +	} while (0)
> +
> +#define __TRACE_SND_PKT(__r)							\
> +	do {									\
> +		pr_debug("snd -> req: %2d rsp: %2d err: %2d\n",			\
> +			 ((super_req_hdr_t *)__r)->req_type,			\
> +			 ((super_req_hdr_t *)__r)->rsp_type,			\
> +			 ((super_req_hdr_t *)__r)->rsp_err);			\
> +	} while (0)
> +
> +#define __TRACE_RCV_PKT(__r)							\
> +	do {									\
> +		pr_debug("rcv -> req: %2d\n",					\
> +			 ((super_req_hdr_t *)__r)->req_type);			\
> +	} while (0)
> +
> +void print_on_level(unsigned int loglevel, const char *format, ...)
> +{
> +	va_list params;
> +
> +	va_start(params, format);
> +	vprintf(format, params);
> +	va_end(params);
> +}
> +
> +static int handle_request(int sk, struct ucred *ids, void *req, size_t size)
> +{
> +	super_req_hdr_t			*r = req;
> +	super_req_mfd_fill_mnt_id_t	*mfd_fill_mnt_id;
> +	super_req_mfd_fill_stat_t	*mfd_fill_stat;
> +	super_req_mfd_fill_link_t	*mfd_fill_link;
> +	super_req_mfd_open_dir_t	*mfd_open_dir;
> +
> +	char buf[1024], *str;
> +	char path[128];
> +	FILE *f;
> +	ssize_t len;
> +	int fd;
> +
> +#define __check_opened(__r)					\
> +	do {							\
> +		if (mfd_dir_fd < 0) {				\
> +			pr_err("dir is not yet opened\n");	\
> +			__RSP_ERR(__r, -EINVAL);		\
> +			return -EINVAL;				\
> +		}						\
> +	} while (0)

I don't like macroses which calls "return"

> +
> +#define __check_size(__r, __type, __size)			\
> +	do {							\
> +		if ((__size) < sizeof(__type)) {		\
> +			pr_err("corrupted header\n");		\
> +			__RSP_ERR(__r, -EINVAL);		\
> +			return -EINVAL;				\
> +		}						\
> +	} while (0)
> +
> +	switch (r->req_type) {
> +	case SUPER_REQ_EXIT:
> +		break;
> +	case SUPER_REQ_MFD_OPEN_DIR:
> +		mfd_open_dir = req;
> +
> +		if (mfd_dir_fd >= 0) {
> +			pr_err("mfd already opened\n");
> +			__RSP_ERR(req, -EINVAL);
> +			return -EINVAL;
> +		}
> +
> +		snprintf(path, sizeof(path), "/proc/%d/map_files", mfd_open_dir->pid);
> +
> +		mfd_dir_fd = open(path, O_RDONLY);
> +		if (mfd_dir_fd < 0) {
> +			pr_perror("Can't open %s", path);
> +			__RSP_ERR(r, -errno);
> +			return -errno;
> +		}
> +		break;
> +	case SUPER_REQ_MFD_FILL_STAT:
> +		mfd_fill_stat = req;
> +
> +		__check_opened(r);
> +		__check_size(r, typeof(*mfd_fill_stat), size);
> +
> +		snprintf(path, sizeof(path), "%lx-%lx",
> +			 mfd_fill_stat->vma_start, mfd_fill_stat->vma_end);
> +
> +		fd = openat(mfd_dir_fd, path, O_RDONLY);
> +		if (fd < 0) {
> +			pr_perror("Can't open %s", path);
> +			__RSP_ERR(r, -errno);
> +			return -errno;
> +		}
> +		if (fstatat(mfd_dir_fd, path, &mfd_fill_stat->st, 0)) {
> +			pr_perror("Can't stat %s", path);
> +			__RSP_ERR(r, -errno);
> +			return -errno;
> +		}
> +		close(fd);
> +		break;
> +	case SUPER_REQ_MFD_FILL_LINK:
> +		mfd_fill_link = req;
> +
> +		__check_opened(r);
> +		__check_size(r, typeof(*mfd_fill_link), size);
> +
> +		snprintf(path, sizeof(path), "%lx-%lx",
> +			 mfd_fill_link->vma_start, mfd_fill_link->vma_end);
> +
> +		mfd_fill_link->path[0] = '.';
> +		len = readlinkat(mfd_dir_fd, path, &mfd_fill_link->path[1],
> +				 sizeof(mfd_fill_link->path) - 1);
> +		if (len < 0 || len == (sizeof(mfd_fill_link->path) - 1)) {
> +			pr_perror("Can't readlink on %s", path);
> +			__RSP_ERR(r, len < 0 ? -errno : -ENOSPC);
> +			return len < 0 ? -errno : -ENOSPC;
> +		}
> +		mfd_fill_link->path[len] = 0;
> +		break;
> +	case SUPER_REQ_MFD_FILL_MNT_ID:
> +		mfd_fill_mnt_id = req;
> +
> +		__check_opened(r);
> +		__check_size(r, typeof(*mfd_fill_mnt_id), size);
> +
> +		snprintf(path, sizeof(path), "%lx-%lx",
> +			 mfd_fill_mnt_id->vma_start,
> +			 mfd_fill_mnt_id->vma_end);
> +
> +		fd = openat(mfd_dir_fd, path, O_RDONLY);
> +		if (fd < 0) {
> +			pr_perror("Can't open %s", path);
> +			__RSP_ERR(r, -errno);
> +			return -errno;
> +		}
> +
> +		snprintf(path, sizeof(path), "/proc/self/fdinfo/%d", fd);
> +		f = fopen(path, "r");
> +		if (!f) {
> +			pr_perror("Can't open %s", path);
> +			__RSP_ERR(r, -errno);
> +			return -errno;
> +		}
> +
> +		mfd_fill_mnt_id->mnt_id = -1;
> +
> +		while ((str = fgets(buf, sizeof(buf), f))) {
> +			if (strncmp(str, "mnt_id:\t", 8))
> +				continue;
> +			mfd_fill_mnt_id->mnt_id = atol(&str[8]);
> +			break;
> +		}
> +
> +		fclose(f);
> +		close(fd);
> +
> +		if (mfd_fill_mnt_id->mnt_id == -1) {
> +			pr_err("Can't parse fdinfo of %lx-%lx\n",
> +			       mfd_fill_mnt_id->vma_start,
> +			       mfd_fill_mnt_id->vma_end);
> +			__RSP_ERR(r, -ENOENT);
> +			return -ENOENT;
> +		}
> +		break;
> +	default:
> +		pr_err("Unknown request type\n");
> +		__RSP_ERR(r, -EINVAL);
> +		return -ENOENT;
> +	}
> +
> +	__RSP_OK(r);
> +	return 0;
> +
> +#undef __check_size
> +#undef __check_opened
> +}
> +
> +int main(int argc, char *argv[])
> +{
> +	int socket_fd = -1;
> +	int opt, idx;
> +	int ret = 1;
> +
> +	void *reqcv_buf = NULL;
> +	super_req_hdr_t *r;
> +	ssize_t recv_size;
> +	ssize_t send_size;
> +
> +	struct ucred ids;
> +	socklen_t ids_len = sizeof(ids);
> +
> +	static const char short_opts[] = "f:";
> +	static struct option long_opts[] = {
> +		{ "fd",	required_argument,	0, 'f'	},
> +		{ },
> +	};
> +
> +	while (1) {
> +		idx = -1;
> +		opt = getopt_long(argc, argv, short_opts, long_opts, &idx);
> +		if (opt == -1)
> +			break;
> +
> +		switch (opt) {
> +		case 'f':
> +			socket_fd = atoi(optarg);
> +			break;
> +		default:
> +			goto out;
> +		}
> +	}
> +
> +	if (socket_fd < 0)
> +		goto usage;
> +
> +	if (getsockopt(socket_fd, SOL_SOCKET, SO_PEERCRED, &ids, &ids_len)) {
> +		pr_perror("Can't get socket options");
> +		goto out;
> +	}
> +
> +	/*
> +	 * FIXME: Add test that we have such user in a proper group.
> +	 */
> +
> +	pr_debug("Listening\n");
> +	for (;;) {
> +		recv_size = recv(socket_fd, NULL, 0, MSG_TRUNC | MSG_PEEK);
> +		if (recv_size < sizeof(super_req_hdr_t)) {
> +			pr_perror("Can't read request");
> +			goto out;
> +		}
> +
> +		reqcv_buf = xmalloc(recv_size);
> +		if (!reqcv_buf)
> +			goto out;
> +
> +		recv_size = recv(socket_fd, reqcv_buf, recv_size, MSG_TRUNC);
> +		if (recv_size < 0) {
> +			pr_perror("Can't read request");
> +			goto out;
> +		} else if (recv_size == 0)
> +			goto out;
> +
> +		__TRACE_RCV_PKT(reqcv_buf);
> +
> +		ret = handle_request(socket_fd, &ids, reqcv_buf, recv_size);
> +		if (ret < 0)
> +			goto out;
> +
> +		__TRACE_SND_PKT(reqcv_buf);
> +
> +		send_size = send(socket_fd, reqcv_buf, recv_size, 0);
> +		if (send_size < recv_size) {
> +			pr_perror("Can't send request");

Don't mix ret and the function exit code.

Here ret is zero, so you return zero code for the error case

> +			goto out;
> +		}
> +
> +		r = reqcv_buf;
> +		if (r->req_type == SUPER_REQ_EXIT)
> +			break;
> +
> +		xfree(reqcv_buf);
> +		reqcv_buf = NULL;
> +	}
> +
> +	ret = 0;
> +out:
> +	if (socket_fd >= 0)
> +		close(socket_fd);
> +	xfree(reqcv_buf);
> +	return ret;
> +
> +usage:
> +	printf("%s: --fd <num>\n", argv[0][1]);

This should be printed on stderr

> +	return 1;
> +}
> -- 
> 2.4.3
> 


More information about the CRIU mailing list