[CRIU] [PATCH 1/3] cr-super: Initial commit

Andrew Vagin avagin at odin.com
Thu Sep 24 12:19:26 PDT 2015


On Thu, Sep 24, 2015 at 07:37:13PM +0300, Cyrill Gorcunov wrote:
> Running CRIU with root privileges is required because of kernel
> restrictions: in particular, manipulating /proc/$pid/map_files/
> is guarded with CAP_SYS_ADMIN (which I plan to drop in the future
> but can't right now).
> 
> Still, it has been found that when we run CRIU in service mode, handling
> RPC requests introduces problems with the security context:
> 
>   * CVE-2015-5228
>     https://bugzilla.redhat.com/show_bug.cgi?id=1255782
> 
>   * CVE-2015-5231
>     https://bugzilla.redhat.com/show_bug.cgi?id=1256728
> 
> And here is an idea of how to cork up this kind of problem:
> we introduce a small helper tool named 'cr-super' which
> should have the suid bit set with the proper owner, and CRIU would
> run it when it needs to fetch information about map_files
> entries.
> 
> Basically, a client opens a socketpair and passes one end to
> cr-super as an argument, then it asks for file descriptor
> transfers via a SUPER_REQ_MFD_DRAIN packet; cr-super opens
> the appropriate map_files/ entry with the O_PATH flag and
> sends it back to the caller.
> 
> Another option is to fetch information about the entry of interest
> via the SUPER_REQ_STAT command; cr-super opens the appropriate
> map_files/ entries in the following way
> 
>  - opens with O_RDONLY
>   - if it fails, then it saves @errno to be sent back
>    - opens with O_PATH
>  - fetches stat() over the entry
>  - finds mnt_id from procfs
>  - sends all this information back to the caller via a net packet.
> 
> The victim whose map_files are requested must already be
> ptraced by the client, otherwise cr-super will
> refuse to process the request.
> 
> In this early commit I didn't add a test for the client uid/gid,
> but we need to check that the client belongs to, say, a "criu" group
> which would be allowed to talk to cr-super.
> 
> Also, CRIU itself needs to be modified to start using this
> helper on demand (if criu itself is running as root
> then we don't need to use this service).
> 
> Signed-off-by: Cyrill Gorcunov <gorcunov at openvz.org>
> ---
>  Makefile            |  15 ++-
>  super/Makefile      |   1 +
>  super/cr-super.h    |  52 ++++++++
>  super/main.c        | 362 ++++++++++++++++++++++++++++++++++++++++++++++++++++
>  super/nocc-shared.c |  65 ++++++++++
>  5 files changed, 493 insertions(+), 2 deletions(-)
>  create mode 100644 super/Makefile
>  create mode 100644 super/cr-super.h
>  create mode 100644 super/main.c
>  create mode 100644 super/nocc-shared.c
> 
> diff --git a/Makefile b/Makefile
> index fdc6830f4e68..056603cd6c53 100644
> --- a/Makefile
> +++ b/Makefile
> @@ -187,13 +187,14 @@ build-crtools := -r -R -f scripts/Makefile.build makefile=Makefile.crtools obj
>  PROGRAM		:= criu
>  
>  .PHONY: all zdtm test rebuild clean distclean tags cscope	\
> -	docs help pie protobuf $(ARCH_DIR) clean-built lib crit
> +	docs help pie protobuf $(ARCH_DIR) clean-built lib crit	\
> +	super cr-super
>  
>  ifeq ($(GCOV),1)
>  %.o $(PROGRAM): override CFLAGS += --coverage
>  endif
>  
> -all: config pie $(VERSION_HEADER) $(CRIU-LIB)
> +all: config pie super cr-super $(VERSION_HEADER) $(CRIU-LIB)
>  	$(Q) $(MAKE) $(PROGRAM)
>  	$(Q) $(MAKE) crit
>  
> @@ -248,6 +249,15 @@ $(PROGRAM): $(SYSCALL-LIB) $(ARCH-LIB) $(PROGRAM-BUILTINS)
>  	$(E) "  LINK    " $@
>  	$(Q) $(CC) $(CFLAGS) $^ $(LIBS) $(LDFLAGS) $(GMONLDOPT) -rdynamic -o $@
>  
> +super/%:: $(VERSION_HEADER) config built-in.o pie/util-fd.o $(SYSCALL-LIB)
> +	$(Q) $(MAKE) $(build)=super $@
> +super: $(VERSION_HEADER) config built-in.o pie/util-fd.o $(SYSCALL-LIB)
> +	$(Q) $(MAKE) $(build)=super all
> +
> +cr-super: super/built-in.o pie/util-fd.o $(SYSCALL-LIB)
> +	$(E) "  LINK    " $@
> +	$(Q) $(CC) $(CFLAGS) $^ $(LDFLAGS) $(GMONLDOPT) -rdynamic -o $@
> +
>  crit:
>  	$(Q) $(MAKE) -C pycriu all
>  
> @@ -265,6 +275,7 @@ clean-built:
>  	$(Q) $(MAKE) $(build)=pie clean
>  	$(Q) $(MAKE) $(build)=lib clean
>  	$(Q) $(MAKE) $(build-crtools)=. clean
> +	$(Q) $(MAKE) $(build)=super clean
>  	$(Q) $(MAKE) -C Documentation clean
>  	$(Q) $(RM) ./include/config.h
>  	$(Q) $(RM) ./$(PROGRAM)
> diff --git a/super/Makefile b/super/Makefile
> new file mode 100644
> index 000000000000..b666967fd570
> --- /dev/null
> +++ b/super/Makefile
> @@ -0,0 +1 @@
> +obj-y += main.o
> diff --git a/super/cr-super.h b/super/cr-super.h
> new file mode 100644
> index 000000000000..3ad3605be968
> --- /dev/null
> +++ b/super/cr-super.h
> @@ -0,0 +1,52 @@
> +#ifndef __CR_SUPER_H__
> +#define __CR_SUPER_H__
> +
> +#include <sys/types.h>
> +
> +enum {
> +	SUPER_RSP_OK				= 1,
> +	SUPER_RSP_ERR				= 2,
> +
> +	SUPER_REQ_PING				= 3,
> +	SUPER_REQ_EXIT				= 4,
> +	SUPER_REQ_MFD_DRAIN			= 5,
> +	SUPER_REQ_STAT				= 6,
> +
> +	SUPER_REQ_MAX
> +};
> +
> +typedef struct {
> +	unsigned short				req_type;
> +	unsigned short				rsp_type;
> +	int					rsp_err;
> +} super_req_hdr_t;
> +
> +typedef struct {
> +	unsigned long				start;
> +	unsigned long				end;
> +} super_req_vma_t;
> +
> +typedef struct {
> +	super_req_hdr_t				hdr;
> +	pid_t					pid;
> +	size_t					nr_vmas;
> +	super_req_vma_t				vma[0];
> +} super_req_mfd_drain_t;
> +
> +typedef struct {
> +	int					mnt_id;
> +	struct stat				st;
> +	int					o_rdonly_error;
> +} super_req_mfd_info_entry_t;
> +
> +typedef struct {
> +	super_req_hdr_t				hdr;
> +	pid_t					pid;
> +	size_t					nr_vmas;
> +	union {
> +		super_req_vma_t			vma[0];
> +		super_req_mfd_info_entry_t	info[0];
> +	} u;
> +} super_req_mfd_info_t;
> +
> +#endif /* __CR_SUPER_H__ */
> diff --git a/super/main.c b/super/main.c
> new file mode 100644
> index 000000000000..d4559df14905
> --- /dev/null
> +++ b/super/main.c
> @@ -0,0 +1,362 @@
> +#ifndef _GNU_SOURCE
> +#define _GNU_SOURCE
> +#endif
> +
> +#include <stdio.h>
> +#include <stdlib.h>
> +#include <limits.h>
> +#include <unistd.h>
> +#include <errno.h>
> +#include <getopt.h>
> +#include <string.h>
> +#include <ctype.h>
> +#include <stdarg.h>
> +#include <dirent.h>
> +#include <fcntl.h>
> +#include <limits.h>
> +
> +#include <sys/types.h>
> +#include <sys/socket.h>
> +#include <sys/un.h>
> +#include <sys/wait.h>
> +#include <sys/stat.h>
> +
> +#include "criu-log.h"
> +#include "util-pie.h"
> +#include "xmalloc.h"
> +
> +#include "super/cr-super.h"
> +
> +#undef LOG_PREFIX
> +#define LOG_PREFIX "spr: "
> +
> +static int cur_loglevel = DEFAULT_LOGLEVEL;
> +static int log_file_fd = STDOUT_FILENO;
> +static int mfd_dir_fd = -1;
> +
> +void print_on_level(unsigned int loglevel, const char *format, ...)
> +{
> +	va_list params;
> +	char buf[4096];
> +	int size;
> +
> +	if (loglevel > cur_loglevel)
> +		return;
> +
> +	va_start(params, format);
> +	size = vsnprintf(buf, sizeof(buf), format, params);
> +	if (size > 0)
> +		write(log_file_fd, buf, size);
> +	va_end(params);
> +}
> +
> +static int send_ok(int sk, super_req_hdr_t *r)
> +{
> +	r->rsp_type = SUPER_RSP_OK;
> +	r->rsp_err = 0;
> +
> +	pr_debug("<- req %2d rsp %2d err %2d\n", r->req_type, SUPER_RSP_OK, 0);
> +	if (send(sk, r, sizeof(*r), 0) != sizeof(*r)) {
> +		pr_perror("Can't send ok response");
> +		return -1;
> +	}
> +
> +	return 0;
> +}
> +
> +static int send_err(int sk, super_req_hdr_t *r, int error)
> +{
> +	r->rsp_type = SUPER_RSP_ERR;
> +	r->rsp_err = error;
> +
> +	pr_debug("<- req %2d rsp %2d err %2d\n", r->req_type, SUPER_RSP_ERR, error);
> +	if (send(sk, r, sizeof(*r), 0) != sizeof(*r)) {
> +		pr_perror("Can't send error response");
> +		return -1;
> +	}
> +
> +	return 0;
> +}
> +
> +static bool match_tracer_pid(pid_t tracer_pid, pid_t pid)
> +{
> +	char buf[512], *str;
> +	bool match = false;
> +	char path[128];
> +	FILE *f;
> +
> +	snprintf(path, sizeof(path), "/proc/%d/status", pid);
> +	f = fopen(path, "r");
> +	if (!f) {
> +		pr_perror("Can't open %s\n", path);
> +		return -1;
> +	}
> +
> +	while ((str = fgets(buf, sizeof(buf), f))) {
> +		if (strncmp(str, "TracerPid:\t", 11))
> +			continue;
> +		if (tracer_pid == atoi(&str[11]))
> +			match = true;
> +		break;
> +	}
> +	fclose(f);
> +
> +	return match;
> +}
> +
> +#include "super/nocc-shared.c"
> +
> +static int get_mfd_dir_fd(struct ucred *ids, pid_t pid, mode_t mode)
> +{
> +	if (!match_tracer_pid(ids->pid, pid)) {
> +		pr_err("Pids mismatch\n");
> +		return -EPERM;
> +	}
> +
> +	if (mfd_dir_fd < 0) {
> +		char path[64];
> +
> +		snprintf(path, sizeof(path), "/proc/%d/map_files", pid);
> +		mfd_dir_fd = open(path, mode);
> +		if (mfd_dir_fd < 0) {
> +			pr_perror("Can't open map_files");
> +			return -errno;
> +		}
> +	}
> +
> +	return mfd_dir_fd;
> +}
> +
> +static int handle_request(int sk, struct ucred *ids, void *r, size_t size)
> +{
> +	super_req_hdr_t *__r = r;
> +
> +	pr_debug("-> req %2d\n", __r->req_type);
> +
> +	switch (__r->req_type) {
> +	case SUPER_REQ_PING:
> +		send_ok(sk, r);
> +		break;
> +	case SUPER_REQ_EXIT:
> +		if (mfd_dir_fd >= 0) {
> +			close(mfd_dir_fd);
> +			mfd_dir_fd = -1;
> +		}
> +
> +		send_ok(sk, r);
> +		return 1;
> +		break;
> +	case SUPER_REQ_STAT:
> +	{
> +		super_req_mfd_info_t *req = r;
> +		super_req_mfd_info_t *rsp;
> +
> +		ssize_t rsp_len, rsp_size;
> +		int dir_fd;
> +		size_t i;
> +
> +		if (req->nr_vmas > CR_SCM_MAX_FD) {
> +			pr_err("Too many vmas requested\n");
> +			send_err(sk, r, -ENOSPC);
> +			goto out;
> +		}
> +
> +		dir_fd = get_mfd_dir_fd(ids, req->pid, O_RDONLY);
> +		if (dir_fd < 0) {
> +			dir_fd = get_mfd_dir_fd(ids, req->pid, O_PATH);
> +			if (dir_fd < 0) {
> +				send_err(sk, r, dir_fd);
> +				goto out;
> +			}
> +		}
> +
> +		rsp_size = sizeof(*rsp) + req->nr_vmas * sizeof(super_req_mfd_info_entry_t);
> +		rsp = xmalloc(rsp_size);
> +		if (!rsp) {
> +			send_err(sk, r, -ENOMEM);
> +			goto out;
> +		}
> +		memset(rsp, 0, sizeof(*rsp));
> +		memcpy(rsp, req, sizeof(*req));
> +
> +		for (i = 0; i < req->nr_vmas; i++) {
> +			int ret = nocc_cr_super_fetch_mfd_info(dir_fd,
> +							       req->u.vma[i].start,
> +							       req->u.vma[i].end,
> +							       &rsp->u.info[i]);
> +			if (ret) {
> +				xfree(rsp);
> +				send_err(sk, r, ret);
> +				goto out;
> +			}
> +		}
> +
> +		rsp->hdr.rsp_type = SUPER_RSP_OK;
> +		rsp->hdr.rsp_err = 0;
> +
> +		rsp_len = send(sk, rsp, rsp_size, 0);
> +		xfree(rsp);
> +
> +		if (rsp_len != rsp_size) {
> +			pr_perror("Can't send response");
> +			send_err(sk, r, -EIO);
> +		}
> +		break;
> +	}
> +	case SUPER_REQ_MFD_DRAIN:
> +	{
> +		super_req_mfd_drain_t *req = r;
> +		int fds[CR_SCM_MAX_FD];
> +		int dir_fd;
> +		size_t i;
> +
> +		if (req->nr_vmas > CR_SCM_MAX_FD) {
> +			pr_err("Too many fds requested\n");
> +			send_err(sk, r, -ENOSPC);
> +			goto out;
> +		}
> +
> +		dir_fd = get_mfd_dir_fd(ids, req->pid, O_PATH);
> +		if (dir_fd < 0) {
> +			send_err(sk, r, dir_fd);
> +			goto out;
> +		}
> +
> +		for (i = 0; i < req->nr_vmas; i++) {
> +			fds[i] = nocc_cr_super_open_mfd_file(dir_fd,
> +							     req->vma[i].start,
> +							     req->vma[i].end,
> +							     O_PATH);
> +			if (fds[i] < 0) {
> +				send_err(sk, r, fds[i]);
> +				goto out;
> +			}
> +		}
> +
> +		send_ok(sk, r);
> +
> +		if (send_fds(sk, NULL, 0, fds, req->nr_vmas, false)) {
> +			pr_perror("Can't send fds");
> +			send_err(sk, r, -EIO);
> +		}

Should we close all file descriptors from fds?

> +		break;
> +	}
> +	default:
> +		pr_err("Unknown request type %d\n", __r->req_type);
> +		send_err(sk, r, -EINVAL);
> +		break;
> +	}
> +
> +out:
> +	return 0;
> +}
> +
> +int main(int argc, char *argv[])
> +{
> +	int socket_fd = -1;
> +	int ret_code = 1;
> +	int opt, idx;
> +
> +	void *recv_buf = NULL;
> +	ssize_t recv_size;
> +
> +	struct ucred ids;
> +	socklen_t ids_len = sizeof(ids);
> +	super_req_hdr_t err_hdr = {
> +		.req_type = SUPER_REQ_PING,
> +	};
> +
> +	static const char short_opts[] = "s:v:o:";
> +	static struct option long_opts[] = {
> +		{ "socket",	required_argument,	0, 's'	},
> +		{ "log-file",	required_argument,	0, 'o'	},
> +		{ },
> +	};
> +
> +	while (1) {
> +		idx = -1;
> +		opt = getopt_long(argc, argv, short_opts, long_opts, &idx);
> +		if (opt == -1)
> +			break;
> +
> +		switch (opt) {
> +		case 's':
> +			socket_fd = atoi(optarg);
> +			break;
> +		case 'o':
> +			log_file_fd = atoi(optarg);
> +			break;
> +		case 'v':
> +			if (optarg) {
> +				if (optarg[0] == 'v')
> +					cur_loglevel += strlen(optarg) + 1;
> +				else
> +					cur_loglevel = atoi(optarg);
> +			} else
> +				cur_loglevel++;
> +			break;
> +		default:
> +			goto usage;
> +		}
> +	}
> +
> +	if (socket_fd < 0)
> +		goto usage;
> +
> +	if (getsockopt(socket_fd, SOL_SOCKET, SO_PEERCRED, &ids, &ids_len)) {
> +		pr_perror("Can't get socket options");
> +		goto out;
> +	}
> +
> +	/*
> +	 * FIXME: Add a test that we have such user in a proper group.
> +	 */
> +
> +	pr_debug("Waiting for requests\n");
> +	for (;;) {
> +		int ret;
> +
> +		recv_size = recv(socket_fd, NULL, 0, MSG_TRUNC | MSG_PEEK);
> +		if (recv_size < sizeof(super_req_hdr_t)) {
> +			send_err(socket_fd, &err_hdr, -EIO);
> +			pr_perror("Can't read request (got %u expecting %u)",
> +				  (unsigned int)recv_size,
> +				  (unsigned int)sizeof(super_req_hdr_t));
> +			goto out;
> +		}
> +
> +		recv_buf = xmalloc(recv_size);
> +		if (!recv_buf) {
> +			send_err(socket_fd, &err_hdr, -ENOMEM);
> +			goto out;
> +		}
> +
> +		recv_size = recv(socket_fd, recv_buf, recv_size, MSG_TRUNC);
> +		if (recv_size <= 0) {
> +			send_err(socket_fd, &err_hdr, -EIO);
> +			pr_perror("Can't read request (got %d)",
> +				  (unsigned int)recv_size);
> +			goto out;
> +		}
> +
> +		ret = handle_request(socket_fd, &ids, recv_buf, recv_size);
> +		if (ret < 0)
> +			goto out;
> +		else if (ret > 0) {
> +			pr_debug("Exiting\n");
> +			break;
> +		}
> +
> +		xfree(recv_buf);
> +		recv_buf = NULL;
> +	}
> +
> +	ret_code = 0;
> +out:
> +	xfree(recv_buf);
> +	_exit(ret_code);
> +
> +usage:
> +	fprintf(stderr, "%s: --socket <num> [--log-file <num>] [-v<num>]\n", argv[0]);
> +	_exit(1);
> +}
> diff --git a/super/nocc-shared.c b/super/nocc-shared.c
> new file mode 100644
> index 000000000000..74df9abbd063
> --- /dev/null
> +++ b/super/nocc-shared.c
> @@ -0,0 +1,65 @@
> +/*
> + * This routine is shared between executables and recompiled.
> + * So the file is included in a source form.
> + */
> +
> +static int nocc_cr_super_open_mfd_file(int mfd_dir_fd,
> +				       unsigned long vma_start,
> +				       unsigned long vma_end,
> +				       mode_t mode)
> +{
> +	char path[64];
> +	int fd;
> +
> +	snprintf(path, sizeof(path), "%lx-%lx", vma_start, vma_end);
> +	fd = openat(mfd_dir_fd, path, mode);
> +
> +	return fd >= 0 ? fd : -errno;
> +}
> +
> +static int nocc_cr_super_fetch_mfd_info(int mfd_dir_fd,
> +					unsigned long vma_start,
> +					unsigned long vma_end,
> +					super_req_mfd_info_entry_t *info)
> +{
> +	int fd_rdonly, fd_path;
> +	char buf[512], *str;
> +	int fd_stat;
> +	FILE *f;
> +
> +	snprintf(buf, sizeof(buf), "%lx-%lx", vma_start, vma_end);
> +	fd_stat = fd_rdonly = openat(mfd_dir_fd, buf, O_RDONLY);
> +	if (fd_rdonly < 0) {
> +		fd_stat = fd_path = openat(mfd_dir_fd, buf, O_PATH);
> +		if (fd_path < 0)
> +			return -errno;
> +		info->o_rdonly_error = fd_rdonly;
> +	} else
> +		info->o_rdonly_error = 0;
> +
> +	if (fstat(fd_stat, &info->st)) {
> +		close(fd_stat);
> +		return -errno;
> +	}
> +
> +	snprintf(buf, sizeof(buf), "/proc/self/fdinfo/%d", fd_stat);
> +	f = fopen(buf, "r");
> +	if (!f) {
> +		close(fd_stat);
> +		return -errno;
> +	}
> +
> +	while ((str = fgets(buf, sizeof(buf), f))) {
> +		if (!strncmp(str, "mnt_id:\t", 8))
> +			break;
> +	}
> +
> +	close(fd_stat);
> +	fclose(f);
> +
> +	if (!str)
> +		return -ENOENT;
> +
> +	info->mnt_id = atoi(&str[8]);
> +	return 0;
> +}
> -- 
> 2.4.3
> 


More information about the CRIU mailing list