[CRIU] [PATCH 1/3] cr-super: Initial commit

Cyrill Gorcunov gorcunov at openvz.org
Thu Sep 24 09:37:13 PDT 2015


Running CRIU with root privileges is required because of the kernel's
restrictions: in particular, manipulating /proc/$pid/map_files/
is guarded with CAP_SYS_ADMIN (which I plan to drop in the future
but can't right now).

Still, it has been found that when CRIU runs in service mode, handling
RPC requests introduces problems with the security context:

  * CVE-2015-5228
    https://bugzilla.redhat.com/show_bug.cgi?id=1255782

  * CVE-2015-5231
    https://bugzilla.redhat.com/show_bug.cgi?id=1256728

And here is an idea of how to plug this kind of problem:
we introduce a small helper tool named 'cr-super' which
should have the suid bit set with a proper owner, and CRIU would
run it when it needs to fetch information about map_files
entries.

Basically, a client opens a socketpair and passes one end to
cr-super as an argument, then it asks for file descriptor
transfers via a SUPER_REQ_MFD_DRAIN packet; cr-super opens
the appropriate map_files/ entry with the O_PATH flag and
sends it back to the caller.

Another option is to fetch information about the entry of
interest via the SUPER_REQ_STAT command; cr-super handles the
appropriate map_files/ entries in the following way

 - opens with O_RDONLY
  - if it fails, then it saves @errno to be sent back
   - and opens with O_PATH
 - fetches stat() for the entry
 - finds mnt_id from procfs
 - sends all this information back to the caller via a net packet.

The victim whose map_files are requested must already be
ptraced by the client, otherwise cr-super will
refuse to process the request.

In this early commit I didn't add a test for the client uid/gid,
but we need to check that the client belongs to, say, a "criu" group
which would be allowed to talk to cr-super.

Also, we need to modify CRIU itself to start using this
helper on demand (if criu itself is running under root
then we don't need to use this service).

Signed-off-by: Cyrill Gorcunov <gorcunov at openvz.org>
---
 Makefile            |  15 ++-
 super/Makefile      |   1 +
 super/cr-super.h    |  52 ++++++++
 super/main.c        | 362 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 super/nocc-shared.c |  65 ++++++++++
 5 files changed, 493 insertions(+), 2 deletions(-)
 create mode 100644 super/Makefile
 create mode 100644 super/cr-super.h
 create mode 100644 super/main.c
 create mode 100644 super/nocc-shared.c

diff --git a/Makefile b/Makefile
index fdc6830f4e68..056603cd6c53 100644
--- a/Makefile
+++ b/Makefile
@@ -187,13 +187,14 @@ build-crtools := -r -R -f scripts/Makefile.build makefile=Makefile.crtools obj
 PROGRAM		:= criu
 
 .PHONY: all zdtm test rebuild clean distclean tags cscope	\
-	docs help pie protobuf $(ARCH_DIR) clean-built lib crit
+	docs help pie protobuf $(ARCH_DIR) clean-built lib crit	\
+	super cr-super
 
 ifeq ($(GCOV),1)
 %.o $(PROGRAM): override CFLAGS += --coverage
 endif
 
-all: config pie $(VERSION_HEADER) $(CRIU-LIB)
+all: config pie super cr-super $(VERSION_HEADER) $(CRIU-LIB)
 	$(Q) $(MAKE) $(PROGRAM)
 	$(Q) $(MAKE) crit
 
@@ -248,6 +249,15 @@ $(PROGRAM): $(SYSCALL-LIB) $(ARCH-LIB) $(PROGRAM-BUILTINS)
 	$(E) "  LINK    " $@
 	$(Q) $(CC) $(CFLAGS) $^ $(LIBS) $(LDFLAGS) $(GMONLDOPT) -rdynamic -o $@
 
+super/%:: $(VERSION_HEADER) config built-in.o pie/util-fd.o $(SYSCALL-LIB)	# build one requested target inside super/
+	$(Q) $(MAKE) $(build)=super $@
+super: $(VERSION_HEADER) config built-in.o pie/util-fd.o $(SYSCALL-LIB)	# run the "all" target of the super/ sub-build
+	$(Q) $(MAKE) $(build)=super all
+
+cr-super: super/built-in.o pie/util-fd.o $(SYSCALL-LIB)	# link the cr-super helper from the objects listed here
+	$(E) "  LINK    " $@
+	$(Q) $(CC) $(CFLAGS) $^ $(LDFLAGS) $(GMONLDOPT) -rdynamic -o $@
+
 crit:
 	$(Q) $(MAKE) -C pycriu all
 
@@ -265,6 +275,7 @@ clean-built:
 	$(Q) $(MAKE) $(build)=pie clean
 	$(Q) $(MAKE) $(build)=lib clean
 	$(Q) $(MAKE) $(build-crtools)=. clean
+	$(Q) $(MAKE) $(build)=super clean
 	$(Q) $(MAKE) -C Documentation clean
 	$(Q) $(RM) ./include/config.h
 	$(Q) $(RM) ./$(PROGRAM)
diff --git a/super/Makefile b/super/Makefile
new file mode 100644
index 000000000000..b666967fd570
--- /dev/null
+++ b/super/Makefile
@@ -0,0 +1 @@
+obj-y += main.o
diff --git a/super/cr-super.h b/super/cr-super.h
new file mode 100644
index 000000000000..3ad3605be968
--- /dev/null
+++ b/super/cr-super.h
@@ -0,0 +1,52 @@
+#ifndef __CR_SUPER_H__
+#define __CR_SUPER_H__
+
+#include <sys/types.h>
+#include <sys/stat.h>
+
+enum {	/* codes carried in super_req_hdr_t::req_type and ::rsp_type */
+	SUPER_RSP_OK				= 1,	/* response: the request succeeded */
+	SUPER_RSP_ERR				= 2,	/* response: the request failed, see rsp_err */
+
+	SUPER_REQ_PING				= 3,	/* request: answered with a plain OK header */
+	SUPER_REQ_EXIT				= 4,	/* request: drop the cached map_files fd and stop serving */
+	SUPER_REQ_MFD_DRAIN			= 5,	/* request: open map_files entries and pass the fds back */
+	SUPER_REQ_STAT				= 6,	/* request: return per-entry info (super_req_mfd_info_entry_t) */
+
+	SUPER_REQ_MAX	/* one past the last code defined above */
+};
+
+typedef struct {	/* header that starts every request and every response */
+	unsigned short				req_type;	/* one of SUPER_REQ_* */
+	unsigned short				rsp_type;	/* SUPER_RSP_OK or SUPER_RSP_ERR, set by cr-super */
+	int					rsp_err;	/* 0 on success, negative error code otherwise */
+} super_req_hdr_t;
+
+typedef struct {	/* address range naming one map_files entry ("<start>-<end>" in hex) */
+	unsigned long				start;	/* first part of the entry name */
+	unsigned long				end;	/* second part of the entry name */
+} super_req_vma_t;
+
+typedef struct {	/* SUPER_REQ_MFD_DRAIN request */
+	super_req_hdr_t				hdr;	/* common header */
+	pid_t					pid;	/* task whose /proc/<pid>/map_files is used */
+	size_t					nr_vmas;	/* number of entries in vma[]; the handler rejects more than CR_SCM_MAX_FD */
+	super_req_vma_t				vma[0];	/* ranges to open, trailing variable-length data */
+} super_req_mfd_drain_t;
+
+typedef struct {	/* information returned for one map_files entry */
+	int					mnt_id;	/* value of the "mnt_id:" line in /proc/self/fdinfo/<fd> */
+	struct stat				st;	/* fstat() result for the opened entry */
+	int					o_rdonly_error;	/* 0 if the O_RDONLY open worked, negative if it failed and O_PATH was used */
+} super_req_mfd_info_entry_t;
+
+typedef struct {	/* SUPER_REQ_STAT request and its response share this layout */
+	super_req_hdr_t				hdr;	/* common header */
+	pid_t					pid;	/* task whose /proc/<pid>/map_files is used */
+	size_t					nr_vmas;	/* number of trailing entries; the handler rejects more than CR_SCM_MAX_FD */
+	union {
+		super_req_vma_t			vma[0];	/* request: ranges to look up */
+		super_req_mfd_info_entry_t	info[0];	/* response: one result per requested range */
+	} u;
+} super_req_mfd_info_t;
+
+#endif /* __CR_SUPER_H__ */
diff --git a/super/main.c b/super/main.c
new file mode 100644
index 000000000000..d4559df14905
--- /dev/null
+++ b/super/main.c
@@ -0,0 +1,362 @@
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <limits.h>
+#include <unistd.h>
+#include <errno.h>
+#include <getopt.h>
+#include <string.h>
+#include <ctype.h>
+#include <stdarg.h>
+#include <dirent.h>
+#include <fcntl.h>
+#include <limits.h>
+
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+#include <sys/wait.h>
+#include <sys/stat.h>
+
+#include "criu-log.h"
+#include "util-pie.h"
+#include "xmalloc.h"
+
+#include "super/cr-super.h"
+
+#undef LOG_PREFIX
+#define LOG_PREFIX "spr: "
+
+static int cur_loglevel = DEFAULT_LOGLEVEL;
+static int log_file_fd = STDOUT_FILENO;
+static int mfd_dir_fd = -1;
+
+/*
+ * Format a message and write it to the log descriptor @log_file_fd.
+ *
+ * @loglevel: level of the message; it is dropped when the level is
+ *            above @cur_loglevel.
+ * @format:   printf-style format string followed by its arguments.
+ *
+ * Messages longer than the internal buffer are truncated.
+ */
+void print_on_level(unsigned int loglevel, const char *format, ...)
+{
+	va_list params;
+	char buf[4096];
+	int size;
+
+	if (loglevel > cur_loglevel)
+		return;
+
+	va_start(params, format);
+	size = vsnprintf(buf, sizeof(buf), format, params);
+	va_end(params);
+
+	if (size <= 0)
+		return;
+
+	/*
+	 * vsnprintf() reports the length the complete message would
+	 * have had, which is larger than the buffer on truncation.
+	 * Only write what is really stored in @buf.
+	 */
+	if ((size_t)size >= sizeof(buf))
+		size = sizeof(buf) - 1;
+
+	write(log_file_fd, buf, size);
+}
+
+/*
+ * Turn header @r into a successful response and send it over @sk.
+ * Returns 0 if the whole header went out, -1 otherwise.
+ */
+static int send_ok(int sk, super_req_hdr_t *r)
+{
+	ssize_t sent;
+
+	r->rsp_err = 0;
+	r->rsp_type = SUPER_RSP_OK;
+
+	pr_debug("<- req %2d rsp %2d err %2d\n", r->req_type, SUPER_RSP_OK, 0);
+
+	sent = send(sk, r, sizeof(*r), 0);
+	if (sent == sizeof(*r))
+		return 0;
+
+	pr_perror("Can't send ok response");
+	return -1;
+}
+
+/*
+ * Turn header @r into an error response carrying @error and send it
+ * over @sk. Returns 0 if the whole header went out, -1 otherwise.
+ */
+static int send_err(int sk, super_req_hdr_t *r, int error)
+{
+	ssize_t sent;
+
+	r->rsp_err = error;
+	r->rsp_type = SUPER_RSP_ERR;
+
+	pr_debug("<- req %2d rsp %2d err %2d\n", r->req_type, SUPER_RSP_ERR, error);
+
+	sent = send(sk, r, sizeof(*r), 0);
+	if (sent == sizeof(*r))
+		return 0;
+
+	pr_perror("Can't send error response");
+	return -1;
+}
+
+/*
+ * Check whether task @pid is currently traced by @tracer_pid.
+ *
+ * The answer comes from the "TracerPid:" line of /proc/@pid/status.
+ * Returns true only if that line is found and names @tracer_pid.
+ * Every failure (bad tracer pid, unreadable status file, missing
+ * line) yields false so that the caller refuses the request.
+ */
+static bool match_tracer_pid(pid_t tracer_pid, pid_t pid)
+{
+	char buf[512], *str;
+	bool match = false;
+	char path[128];
+	FILE *f;
+
+	/*
+	 * "TracerPid:\t0" means the task is not traced at all, so a
+	 * non-positive tracer pid must never be allowed to match.
+	 */
+	if (tracer_pid <= 0)
+		return false;
+
+	snprintf(path, sizeof(path), "/proc/%d/status", pid);
+	f = fopen(path, "r");
+	if (!f) {
+		pr_perror("Can't open %s", path);
+		/*
+		 * This has to be false: -1 returned from a bool
+		 * function converts to true and would grant access.
+		 */
+		return false;
+	}
+
+	while ((str = fgets(buf, sizeof(buf), f))) {
+		if (strncmp(str, "TracerPid:\t", 11))
+			continue;
+		if (tracer_pid == atoi(&str[11]))
+			match = true;
+		break;
+	}
+	fclose(f);
+
+	return match;
+}
+
+#include "super/nocc-shared.c"
+
+/*
+ * Get a descriptor for /proc/@pid/map_files, opened with flags @mode.
+ *
+ * The request is refused with -EPERM unless @pid is traced by the
+ * peer described by @ids. The descriptor is cached in @mfd_dir_fd and
+ * reused while requests keep naming the same @pid (in that case @mode
+ * is not applied again); a request for another pid drops the cached
+ * descriptor and opens a fresh one.
+ *
+ * Returns the descriptor or a negative errno value.
+ */
+static int get_mfd_dir_fd(struct ucred *ids, pid_t pid, mode_t mode)
+{
+	/* Task the cached @mfd_dir_fd belongs to. */
+	static pid_t cached_pid = -1;
+
+	if (!match_tracer_pid(ids->pid, pid)) {
+		pr_err("Pids mismatch\n");
+		return -EPERM;
+	}
+
+	/* Never hand out the directory of a previously requested task. */
+	if (mfd_dir_fd >= 0 && cached_pid != pid) {
+		close(mfd_dir_fd);
+		mfd_dir_fd = -1;
+	}
+
+	if (mfd_dir_fd < 0) {
+		char path[64];
+
+		snprintf(path, sizeof(path), "/proc/%d/map_files", pid);
+		mfd_dir_fd = open(path, mode);
+		if (mfd_dir_fd < 0) {
+			/* Logging may change errno, so latch it first. */
+			int err = errno;
+
+			pr_perror("Can't open map_files");
+			return -err;
+		}
+		cached_pid = pid;
+	}
+
+	return mfd_dir_fd;
+}
+
+/*
+ * Process one request received from the client.
+ *
+ * @sk:   socket the request came from; responses go back over it.
+ * @ids:  credentials of the peer, used for the ptrace ownership check.
+ * @r:    buffer with the request, it starts with super_req_hdr_t.
+ * @size: number of valid bytes in @r.
+ *
+ * The request is untrusted input: its length is checked against the
+ * structures and the vma count it claims to carry before they are read.
+ *
+ * Returns 1 when the client asked to exit and 0 in every other case;
+ * failures are reported to the client with a SUPER_RSP_ERR response.
+ */
+static int handle_request(int sk, struct ucred *ids, void *r, size_t size)
+{
+	super_req_hdr_t *__r = r;
+
+	/* The header is read and rewritten below, it must be complete. */
+	if (size < sizeof(*__r)) {
+		super_req_hdr_t hdr = {
+			.req_type = SUPER_REQ_PING,
+		};
+
+		pr_err("Request is too short\n");
+		send_err(sk, &hdr, -EINVAL);
+		goto out;
+	}
+
+	pr_debug("-> req %2d\n", __r->req_type);
+
+	switch (__r->req_type) {
+	case SUPER_REQ_PING:
+		send_ok(sk, r);
+		break;
+	case SUPER_REQ_EXIT:
+		if (mfd_dir_fd >= 0) {
+			close(mfd_dir_fd);
+			mfd_dir_fd = -1;
+		}
+
+		send_ok(sk, r);
+		return 1;
+	case SUPER_REQ_STAT:
+	{
+		super_req_mfd_info_t *req = r;
+		super_req_mfd_info_t *rsp;
+
+		ssize_t rsp_len, rsp_size;
+		int dir_fd;
+		size_t i;
+
+		/* @pid and @nr_vmas must be inside the received data. */
+		if (size < sizeof(*req)) {
+			pr_err("Truncated request\n");
+			send_err(sk, r, -EINVAL);
+			goto out;
+		}
+
+		if (req->nr_vmas > CR_SCM_MAX_FD) {
+			pr_err("Too many vmas requested\n");
+			send_err(sk, r, -ENOSPC);
+			goto out;
+		}
+
+		/*
+		 * Every announced vma must be inside the received data
+		 * too (nr_vmas is bounded above, so no overflow here).
+		 */
+		if (size < sizeof(*req) + req->nr_vmas * sizeof(req->u.vma[0])) {
+			pr_err("Truncated request\n");
+			send_err(sk, r, -EINVAL);
+			goto out;
+		}
+
+		dir_fd = get_mfd_dir_fd(ids, req->pid, O_RDONLY);
+		if (dir_fd < 0) {
+			dir_fd = get_mfd_dir_fd(ids, req->pid, O_PATH);
+			if (dir_fd < 0) {
+				send_err(sk, r, dir_fd);
+				goto out;
+			}
+		}
+
+		rsp_size = sizeof(*rsp) + req->nr_vmas * sizeof(super_req_mfd_info_entry_t);
+		rsp = xmalloc(rsp_size);
+		if (!rsp) {
+			send_err(sk, r, -ENOMEM);
+			goto out;
+		}
+		/*
+		 * Clear the whole response, entries included, so that no
+		 * uninitialized memory is sent to the client.
+		 */
+		memset(rsp, 0, rsp_size);
+		memcpy(rsp, req, sizeof(*req));
+
+		for (i = 0; i < req->nr_vmas; i++) {
+			int ret = nocc_cr_super_fetch_mfd_info(dir_fd,
+							       req->u.vma[i].start,
+							       req->u.vma[i].end,
+							       &rsp->u.info[i]);
+			if (ret) {
+				xfree(rsp);
+				send_err(sk, r, ret);
+				goto out;
+			}
+		}
+
+		rsp->hdr.rsp_type = SUPER_RSP_OK;
+		rsp->hdr.rsp_err = 0;
+
+		rsp_len = send(sk, rsp, rsp_size, 0);
+		xfree(rsp);
+
+		if (rsp_len != rsp_size) {
+			pr_perror("Can't send response");
+			send_err(sk, r, -EIO);
+		}
+		break;
+	}
+	case SUPER_REQ_MFD_DRAIN:
+	{
+		super_req_mfd_drain_t *req = r;
+		int fds[CR_SCM_MAX_FD];
+		int dir_fd;
+		size_t i;
+
+		/* @pid and @nr_vmas must be inside the received data. */
+		if (size < sizeof(*req)) {
+			pr_err("Truncated request\n");
+			send_err(sk, r, -EINVAL);
+			goto out;
+		}
+
+		if (req->nr_vmas > CR_SCM_MAX_FD) {
+			pr_err("Too many fds requested\n");
+			send_err(sk, r, -ENOSPC);
+			goto out;
+		}
+
+		/* And so must every announced vma. */
+		if (size < sizeof(*req) + req->nr_vmas * sizeof(req->vma[0])) {
+			pr_err("Truncated request\n");
+			send_err(sk, r, -EINVAL);
+			goto out;
+		}
+
+		dir_fd = get_mfd_dir_fd(ids, req->pid, O_PATH);
+		if (dir_fd < 0) {
+			send_err(sk, r, dir_fd);
+			goto out;
+		}
+
+		for (i = 0; i < req->nr_vmas; i++) {
+			fds[i] = nocc_cr_super_open_mfd_file(dir_fd,
+							     req->vma[i].start,
+							     req->vma[i].end,
+							     O_PATH);
+			if (fds[i] < 0)
+				break;
+		}
+
+		if (i < req->nr_vmas) {
+			/* Opening entry @i failed, fds[i] is the error. */
+			send_err(sk, r, fds[i]);
+		} else {
+			send_ok(sk, r);
+
+			if (send_fds(sk, NULL, 0, fds, req->nr_vmas, false)) {
+				pr_perror("Can't send fds");
+				send_err(sk, r, -EIO);
+			}
+		}
+
+		/*
+		 * fds[0..i) are the descriptors opened above. Our copies
+		 * are no longer needed whether or not they were passed
+		 * to the client, so do not leak them across requests.
+		 */
+		while (i > 0)
+			close(fds[--i]);
+		break;
+	}
+	default:
+		pr_err("Unknown request type %d\n", __r->req_type);
+		send_err(sk, r, -EINVAL);
+		break;
+	}
+
+out:
+	return 0;
+}
+
+/*
+ * Entry point of the cr-super helper.
+ *
+ * Options:
+ *   -s, --socket <num>    descriptor of the socket to serve (mandatory)
+ *   -o, --log-file <num>  descriptor log messages are written to
+ *   -v<num>               log level
+ *
+ * Requests are read from the socket one message at a time and passed
+ * to handle_request() until the client asks to exit or an error
+ * happens. Exits with 0 after a clean exit request and with 1 otherwise.
+ */
+int main(int argc, char *argv[])
+{
+	int socket_fd = -1;
+	int ret_code = 1;
+	int opt, idx;
+
+	void *recv_buf = NULL;
+	ssize_t recv_size;
+
+	struct ucred ids;
+	socklen_t ids_len = sizeof(ids);
+	/* Header used for error replies when no request could be read. */
+	super_req_hdr_t err_hdr = {
+		.req_type = SUPER_REQ_PING,
+	};
+
+	static const char short_opts[] = "s:v:o:";
+	static struct option long_opts[] = {
+		{ "socket",	required_argument,	0, 's'	},
+		{ "log-file",	required_argument,	0, 'o'	},
+		{ },
+	};
+
+	while (1) {
+		idx = -1;
+		opt = getopt_long(argc, argv, short_opts, long_opts, &idx);
+		if (opt == -1)
+			break;
+
+		switch (opt) {
+		case 's':
+			socket_fd = atoi(optarg);
+			break;
+		case 'o':
+			log_file_fd = atoi(optarg);
+			break;
+		case 'v':
+			/* Accepts both "-vvv" style and a plain number. */
+			if (optarg) {
+				if (optarg[0] == 'v')
+					cur_loglevel += strlen(optarg) + 1;
+				else
+					cur_loglevel = atoi(optarg);
+			} else
+				cur_loglevel++;
+			break;
+		default:
+			goto usage;
+		}
+	}
+
+	if (socket_fd < 0)
+		goto usage;
+
+	if (getsockopt(socket_fd, SOL_SOCKET, SO_PEERCRED, &ids, &ids_len)) {
+		pr_perror("Can't get socket options");
+		goto out;
+	}
+
+	/*
+	 * FIXME: Add a test that we have such user in a proper group.
+	 */
+
+	pr_debug("Waiting for requests\n");
+	for (;;) {
+		ssize_t peek_size;
+		int ret;
+
+		/* Learn the size of the next message without consuming it. */
+		peek_size = recv(socket_fd, NULL, 0, MSG_TRUNC | MSG_PEEK);
+		/*
+		 * Compare as signed values: with an unsigned comparison
+		 * the -1 returned on error would look like a huge size.
+		 */
+		if (peek_size < (ssize_t)sizeof(super_req_hdr_t)) {
+			/* Log first, send_err() may change errno. */
+			pr_perror("Can't read request (got %zd expecting %zu)",
+				  peek_size, sizeof(super_req_hdr_t));
+			send_err(socket_fd, &err_hdr, -EIO);
+			goto out;
+		}
+
+		recv_buf = xmalloc(peek_size);
+		if (!recv_buf) {
+			send_err(socket_fd, &err_hdr, -ENOMEM);
+			goto out;
+		}
+
+		/*
+		 * With MSG_TRUNC the real message length is returned even
+		 * if it does not fit the buffer, so make sure what we got
+		 * holds a header and is not larger than the buffer.
+		 */
+		recv_size = recv(socket_fd, recv_buf, peek_size, MSG_TRUNC);
+		if (recv_size < (ssize_t)sizeof(super_req_hdr_t) ||
+		    recv_size > peek_size) {
+			pr_perror("Can't read request (got %zd)", recv_size);
+			send_err(socket_fd, &err_hdr, -EIO);
+			goto out;
+		}
+
+		ret = handle_request(socket_fd, &ids, recv_buf, recv_size);
+		if (ret < 0)
+			goto out;
+		else if (ret > 0) {
+			pr_debug("Exiting\n");
+			break;
+		}
+
+		xfree(recv_buf);
+		recv_buf = NULL;
+	}
+
+	ret_code = 0;
+out:
+	xfree(recv_buf);
+	_exit(ret_code);
+
+usage:
+	fprintf(stderr, "%s: --socket <num> [--log-file <num>] [-v<num>]\n", argv[0]);
+	_exit(1);
+}
diff --git a/super/nocc-shared.c b/super/nocc-shared.c
new file mode 100644
index 000000000000..74df9abbd063
--- /dev/null
+++ b/super/nocc-shared.c
@@ -0,0 +1,65 @@
+/*
+ * These routines are shared between executables and recompiled
+ * for each of them, so the file is included in source form.
+ */
+
+/*
+ * Open the entry named "<vma_start>-<vma_end>" (both in hex) relative
+ * to the directory descriptor @mfd_dir_fd, with @mode passed to
+ * openat() as the open flags.
+ *
+ * Returns the new descriptor, or -errno when openat() fails.
+ */
+static int nocc_cr_super_open_mfd_file(int mfd_dir_fd,
+				       unsigned long vma_start,
+				       unsigned long vma_end,
+				       mode_t mode)
+{
+	char name[64];
+	int ret;
+
+	snprintf(name, sizeof(name), "%lx-%lx", vma_start, vma_end);
+
+	ret = openat(mfd_dir_fd, name, mode);
+	if (ret < 0)
+		ret = -errno;
+
+	return ret;
+}
+
+/*
+ * Collect information about the "<vma_start>-<vma_end>" entry found
+ * under the directory descriptor @mfd_dir_fd.
+ *
+ * The entry is opened with O_RDONLY first. If that fails, the reason
+ * is stored in @info->o_rdonly_error as a negative errno value and the
+ * entry is opened with O_PATH instead; otherwise the field is 0.
+ * @info->st receives the fstat() result and @info->mnt_id the value of
+ * the "mnt_id:" line from /proc/self/fdinfo/<fd>.
+ *
+ * Returns 0 on success, -errno if a system call fails, or -ENOENT if
+ * no "mnt_id:" line is found.
+ */
+static int nocc_cr_super_fetch_mfd_info(int mfd_dir_fd,
+					unsigned long vma_start,
+					unsigned long vma_end,
+					super_req_mfd_info_entry_t *info)
+{
+	char buf[512], *str;
+	int fd, err;
+	FILE *f;
+
+	snprintf(buf, sizeof(buf), "%lx-%lx", vma_start, vma_end);
+	fd = openat(mfd_dir_fd, buf, O_RDONLY);
+	if (fd < 0) {
+		/*
+		 * Latch the reason now: the next openat() overwrites
+		 * errno, and the bare -1 return value says nothing.
+		 */
+		info->o_rdonly_error = -errno;
+
+		fd = openat(mfd_dir_fd, buf, O_PATH);
+		if (fd < 0)
+			return -errno;
+	} else
+		info->o_rdonly_error = 0;
+
+	if (fstat(fd, &info->st)) {
+		/* Save errno before close() gets a chance to change it. */
+		err = -errno;
+		close(fd);
+		return err;
+	}
+
+	snprintf(buf, sizeof(buf), "/proc/self/fdinfo/%d", fd);
+	f = fopen(buf, "r");
+	if (!f) {
+		err = -errno;
+		close(fd);
+		return err;
+	}
+
+	while ((str = fgets(buf, sizeof(buf), f))) {
+		if (!strncmp(str, "mnt_id:\t", 8))
+			break;
+	}
+
+	fclose(f);
+	close(fd);
+
+	/* @str still points into @buf, which outlives the stream. */
+	if (!str)
+		return -ENOENT;
+
+	info->mnt_id = atoi(&str[8]);
+	return 0;
+}
-- 
2.4.3



More information about the CRIU mailing list