[CRIU] [PATCH] cr-super: Initial commit
Andrew Vagin
avagin at odin.com
Wed Sep 16 04:55:09 PDT 2015
On Wed, Sep 16, 2015 at 12:57:43PM +0300, Cyrill Gorcunov wrote:
> Running CRIU with root privilegues is required because of kernel's
> restrictions: in particular manipulating /proc/$pid/map_files/
> is guarded with CAP_SYS_ADMIN (which I plan to drop off in future
> but can't right now).
>
> Still it is being found that when we run CRIU in service mode handling
> RPC requests introduces problems with security context
>
> * CVE-2015-5228
> https://bugzilla.redhat.com/show_bug.cgi?id=1255782
>
> * CVE-2015-5231
> https://bugzilla.redhat.com/show_bug.cgi?id=1256728
>
> And here is an idea how to cork up such kind of problems:
> we introduce a small helper tool named 'cr-super' which
> should have suid bit set with proper owner and CRIU would
> run it when need to fetch information about map_files
> entries.
>
> Basically, a client opens a socketpair and pass one into
> cr-super as an argument, in turn cr-super fetches commands
> from the socket, fill the information needed into the buffer
> and send it back to the client.
>
> In this early commit I didn't add test for client uid/gid
> but need to check the client belong to say "criu" group
> which would be allowed to talk to cr-super.
In addition, we need to check that we are able to attache to a process
by ptrace.
I think for that we need to drop CAP_SYS_PTRACE from the effective set,
try to call PTRACE_SEIZE and if this operation was success, we can read
map_files.
>
> Also, need to modify CRIU itself to start using this
> helper on demand (if criu itself is running under root
> than we don't need to use this service).
>
> Thus it's an early draft, take a look, comments are welcome.
>
> Signed-off-by: Cyrill Gorcunov <gorcunov at openvz.org>
> ---
> Makefile | 14 ++-
> super/Makefile | 1 +
> super/cr-super.h | 59 +++++++++++
> super/main.c | 318 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
> 4 files changed, 390 insertions(+), 2 deletions(-)
> create mode 100644 super/Makefile
> create mode 100644 super/cr-super.h
> create mode 100644 super/main.c
>
> diff --git a/Makefile b/Makefile
> index fdc6830f4e68..a36841270fab 100644
> --- a/Makefile
> +++ b/Makefile
> @@ -187,13 +187,14 @@ build-crtools := -r -R -f scripts/Makefile.build makefile=Makefile.crtools obj
> PROGRAM := criu
>
> .PHONY: all zdtm test rebuild clean distclean tags cscope \
> - docs help pie protobuf $(ARCH_DIR) clean-built lib crit
> + docs help pie protobuf $(ARCH_DIR) clean-built lib crit \
> + super cr-super
>
> ifeq ($(GCOV),1)
> %.o $(PROGRAM): override CFLAGS += --coverage
> endif
>
> -all: config pie $(VERSION_HEADER) $(CRIU-LIB)
> +all: config pie super cr-super $(VERSION_HEADER) $(CRIU-LIB)
> $(Q) $(MAKE) $(PROGRAM)
> $(Q) $(MAKE) crit
>
> @@ -248,6 +249,15 @@ $(PROGRAM): $(SYSCALL-LIB) $(ARCH-LIB) $(PROGRAM-BUILTINS)
> $(E) " LINK " $@
> $(Q) $(CC) $(CFLAGS) $^ $(LIBS) $(LDFLAGS) $(GMONLDOPT) -rdynamic -o $@
>
> +super/%:: $(VERSION_HEADER) config built-in.o
> + $(Q) $(MAKE) $(build)=super $@
> +super: $(VERSION_HEADER) config built-in.o
> + $(Q) $(MAKE) $(build)=super all
> +
> +cr-super: super/built-in.o
> + $(E) " LINK " $@
> + $(Q) $(CC) $(CFLAGS) $^ $(LDFLAGS) $(GMONLDOPT) -rdynamic -o $@
> +
> crit:
> $(Q) $(MAKE) -C pycriu all
>
> diff --git a/super/Makefile b/super/Makefile
> new file mode 100644
> index 000000000000..b666967fd570
> --- /dev/null
> +++ b/super/Makefile
> @@ -0,0 +1 @@
> +obj-y += main.o
> diff --git a/super/cr-super.h b/super/cr-super.h
> new file mode 100644
> index 000000000000..a5724a552a29
> --- /dev/null
> +++ b/super/cr-super.h
> @@ -0,0 +1,59 @@
> +#ifndef __CR_SUPER_H__
> +#define __CR_SUPER_H__
> +
> +#include <limits.h>
> +
> +enum {
> + SUPER_RSP_OK = 1,
> + SUPER_RSP_ERR = 2,
> +
> + SUPER_REQ_EXIT = 3,
> +
> + SUPER_REQ_MFD_OPEN_DIR = 4,
Why do we need to have a separate command to open a directory?
We can cache a directory desriptor and open a directory only
if prev_pid isn't equal to req->pid.
> + SUPER_REQ_MFD_FILL_STAT = 5,
> + SUPER_REQ_MFD_FILL_LINK = 6,
> + SUPER_REQ_MFD_FILL_MNT_ID = 7,
Why we can't have one only one command?
> +
> + SUPER_REQ_MAX
> +};
> +
> +typedef struct {
> + unsigned short req_type;
> + unsigned short rsp_type;
> + unsigned short rsp_err;
> + unsigned short __pad;
> +} super_req_hdr_t;
> +
> +typedef struct {
> + super_req_hdr_t hdr;
> + pid_t pid;
> +} super_req_mfd_open_dir_t;
> +
> +typedef struct {
> + super_req_hdr_t hdr;
> + /* IN */
> + unsigned long vma_start;
> + unsigned long vma_end;
> + /* OUT */
> + char path[PATH_MAX];
> +} super_req_mfd_fill_link_t;
> +
> +typedef struct {
> + super_req_hdr_t hdr;
> + /* IN */
> + unsigned long vma_start;
> + unsigned long vma_end;
> + /* OUT */
> + struct stat st;
> +} super_req_mfd_fill_stat_t;
> +
> +typedef struct {
> + super_req_hdr_t hdr;
> + /* IN */
> + unsigned long vma_start;
> + unsigned long vma_end;
> + /* OUT */
> + unsigned int mnt_id;
> +} super_req_mfd_fill_mnt_id_t;
> +
> +#endif /* __CR_SUPER_H__ */
> diff --git a/super/main.c b/super/main.c
> new file mode 100644
> index 000000000000..b4f82e2d067c
> --- /dev/null
> +++ b/super/main.c
> @@ -0,0 +1,318 @@
> +#ifndef _GNU_SOURCE
> +#define _GNU_SOURCE
> +#endif
> +
> +#include <stdio.h>
> +#include <stdlib.h>
> +#include <limits.h>
> +#include <unistd.h>
> +#include <errno.h>
> +#include <getopt.h>
> +#include <string.h>
> +#include <ctype.h>
> +#include <stdarg.h>
> +#include <dirent.h>
> +#include <fcntl.h>
> +
> +#include <sys/types.h>
> +#include <sys/socket.h>
> +#include <sys/un.h>
> +#include <sys/wait.h>
> +#include <sys/stat.h>
> +
> +#include "criu-log.h"
> +#include "xmalloc.h"
> +
> +#include "super/cr-super.h"
> +
> +#undef LOG_PREFIX
> +#define LOG_PREFIX "spr: "
> +
> +static int mfd_dir_fd = -1;
> +
> +#define __RSP_OK(__r) \
> + do { \
> + ((super_req_hdr_t *)__r)->rsp_type = SUPER_RSP_OK; \
> + ((super_req_hdr_t *)__r)->rsp_err = 0; \
> + } while (0)
> +
> +#define __RSP_ERR(__r, __err) \
> + do { \
> + ((super_req_hdr_t *)__r)->rsp_type = SUPER_RSP_ERR; \
> + ((super_req_hdr_t *)__r)->rsp_err = (__err); \
> + } while (0)
> +
> +#define __TRACE_SND_PKT(__r) \
> + do { \
> + pr_debug("snd -> req: %2d rsp: %2d err: %2d\n", \
> + ((super_req_hdr_t *)__r)->req_type, \
> + ((super_req_hdr_t *)__r)->rsp_type, \
> + ((super_req_hdr_t *)__r)->rsp_err); \
> + } while (0)
> +
> +#define __TRACE_RCV_PKT(__r) \
> + do { \
> + pr_debug("rcv -> req: %2d\n", \
> + ((super_req_hdr_t *)__r)->req_type); \
> + } while (0)
> +
> +void print_on_level(unsigned int loglevel, const char *format, ...)
> +{
> + va_list params;
> +
> + va_start(params, format);
> + vprintf(format, params);
> + va_end(params);
> +}
> +
> +static int handle_request(int sk, struct ucred *ids, void *req, size_t size)
> +{
> + super_req_hdr_t *r = req;
> + super_req_mfd_fill_mnt_id_t *mfd_fill_mnt_id;
> + super_req_mfd_fill_stat_t *mfd_fill_stat;
> + super_req_mfd_fill_link_t *mfd_fill_link;
> + super_req_mfd_open_dir_t *mfd_open_dir;
> +
> + char buf[1024], *str;
> + char path[128];
> + FILE *f;
> + ssize_t len;
> + int fd;
> +
> +#define __check_opened(__r) \
> + do { \
> + if (mfd_dir_fd < 0) { \
> + pr_err("dir is not yet opened\n"); \
> + __RSP_ERR(__r, -EINVAL); \
> + return -EINVAL; \
> + } \
> + } while (0)
I don't like macroses which calls "return"
> +
> +#define __check_size(__r, __type, __size) \
> + do { \
> + if ((__size) < sizeof(__type)) { \
> + pr_err("corrupted header\n"); \
> + __RSP_ERR(__r, -EINVAL); \
> + return -EINVAL; \
> + } \
> + } while (0)
> +
> + switch (r->req_type) {
> + case SUPER_REQ_EXIT:
> + break;
> + case SUPER_REQ_MFD_OPEN_DIR:
> + mfd_open_dir = req;
> +
> + if (mfd_dir_fd >= 0) {
> + pr_err("mfd already opened\n");
> + __RSP_ERR(req, -EINVAL);
> + return -EINVAL;
> + }
> +
> + snprintf(path, sizeof(path), "/proc/%d/map_files", mfd_open_dir->pid);
> +
> + mfd_dir_fd = open(path, O_RDONLY);
> + if (mfd_dir_fd < 0) {
> + pr_perror("Can't open %s", path);
> + __RSP_ERR(r, -errno);
> + return -errno;
> + }
> + break;
> + case SUPER_REQ_MFD_FILL_STAT:
> + mfd_fill_stat = req;
> +
> + __check_opened(r);
> + __check_size(r, typeof(*mfd_fill_stat), size);
> +
> + snprintf(path, sizeof(path), "%lx-%lx",
> + mfd_fill_stat->vma_start, mfd_fill_stat->vma_end);
> +
> + fd = openat(mfd_dir_fd, path, O_RDONLY);
> + if (fd < 0) {
> + pr_perror("Can't open %s", path);
> + __RSP_ERR(r, -errno);
> + return -errno;
> + }
> + if (fstatat(mfd_dir_fd, path, &mfd_fill_stat->st, 0)) {
> + pr_perror("Can't stat %s", path);
> + __RSP_ERR(r, -errno);
> + return -errno;
> + }
> + close(fd);
> + break;
> + case SUPER_REQ_MFD_FILL_LINK:
> + mfd_fill_link = req;
> +
> + __check_opened(r);
> + __check_size(r, typeof(*mfd_fill_link), size);
> +
> + snprintf(path, sizeof(path), "%lx-%lx",
> + mfd_fill_link->vma_start, mfd_fill_link->vma_end);
> +
> + mfd_fill_link->path[0] = '.';
> + len = readlinkat(mfd_dir_fd, path, &mfd_fill_link->path[1],
> + sizeof(mfd_fill_link->path) - 1);
> + if (len < 0 || len == (sizeof(mfd_fill_link->path) - 1)) {
> + pr_perror("Can't readlink on %s", path);
> + __RSP_ERR(r, len < 0 ? -errno : -ENOSPC);
> + return len < 0 ? -errno : -ENOSPC;
> + }
> + mfd_fill_link->path[len] = 0;
> + break;
> + case SUPER_REQ_MFD_FILL_MNT_ID:
> + mfd_fill_mnt_id = req;
> +
> + __check_opened(r);
> + __check_size(r, typeof(*mfd_fill_mnt_id), size);
> +
> + snprintf(path, sizeof(path), "%lx-%lx",
> + mfd_fill_mnt_id->vma_start,
> + mfd_fill_mnt_id->vma_end);
> +
> + fd = openat(mfd_dir_fd, path, O_RDONLY);
> + if (fd < 0) {
> + pr_perror("Can't open %s", path);
> + __RSP_ERR(r, -errno);
> + return -errno;
> + }
> +
> + snprintf(path, sizeof(path), "/proc/self/fdinfo/%d", fd);
> + f = fopen(path, "r");
> + if (!f) {
> + pr_perror("Can't open %s", path);
> + __RSP_ERR(r, -errno);
> + return -errno;
> + }
> +
> + mfd_fill_mnt_id->mnt_id = -1;
> +
> + while ((str = fgets(buf, sizeof(buf), f))) {
> + if (strncmp(str, "mnt_id:\t", 8))
> + continue;
> + mfd_fill_mnt_id->mnt_id = atol(&str[8]);
> + break;
> + }
> +
> + fclose(f);
> + close(fd);
> +
> + if (mfd_fill_mnt_id->mnt_id == -1) {
> + pr_err("Can't parse fdinfo of %lx-%lx\n",
> + mfd_fill_mnt_id->vma_start,
> + mfd_fill_mnt_id->vma_end);
> + __RSP_ERR(r, -ENOENT);
> + return -ENOENT;
> + }
> + break;
> + default:
> + pr_err("Unknown request type\n");
> + __RSP_ERR(r, -EINVAL);
> + return -ENOENT;
> + }
> +
> + __RSP_OK(r);
> + return 0;
> +
> +#undef __check_size
> +#undef __check_opened
> +}
> +
> +int main(int argc, char *argv[])
> +{
> + int socket_fd = -1;
> + int opt, idx;
> + int ret = 1;
> +
> + void *reqcv_buf = NULL;
> + super_req_hdr_t *r;
> + ssize_t recv_size;
> + ssize_t send_size;
> +
> + struct ucred ids;
> + socklen_t ids_len = sizeof(ids);
> +
> + static const char short_opts[] = "f:";
> + static struct option long_opts[] = {
> + { "fd", required_argument, 0, 'f' },
> + { },
> + };
> +
> + while (1) {
> + idx = -1;
> + opt = getopt_long(argc, argv, short_opts, long_opts, &idx);
> + if (opt == -1)
> + break;
> +
> + switch (opt) {
> + case 'f':
> + socket_fd = atoi(optarg);
> + break;
> + default:
> + goto out;
> + }
> + }
> +
> + if (socket_fd < 0)
> + goto usage;
> +
> + if (getsockopt(socket_fd, SOL_SOCKET, SO_PEERCRED, &ids, &ids_len)) {
> + pr_perror("Can't get socket options");
> + goto out;
> + }
> +
> + /*
> + * FIXME: Add test that we have such user in a proper group.
> + */
> +
> + pr_debug("Listening\n");
> + for (;;) {
> + recv_size = recv(socket_fd, NULL, 0, MSG_TRUNC | MSG_PEEK);
> + if (recv_size < sizeof(super_req_hdr_t)) {
> + pr_perror("Can't read request");
> + goto out;
> + }
> +
> + reqcv_buf = xmalloc(recv_size);
> + if (!reqcv_buf)
> + goto out;
> +
> + recv_size = recv(socket_fd, reqcv_buf, recv_size, MSG_TRUNC);
> + if (recv_size < 0) {
> + pr_perror("Can't read request");
> + goto out;
> + } else if (recv_size == 0)
> + goto out;
> +
> + __TRACE_RCV_PKT(reqcv_buf);
> +
> + ret = handle_request(socket_fd, &ids, reqcv_buf, recv_size);
> + if (ret < 0)
> + goto out;
> +
> + __TRACE_SND_PKT(reqcv_buf);
> +
> + send_size = send(socket_fd, reqcv_buf, recv_size, 0);
> + if (send_size < recv_size) {
> + pr_perror("Can't send request");
Don't mix ret and the function exit code.
Here ret is zero, so you return zero code for the error case
> + goto out;
> + }
> +
> + r = reqcv_buf;
> + if (r->req_type == SUPER_REQ_EXIT)
> + break;
> +
> + xfree(reqcv_buf);
> + reqcv_buf = NULL;
> + }
> +
> + ret = 0;
> +out:
> + if (socket_fd >= 0)
> + close(socket_fd);
> + xfree(reqcv_buf);
> + return ret;
> +
> +usage:
> + printf("%s: --fd <num>\n", argv[0][1]);
This should be printed on stderr
> + return 1;
> +}
> --
> 2.4.3
>
More information about the CRIU
mailing list