[CRIU] [RFC] fds TX/RX
Stanislav Kinsbursky
skinsbursky at parallels.com
Fri Mar 16 09:37:23 EDT 2012
This patch intersects with my current changes in fd migration.
It would be great if you could avoid touching the fd dump calls and their internal
variables for a while, where possible.
BTW, hunks like the one below are useless and just make review harder:
> - pipe_size = fcntl(lfd, F_GETPIPE_SZ);
> + pipe_size = fcntl(fd, F_GETPIPE_SZ);
16.03.2012 17:29, Cyrill Gorcunov пишет:
> Hi,
>
> here is a patch I would like to discuss. It handles
> tx/rx of file descriptors. Sometimes in make tests
> I see that the log file was not closed properly after
> restore; I'm working on it now. But to draw the overall
> picture, the patch is below.
>
> Cyrill
> ---
> From: Cyrill Gorcunov<gorcunov at openvz.org>
> Date: Fri, 16 Mar 2012 17:19:35 +0400
> Subject: [PATCH] dump: Transfer file descriptors the dumpee has into our
> space via SCM
>
> There was an idea from Pavel to move all file descriptors the dumpee
> has into our space via the SCM facility. Then we can do anything we need
> with the descriptors directly, without calling parasite code anymore.
>
> In particular it will be needed for fowners dumping.
>
> While the patch looks big, in reality it does the following
> simple steps:
>
> - at the start of the dumping procedure crtools scans the dumpee's
> /proc/pid/fd directory and collects the fds found
> there
>
> - then crtools injects the parasite code and passes the set of
> collected fds to the parasite; the parasite in turn sends
> these descriptors back to our space via SCM messages
>
> - once the fds are received, the further dumping code operates
> on our new 'local' fds.
>
> To make this all happen the following structures were brought in:
>
> - struct fds_scm_map which carries both original and new file
> descriptors with a 1:1 mapping
>
> - struct scm_fdset which serves for fast transmission/receiving
> of file descriptors
>
> Signed-off-by: Cyrill Gorcunov<gorcunov at openvz.org>
> ---
> Makefile | 1 +
> cr-dump.c | 261 +++++++++++++++++++++++---------------------
> fds-scm-map.c | 140 +++++++++++++++++++++++
> include/fds-scm-map.h | 22 ++++
> include/parasite-syscall.h | 1 +
> include/parasite.h | 13 ++-
> include/syscall-codes.h | 1 +
> include/syscall.h | 5 +
> include/types.h | 18 +++
> include/util-net.h | 33 ++++++
> include/util.h | 13 ++
> parasite-syscall.c | 100 +++++++++++++++---
> parasite.c | 65 +++++++++++
> util-net.c | 104 +++++++++++++++++-
> 14 files changed, 635 insertions(+), 142 deletions(-)
> create mode 100644 fds-scm-map.c
> create mode 100644 include/fds-scm-map.h
>
> diff --git a/Makefile b/Makefile
> index 94161f9..c9322ec 100644
> --- a/Makefile
> +++ b/Makefile
> @@ -46,6 +46,7 @@ OBJS += file-ids.o
> OBJS += namespaces.o
> OBJS += uts_ns.o
> OBJS += ipc_ns.o
> +OBJS += fds-scm-map.o
>
> OBJS-BLOB += parasite.o
> SRCS-BLOB += $(patsubst %.o,%.c,$(OBJS-BLOB))
> diff --git a/cr-dump.c b/cr-dump.c
> index 7ca75a6..7fc1c1f 100644
> --- a/cr-dump.c
> +++ b/cr-dump.c
> @@ -32,19 +32,23 @@
> #include "image.h"
> #include "proc_parse.h"
> #include "parasite-syscall.h"
> +#include "fds-scm-map.h"
>
> #ifndef CONFIG_X86_64
> # error No x86-32 support yet
> #endif
>
> struct fd_parms {
> - unsigned long fd_name;
> - unsigned long pos;
> - unsigned int flags;
> - unsigned int type;
> + unsigned int scm_fd_locl; /* our fd number transferred from parasite */
> + unsigned int scm_fd_orig; /* corresponding original fd */
> + struct vma_entry *vma_entry; /* Used for VMA file maps */
>
> - u64 id;
> - pid_t pid;
> + unsigned long pos;
> + unsigned int flags;
> + unsigned int type;
> +
> + u64 id;
> + pid_t pid;
> };
>
> static char big_buffer[PATH_MAX];
> @@ -97,16 +101,15 @@ err:
> return ret;
> }
>
> -static int dump_one_reg_file(const struct fd_parms *p, int lfd,
> - const struct cr_fdset *cr_fdset,
> - bool do_close_lfd)
> +static int dump_one_reg_file(const struct fd_parms *params,
> + const struct cr_fdset *cr_fdset)
> {
> struct fdinfo_entry e;
> char fd_str[128];
> int len;
> int ret = -1;
>
> - snprintf(fd_str, sizeof(fd_str), "/proc/self/fd/%d", lfd);
> + snprintf(fd_str, sizeof(fd_str), "/proc/self/fd/%d", params->scm_fd_locl);
> len = readlink(fd_str, big_buffer, sizeof(big_buffer) - 1);
> if (len< 0) {
> pr_perror("Can't readlink %s", fd_str);
> @@ -114,17 +117,19 @@ static int dump_one_reg_file(const struct fd_parms *p, int lfd,
> }
>
> big_buffer[len] = '\0';
> - pr_info("Dumping path for %lx fd via self %d [%s]\n",
> - p->fd_name, lfd, big_buffer);
> -
> - if (do_close_lfd)
> - close(lfd);
> + pr_info("Dumping path for %d fd [%s]\n",
> + params->scm_fd_orig, big_buffer);
>
> - e.type = p->type;
> + e.type = params->type;
> e.len = len;
> - e.flags = p->flags;
> - e.pos = p->pos;
> - e.addr = p->fd_name;
> + e.flags = params->flags;
> + e.pos = params->pos;
> +
> + if (params->type == FDINFO_MAP)
> + e.addr = params->vma_entry->start;
> + else
> + e.addr = params->scm_fd_orig;
> +
> e.id = FD_ID_INVALID;
>
> if (likely(!fd_is_special(&e))) {
> @@ -136,7 +141,7 @@ static int dump_one_reg_file(const struct fd_parms *p, int lfd,
> */
> BUILD_BUG_ON(sizeof(entry->u.key) != sizeof(e.id));
>
> - entry = fd_id_entry_collect((u32)p->id, p->pid, p->fd_name);
> + entry = fd_id_entry_collect((u32)params->id, params->pid, params->scm_fd_orig);
> if (!entry)
> goto err;
>
> @@ -145,7 +150,7 @@ static int dump_one_reg_file(const struct fd_parms *p, int lfd,
> }
>
> pr_info("fdinfo: type: %2x len: %2x flags: %4x pos: %8lx addr: %16lx\n",
> - p->type, len, p->flags, p->pos, p->fd_name);
> + params->type, len, params->flags, params->pos, e.addr);
>
> if (write_img(cr_fdset->fds[CR_FD_FDINFO],&e))
> goto err;
> @@ -163,35 +168,41 @@ static int dump_task_special_files(pid_t pid, const struct cr_fdset *cr_fdset)
> int fd, ret;
>
> /* Dump /proc/pid/cwd */
> + fd = open_proc(pid, "cwd");
> + if (fd< 0)
> + return -1;
> params = (struct fd_parms) {
> + .scm_fd_orig = fd,
> + .scm_fd_locl = fd,
> .id = FD_ID_INVALID,
> .pid = FD_PID_INVALID,
> .type = FDINFO_CWD,
> };
>
> - fd = open_proc(pid, "cwd");
> - if (fd< 0)
> - return -1;
> - ret = dump_one_reg_file(¶ms, fd, cr_fdset, 1);
> + ret = dump_one_reg_file(¶ms, cr_fdset);
> + close(fd);
> if (ret)
> return ret;
>
> /* Dump /proc/pid/exe */
> + fd = open_proc(pid, "exe");
> + if (fd< 0)
> + return -1;
> params = (struct fd_parms) {
> + .scm_fd_orig = fd,
> + .scm_fd_locl = fd,
> .id = FD_ID_INVALID,
> .pid = FD_PID_INVALID,
> .type = FDINFO_EXE,
> };
>
> - fd = open_proc(pid, "exe");
> - if (fd< 0)
> - return -1;
> - ret = dump_one_reg_file(¶ms, fd, cr_fdset, 1);
> + ret = dump_one_reg_file(¶ms, cr_fdset);
> + close(fd);
>
> return ret;
> }
>
> -static int dump_pipe_and_data(int lfd, struct pipe_entry *e,
> +static int dump_pipe_and_data(int fd, struct pipe_entry *e,
> const struct cr_fdset *cr_fdset)
> {
> int fd_pipes;
> @@ -208,13 +219,13 @@ static int dump_pipe_and_data(int lfd, struct pipe_entry *e,
> goto err;
> }
>
> - pipe_size = fcntl(lfd, F_GETPIPE_SZ);
> + pipe_size = fcntl(fd, F_GETPIPE_SZ);
> if (pipe_size< 0) {
> pr_err("Can't obtain piped data size\n");
> goto err;
> }
>
> - has_bytes = tee(lfd, steal_pipe[1], pipe_size, SPLICE_F_NONBLOCK);
> + has_bytes = tee(fd, steal_pipe[1], pipe_size, SPLICE_F_NONBLOCK);
> if (has_bytes< 0) {
> if (errno != EAGAIN) {
> pr_perror("Can't pick pipe data");
> @@ -246,108 +257,86 @@ err:
> return ret;
> }
>
> -static int dump_one_pipe(const struct fd_parms *p, unsigned int id, int lfd,
> +static int dump_one_pipe(const struct fd_parms *p, unsigned int id,
> const struct cr_fdset *cr_fdset)
> {
> struct pipe_entry e;
> int ret = -1;
> struct statfs stfs_buf;
>
> - if (fstatfs(lfd,&stfs_buf)< 0) {
> - pr_perror("Can't fstatfs on %ld", p->fd_name);
> + if (fstatfs(p->scm_fd_locl,&stfs_buf)< 0) {
> + pr_perror("Can't fstatfs on %d", p->scm_fd_orig);
> return -1;
> }
>
> if (stfs_buf.f_type != PIPEFS_MAGIC) {
> - pr_err("Dumping of FIFO's is not supported: %ld\n", p->fd_name);
> + pr_err("Dumping of FIFO's is not supported: %d\n", p->scm_fd_orig);
> return -1;
> }
>
> - pr_info("Dumping pipe %ld/%x flags %x\n", p->fd_name, id, p->flags);
> + pr_info("Dumping pipe %d/%x flags %x\n", p->scm_fd_orig, id, p->flags);
>
> - e.fd = p->fd_name;
> + e.fd = p->scm_fd_orig;
> e.pipeid = id;
> e.flags = p->flags;
> + e.bytes = 0;
>
> - if (p->flags& O_WRONLY) {
> - e.bytes = 0;
> + if (p->flags& O_WRONLY)
> ret = write_img(cr_fdset->fds[CR_FD_PIPES],&e);
> - } else
> - ret = dump_pipe_and_data(lfd,&e, cr_fdset);
> + else
> + ret = dump_pipe_and_data(p->scm_fd_locl,&e, cr_fdset);
>
> err:
> if (!ret)
> pr_info("Dumped pipe: fd: %8x pipeid: %8x flags: %8x bytes: %8x\n",
> e.fd, e.pipeid, e.flags, e.bytes);
> else
> - pr_err("Dumping pipe %ld/%x flags %x\n", p->fd_name, id, p->flags);
> + pr_err("Dumping pipe %d/%x flags %x\n", p->scm_fd_orig, id, p->flags);
>
> return ret;
> }
>
> -static int read_fd_params(pid_t pid, const char *fd, struct fd_parms *p)
> -{
> - FILE *file;
> - int ret;
> -
> - file = fopen_proc(pid, "fdinfo/%s", fd);
> - if (!file)
> - return -1;
> -
> - p->fd_name = atoi(fd);
> - ret = fscanf(file, "pos:\t%li\nflags:\t%o\n",&p->pos,&p->flags);
> - fclose(file);
> -
> - if (ret != 2) {
> - pr_err("Bad format of fdinfo file (%d items, want 2)\n", ret);
> - return -1;
> - }
> -
> - pr_info("%d fdinfo %s: pos: %16lx flags: %16o\n",
> - pid, fd, p->pos, p->flags);
> -
> - p->pid = pid;
> - p->id = FD_ID_INVALID;
> -
> - return 0;
> -}
> -
> -static int dump_one_fd(pid_t pid, int pid_fd_dir, const char *d_name,
> +static int dump_one_fd(struct fd_parms *params,
> const struct cr_fdset *cr_fdset,
> struct sk_queue *sk_queue)
> {
> struct stat fd_stat;
> int err = -1;
> - struct fd_parms p;
> - int lfd;
>
> - if (read_fd_params(pid, d_name,&p))
> - return -1;
> -
> - lfd = openat(pid_fd_dir, d_name, O_RDONLY);
> - if (lfd< 0) {
> - err = try_dump_socket(pid, p.fd_name, cr_fdset, sk_queue);
> - if (err != 1)
> - return err;
> + pr_info("%d fdinfo %d (%d): pos: %16lx flags: %16o\n",
> + params->pid, params->scm_fd_orig, params->scm_fd_locl,
> + params->pos, params->flags);
>
> - pr_perror("Failed to open %d/%ld", pid_fd_dir, p.fd_name);
> - return -1;
> + /*
> + * Check if it's a socket.
> + */
> + {
> + int fd = open_proc_nocheck(params->pid, "fd/%d", params->scm_fd_orig);
> + if (fd< 0) {
> + err = try_dump_socket(params->pid, params->scm_fd_orig,
> + cr_fdset, sk_queue);
> + if (err != 1)
> + return err;
> +
> + pr_perror("Failed to open %d", params->scm_fd_orig);
> + return -1;
> + } else
> + close(fd);
> }
>
> - if (fstat(lfd,&fd_stat)< 0) {
> - pr_perror("Can't get stat on %ld", p.fd_name);
> - goto out_close;
> + if (fstat(params->scm_fd_locl,&fd_stat)< 0) {
> + pr_perror("Can't get stat on %d", params->scm_fd_orig);
> + return -1;
> }
>
> if (S_ISCHR(fd_stat.st_mode)&&
> (major(fd_stat.st_rdev) == TTY_MAJOR ||
> major(fd_stat.st_rdev) == UNIX98_PTY_SLAVE_MAJOR)) {
> /* skip only standard destriptors */
> - if (p.fd_name< 3) {
> - err = 0;
> - pr_info("... Skipping tty ... %d/%ld\n",
> - pid_fd_dir, p.fd_name);
> - goto out_close;
> + if (params->scm_fd_orig< 3) {
> + pr_info("... Skipping tty ... %d\n",
> + params->scm_fd_orig);
> + return 0;
> }
> goto err;
> }
> @@ -356,62 +345,69 @@ static int dump_one_fd(pid_t pid, int pid_fd_dir, const char *d_name,
> S_ISDIR(fd_stat.st_mode) ||
> (S_ISCHR(fd_stat.st_mode)&& major(fd_stat.st_rdev) == MEM_MAJOR)) {
>
> - p.id = MAKE_FD_GENID(fd_stat.st_dev, fd_stat.st_ino, p.pos);
> - p.type = FDINFO_REG;
> + params->id = MAKE_FD_GENID(fd_stat.st_dev, fd_stat.st_ino, params->pos);
> + params->type = FDINFO_REG;
>
> - return dump_one_reg_file(&p, lfd, cr_fdset, 1);
> + return dump_one_reg_file(params, cr_fdset);
> }
>
> if (S_ISFIFO(fd_stat.st_mode))
> - return dump_one_pipe(&p, fd_stat.st_ino, lfd, cr_fdset);
> + return dump_one_pipe(params, fd_stat.st_ino, cr_fdset);
>
> err:
> - pr_err("Can't dump file %ld of that type [%x]\n", p.fd_name, fd_stat.st_mode);
> + pr_err("Can't dump file %d of that type [%x]\n",
> + params->scm_fd_orig, fd_stat.st_mode);
>
> -out_close:
> - close_safe(&lfd);
> return err;
> }
>
> -static int dump_task_files(pid_t pid, const struct cr_fdset *cr_fdset,
> - struct sk_queue *sk_queue)
> +static int dump_task_files_seized(struct parasite_ctl *ctl,
> + struct fds_scm_map *fds_map,
> + const struct cr_fdset *cr_fdset,
> + struct sk_queue *sk_queue)
> {
> - struct dirent *de;
> - unsigned long pos;
> - unsigned int flags;
> - DIR *fd_dir;
> + struct fd_parms params;
> + unsigned int i;
> + int ret;
>
> pr_info("\n");
> - pr_info("Dumping opened files (pid: %d)\n", pid);
> + pr_info("Dumping opened files (pid: %d)\n", ctl->pid);
> pr_info("----------------------------------------\n");
>
> /*
> + * Parasite should transfer file descriptors
> + * to our space.
> + */
> + ret = parasite_tx_fds_seized(ctl, fds_map->fd_orig, fds_map->fd_locl, fds_map->nr_fds);
> + if (ret)
> + return -1;
> +
> + /*
> * Dump special files at the beginning. We might need
> * to re-read them in restorer, so better to make it
> * fast.
> */
> - if (dump_task_special_files(pid, cr_fdset)) {
> + if (dump_task_special_files(ctl->pid, cr_fdset)) {
> pr_err("Can't dump special files\n");
> return -1;
> }
>
> - fd_dir = opendir_proc(pid, "fd");
> - if (!fd_dir)
> - return -1;
> + for (i = 0; i< fds_map->nr_fds; i++) {
>
> - while ((de = readdir(fd_dir))) {
> - if (!strcmp(de->d_name, "."))
> - continue;
> - if (!strcmp(de->d_name, ".."))
> - continue;
> - if (dump_one_fd(pid, dirfd(fd_dir), de->d_name, cr_fdset,
> - sk_queue))
> + params.scm_fd_locl = fds_map->fd_locl[i];
> + params.scm_fd_orig = fds_map->fd_orig[i];
> + params.pos = lseek(params.scm_fd_locl, 0, SEEK_CUR);
> + params.flags = fcntl(params.scm_fd_locl, F_GETFL);
> + params.pid = ctl->pid;
> + params.id = FD_ID_INVALID;
> +
> + ret = dump_one_fd(¶ms, cr_fdset, sk_queue);
> + if (ret)
> return -1;
> }
>
> pr_info("----------------------------------------\n");
>
> - closedir(fd_dir);
> return 0;
> }
>
> @@ -447,21 +443,24 @@ static int dump_task_mappings(pid_t pid, const struct list_head *vma_area_list,
> if (write_img(cr_fdset->fds[CR_FD_SHMEM],&e))
> goto err;
> } else if (vma_entry_is(vma, VMA_FILE_PRIVATE) ||
> - vma_entry_is(vma, VMA_FILE_SHARED)) {
> - struct fd_parms p = {
> - .fd_name = vma->start,
> + vma_entry_is(vma, VMA_FILE_SHARED)) {
> +
> + struct fd_parms params = {
> + .scm_fd_locl = vma_area->vm_file_fd,
> + .scm_fd_orig = vma_area->vm_file_fd,
> + .vma_entry = vma,
> .id = FD_ID_INVALID,
> .pid = pid,
> .type = FDINFO_MAP,
> };
>
> if (vma->prot& PROT_WRITE&&
> - vma_entry_is(vma, VMA_FILE_SHARED))
> - p.flags = O_RDWR;
> + vma_entry_is(vma, VMA_FILE_SHARED))
> + params.flags = O_RDWR;
> else
> - p.flags = O_RDONLY;
> + params.flags = O_RDONLY;
>
> - ret = dump_one_reg_file(&p, vma_area->vm_file_fd, cr_fdset, 0);
> + ret = dump_one_reg_file(¶ms, cr_fdset);
> if (ret)
> goto err;
> }
> @@ -1287,6 +1286,7 @@ static int dump_one_task(const struct pstree_item *item, struct cr_fdset *cr_fds
> int ret = -1;
> struct parasite_dump_misc misc;
> struct sk_queue sk_queue = { };
> + struct fds_scm_map *fds_map = NULL;
>
> pr_info("========================================\n");
> pr_info("Dumping task (pid: %d)\n", pid);
> @@ -1315,9 +1315,9 @@ static int dump_one_task(const struct pstree_item *item, struct cr_fdset *cr_fds
> goto err;
> }
>
> - ret = dump_task_files(pid, cr_fdset,&sk_queue);
> + ret = fds_scm_collect_orig(pid,&fds_map);
> if (ret) {
> - pr_err("Dump files (pid: %d) failed with %d\n", pid, ret);
> + pr_err("Collecting fds (pid: %d) failed with %d\n", pid, ret);
> goto err;
> }
>
> @@ -1327,6 +1327,12 @@ static int dump_one_task(const struct pstree_item *item, struct cr_fdset *cr_fds
> goto err;
> }
>
> + ret = dump_task_files_seized(parasite_ctl, fds_map, cr_fdset,&sk_queue);
> + if (ret) {
> + pr_err("Dump files (pid: %d) failed with parasite\n", pid);
> + goto err;
> + }
> +
> ret = parasite_dump_pages_seized(parasite_ctl,&vma_area_list, cr_fdset);
> if (ret) {
> pr_err("Can't dump pages (pid: %d) with parasite\n", pid);
> @@ -1394,11 +1400,14 @@ static int dump_one_task(const struct pstree_item *item, struct cr_fdset *cr_fds
> }
>
> free_mappings(&vma_area_list);
> + fds_scm_map_destroy(&fds_map);
>
> err:
> close_pid_proc();
> err_free:
> free_mappings(&vma_area_list);
> + fds_scm_map_destroy(&fds_map);
> +
> return ret;
> }
>
> diff --git a/fds-scm-map.c b/fds-scm-map.c
> new file mode 100644
> index 0000000..9b79065
> --- /dev/null
> +++ b/fds-scm-map.c
> @@ -0,0 +1,140 @@
> +#include<stdio.h>
> +#include<stdlib.h>
> +#include<stdarg.h>
> +#include<limits.h>
> +#include<unistd.h>
> +#include<errno.h>
> +#include<string.h>
> +
> +#include<sys/types.h>
> +#include<sys/mman.h>
> +#include<sys/stat.h>
> +#include<fcntl.h>
> +#include<dirent.h>
> +
> +#include "types.h"
> +#include "compiler.h"
> +#include "crtools.h"
> +#include "util.h"
> +
> +#include "fds-scm-map.h"
> +
> +#ifndef CONFIG_X86_64
> +# error No x86-32 support yet
> +#endif
> +
> +struct fds_scm_map *fds_scm_map_create(void)
> +{
> + struct fds_scm_map *map;
> +
> + map = xzalloc(sizeof(*map));
> + if (map) {
> + map->fd_locl = mmap(NULL, PAGE_SIZE, PROT_READ | PROT_WRITE,
> + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
> + if (map->fd_locl == MAP_FAILED)
> + goto err_nomem;
> +
> + map->fd_orig = mmap(NULL, PAGE_SIZE, PROT_READ | PROT_WRITE,
> + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
> + if (map->fd_orig == MAP_FAILED) {
> + munmap(map->fd_locl, PAGE_SIZE);
> + goto err_nomem;
> + }
> +
> + map->size_fd_orig = PAGE_SIZE;
> + map->size_fd_locl = PAGE_SIZE;
> + }
> +
> + return map;
> +
> +err_nomem:
> + pr_perror("Can't allocate memory for SCM fd map");
> + xfree(map);
> + return NULL;
> +}
> +
> +void fds_scm_map_destroy(struct fds_scm_map **map)
> +{
> + if (!map || !*map)
> + return;
> +
> + if ((*map)->fd_locl != MAP_FAILED)
> + munmap((*map)->fd_locl, (*map)->size_fd_locl);
> +
> + if ((*map)->fd_orig != MAP_FAILED)
> + munmap((*map)->fd_orig, (*map)->size_fd_orig);
> +
> + xfree(*map);
> + *map = NULL;
> +}
> +
> +int fds_scm_map_expand(struct fds_scm_map *map)
> +{
> + int *new;
> +
> + new = mremap(map->fd_locl, map->size_fd_locl,
> + map->size_fd_locl + PAGE_SIZE, MREMAP_MAYMOVE);
> + if (new == MAP_FAILED)
> + goto err_nomem;
> +
> + map->fd_locl = new;
> + map->size_fd_locl += PAGE_SIZE;
> +
> + new = mremap(map->fd_orig, map->size_fd_orig,
> + map->size_fd_orig + PAGE_SIZE, MREMAP_MAYMOVE);
> + if (new == MAP_FAILED)
> + goto err_nomem;
> +
> + map->fd_orig = new;
> + map->size_fd_orig += PAGE_SIZE;
> +
> + return 0;
> +err_nomem:
> + pr_perror("Can't expand memory for SCM fd map");
> + return -ENOMEM;
> +}
> +
> +int fds_scm_collect_orig(pid_t pid, struct fds_scm_map **map)
> +{
> + unsigned int n = 0;
> + struct dirent *d;
> + DIR *dir;
> +
> + BUG_ON(*map);
> +
> + pr_info("\n");
> + pr_info("Collecting fds (pid: %d)\n", pid);
> + pr_info("----------------------------------------\n");
> +
> + *map = fds_scm_map_create();
> + if (!*map)
> + return -1;
> +
> + dir = opendir_proc(pid, "fd");
> + if (!dir)
> + return -1;
> +
> + while ((d = readdir(dir))) {
> + if (d->d_name[0] == '.')
> + continue;
> +
> + if (SCM_MAP_NEED_EXPAND((*map)->size_fd_orig, n + 1)) {
> + if (fds_scm_map_expand(*map))
> + return -ENOMEM;
> + }
> +
> + (*map)->fd_orig[n] = atoi(d->d_name);
> + n++;
> + }
> +
> + pr_info("Found %d file descriptors\n", n);
> +
> + (*map)->nr_fds = n;
> +
> + pr_info("----------------------------------------\n");
> + return 0;
> +
> +err_nomem:
> + pr_perror("Can't allocate memory for %d\n", pid);
> + return -ENOMEM;
> +}
> diff --git a/include/fds-scm-map.h b/include/fds-scm-map.h
> new file mode 100644
> index 0000000..393d28f
> --- /dev/null
> +++ b/include/fds-scm-map.h
> @@ -0,0 +1,22 @@
> +#ifndef FDS_SCM_MAP
> +#define FDS_SCM_MAP
> +
> +#include<sys/types.h>
> +
> +extern struct fds_scm_map *fds_scm_map_create(void);
> +extern void fds_scm_map_destroy(struct fds_scm_map **map);
> +extern int fds_scm_map_expand(struct fds_scm_map *map);
> +extern int fds_scm_collect_orig(pid_t pid, struct fds_scm_map **map);
> +
> +struct fds_scm_map {
> + int *fd_locl;
> + int *fd_orig;
> +
> + unsigned int nr_fds;
> + unsigned long size_fd_locl;
> + unsigned long size_fd_orig;
> +};
> +
> +#define SCM_MAP_NEED_EXPAND(size, nr) (((size) / sizeof(int))<= (nr))
> +
> +#endif /* FDSET_SCM_MAP */
> diff --git a/include/parasite-syscall.h b/include/parasite-syscall.h
> index 5ae1554..1319f2e 100644
> --- a/include/parasite-syscall.h
> +++ b/include/parasite-syscall.h
> @@ -31,6 +31,7 @@ struct parasite_ctl {
>
> extern int parasite_dump_sigacts_seized(struct parasite_ctl *ctl, struct cr_fdset *cr_fdset);
> extern int parasite_dump_itimers_seized(struct parasite_ctl *ctl, struct cr_fdset *cr_fdset);
> +extern int parasite_tx_fds_seized(struct parasite_ctl *ctl, int *fds_tx, int *fds_rx, unsigned int nr_fds);
>
> struct parasite_dump_misc;
> extern int parasite_dump_misc_seized(struct parasite_ctl *ctl, struct parasite_dump_misc *misc);
> diff --git a/include/parasite.h b/include/parasite.h
> index dbad64d..8d6787d 100644
> --- a/include/parasite.h
> +++ b/include/parasite.h
> @@ -8,10 +8,11 @@
> #include "compiler.h"
> #include "image.h"
> #include "sockets.h"
> +#include "util-net.h"
>
> #define __parasite_head __used __section(.parasite.head.text)
>
> -#define PARASITE_STACK_SIZE 2048
> +#define PARASITE_STACK_SIZE 4096
> #define PARASITE_ARG_SIZE 8196
>
> #define PARASITE_MAX_SIZE (64<< 10)
> @@ -30,6 +31,7 @@ enum {
> PARASITE_CMD_DUMP_MISC,
> PARASITE_CMD_DUMP_TID_ADDR,
> PARASITE_CMD_DUMP_SK_QUEUES,
> + PARASITE_CMD_SCM_FD,
>
> PARASITE_CMD_MAX,
> };
> @@ -91,6 +93,15 @@ struct parasite_dump_sk_queues {
> struct sk_queue_item items[0];
> };
>
> +struct parasite_scm_fd {
> + parasite_status_t status;
> +
> + struct scm_fdset fdset;
> +
> + struct sockaddr_un saddr;
> + int sun_len;
> +};
> +
> /*
> * Some useful offsets
> */
> diff --git a/include/syscall-codes.h b/include/syscall-codes.h
> index 86adf75..0c284b9 100644
> --- a/include/syscall-codes.h
> +++ b/include/syscall-codes.h
> @@ -35,6 +35,7 @@
> #define __NR_wait4 61
> #define __NR_kill 62
> #define __NR_flock 73
> +#define __NR_getdents 78
> #define __NR_unlink 87
> #define __NR_setresuid 117
> #define __NR_setresgid 119
> diff --git a/include/syscall.h b/include/syscall.h
> index 31a5d9a..0206ab9 100644
> --- a/include/syscall.h
> +++ b/include/syscall.h
> @@ -298,6 +298,11 @@ static always_inline long sys_flock(unsigned long fd, unsigned long cmd)
> return syscall2(__NR_flock, fd, cmd);
> }
>
> +static always_inline long sys_getdents(unsigned int fd, struct linux_dirent *dirent, unsigned int count)
> +{
> + return syscall3(__NR_getdents, (long)fd, (long)dirent, (long)count);
> +}
> +
> static void always_inline local_sleep(long seconds)
> {
> struct timespec req, rem;
> diff --git a/include/types.h b/include/types.h
> index 0f87b4b..45220c8 100644
> --- a/include/types.h
> +++ b/include/types.h
> @@ -195,4 +195,22 @@ enum kcmp_type {
> KCMP_TYPES,
> };
>
> +struct linux_dirent {
> + unsigned long d_ino;
> + unsigned long d_off;
> + unsigned short d_reclen;
> + char d_name[256];
> +};
> +
> +/* For UNIX sockets data */
> +#ifndef UIO_FASTIOV
> +# define UIO_FASTIOV 8
> +#endif
> +#ifndef UIO_MAXIOV
> +#define UIO_MAXIOV 1024
> +#endif
> +#ifndef SCM_MAX_FD
> +# define SCM_MAX_FD 253
> +#endif
> +
> #endif /* CR_TYPES_H_ */
> diff --git a/include/util-net.h b/include/util-net.h
> index c5a8c46..ae63a48 100644
> --- a/include/util-net.h
> +++ b/include/util-net.h
> @@ -1,6 +1,19 @@
> #ifndef UTIL_NET_H_
> #define UTIL_NET_H_
>
> +#include<sys/socket.h>
> +#include<sys/un.h>
> +
> +/*
> + * Because the kernel does kmalloc for user data passed
> + * in SCM messages, and there is SCM_MAX_FD as a limit
> + * for descriptors passed at once -- we're trying to
> + * eliminate pressure on the kernel memory manager and use
> + * a predefined, known-to-work-well size for the message buffer.
> + */
> +#define CR_SCM_MSG_SIZE (1024)
> +#define CR_SCM_MAX_FD (252)
> +
> #define UNIX_PATH_MAX (sizeof(struct sockaddr_un) - \
> (size_t)((struct sockaddr_un *) 0)->sun_path)
>
> @@ -8,6 +21,26 @@
> #define SO_PEEK_OFF 42
> #endif
>
> +struct scm_fdset {
> + struct msghdr hdr;
> + struct iovec iov;
> + char msg_buf[CR_SCM_MSG_SIZE];
> + int __pad;
> + union {
> + int nr_fds_rx;
> + int nr_fds_tx;
> + int __nr_fds;
> + };
> +};
> +
> +extern int *scm_fdset_init(struct scm_fdset *fdset);
> +extern void scm_fdset_set_addr(struct scm_fdset *fdset, struct sockaddr_un *saddr, int saddr_len);
> +extern int *scm_fdset_first(struct scm_fdset *fdset);
> +extern int scm_fdset_update(struct scm_fdset *fdset, int nr_fds);
> +extern int scm_fdset_send(int sock, struct scm_fdset *fdset);
> +extern int scm_fdset_recv(int sock, struct scm_fdset *fdset);
> +
> extern int send_fd(int sock, struct sockaddr_un *saddr, int len, int fd);
> extern int recv_fd(int sock);
> +
> #endif
> diff --git a/include/util.h b/include/util.h
> index 915a3b5..963665b 100644
> --- a/include/util.h
> +++ b/include/util.h
> @@ -11,6 +11,8 @@
> #include<errno.h>
>
> #include<sys/types.h>
> +#include<sys/stat.h>
> +#include<fcntl.h>
> #include<dirent.h>
>
> #include "compiler.h"
> @@ -189,6 +191,17 @@ int do_open_proc(pid_t pid, int flags, const char *fmt, ...);
> __fd; \
> })
>
> +#define __open_proc_nocheck(pid, flags, fmt, ...) \
> + ({ \
> + int __fd = do_open_proc(pid, flags, \
> + fmt, ##__VA_ARGS__); \
> + \
> + __fd; \
> + })
> +
> +#define open_proc_nocheck(pid, fmt, ...) \
> + __open_proc_nocheck(pid, O_RDONLY, fmt, ##__VA_ARGS__)
> +
> /* int open_proc(pid_t pid, const char *fmt, ...); */
> #define open_proc(pid, fmt, ...) \
> __open_proc(pid, O_RDONLY, fmt, ##__VA_ARGS__)
> diff --git a/parasite-syscall.c b/parasite-syscall.c
> index 35c3ed7..deac303 100644
> --- a/parasite-syscall.c
> +++ b/parasite-syscall.c
> @@ -320,13 +320,13 @@ static int munmap_seized(struct parasite_ctl *ctl, void *addr, size_t length)
> return ret;
> }
>
> -static int gen_parasite_saddr(struct sockaddr_un *saddr, pid_t pid)
> +static int gen_parasite_saddr(struct sockaddr_un *saddr, int salt)
> {
> int sun_len;
>
> saddr->sun_family = AF_UNIX;
> snprintf(saddr->sun_path, UNIX_PATH_MAX,
> - "X/crtools-pr-%d", pid);
> + "X/crtools-pr-%d", salt);
>
> sun_len = SUN_LEN(saddr);
> *saddr->sun_path = '\0';
> @@ -399,17 +399,19 @@ out:
> return ret;
> }
>
> -static int parasite_init(struct parasite_ctl *ctl, pid_t pid)
> +static int parasite_init(struct parasite_ctl *ctl)
> {
> struct parasite_init_args args = { };
> + int ret;
> +
> + args.sun_len = gen_parasite_saddr(&args.saddr, ctl->pid);
>
> - args.sun_len = gen_parasite_saddr(&args.saddr, pid);
> + ret = parasite_execute(PARASITE_CMD_INIT, ctl, (parasite_status_t *)&args, sizeof(args));
>
> - return parasite_execute(PARASITE_CMD_INIT, ctl,
> - (parasite_status_t *)&args, sizeof(args));
> + return ret;
> }
>
> -static int parasite_set_logfd(struct parasite_ctl *ctl, pid_t pid)
> +static int parasite_set_logfd(struct parasite_ctl *ctl)
> {
> parasite_status_t args = { };
> int ret;
> @@ -419,10 +421,8 @@ static int parasite_set_logfd(struct parasite_ctl *ctl, pid_t pid)
> return ret;
>
> ret = parasite_execute(PARASITE_CMD_SET_LOGFD, ctl,&args, sizeof(args));
> - if (ret< 0)
> - return ret;
>
> - return 0;
> + return ret;
> }
>
> int parasite_dump_tid_addr_seized(struct parasite_ctl *ctl, pid_t pid, unsigned int **tid_addr)
> @@ -457,6 +457,78 @@ int parasite_dump_misc_seized(struct parasite_ctl *ctl, struct parasite_dump_mis
> sizeof(struct parasite_dump_misc));
> }
>
> +/* Transmit file descriptors from dumpee into our space */
> +int parasite_tx_fds_seized(struct parasite_ctl *ctl, int *fds_tx, int *fds_rx, unsigned int nr_fds)
> +{
> + struct parasite_scm_fd args = { };
> + parasite_status_t *st =&args.status;
> + struct scm_fdset *fdset =&args.fdset;
> + int ret = -1;
> + int sock;
> +
> + args.sun_len = gen_parasite_saddr(&args.saddr, (int)-2u);
> +
> + sock = socket(PF_UNIX, SOCK_DGRAM, 0);
> + if (sock< 0) {
> + pr_perror("Can't create socket");
> + return -1;
> + }
> +
> + ret = bind(sock, (struct sockaddr *)&args.saddr, args.sun_len);
> + if (ret< 0) {
> + pr_perror("Can't bind socket");
> + goto err;
> + }
> +
> + while (nr_fds) {
> + int *__fds_tx;
> + int *__fds_rx;
> +
> + scm_fdset_init(fdset);
> + scm_fdset_update(fdset, nr_fds);
> +
> + nr_fds -= fdset->__nr_fds;
> +
> + __fds_tx = scm_fdset_first(fdset);
> +
> + memcpy(__fds_tx, fds_tx, sizeof(int) * fdset->nr_fds_tx);
> + fds_tx += fdset->nr_fds_tx;
> +
> + ret = parasite_execute(PARASITE_CMD_SCM_FD, ctl, st, sizeof(args));
> + if (ret) {
> + pr_err("SCM_RIGHTS failed on TX: %d\n", ret);
> + goto err;
> + }
> +
> + scm_fdset_init(fdset);
> + scm_fdset_set_addr(fdset,&args.saddr, args.sun_len);
> +
> + ret = scm_fdset_recv(sock, fdset);
> + if (ret) {
> + pr_err("SCM_RIGHTS failed on RX: %d (%s)\n",
> + ret, strerror(-ret));
> + goto err;
> + }
> + __fds_rx = scm_fdset_first(fdset);
> +
> +#if 1
> + {
> + int i = 0;
> + for (i = 0; i< fdset->nr_fds_rx; i++)
> + pr_debug("\tRX: %016x\n", __fds_rx[i]);
> + }
> +#endif
> +
> + memcpy(fds_rx, __fds_rx, sizeof(int) * fdset->nr_fds_rx);
> + fds_rx += fdset->nr_fds_rx;
> + }
> + ret = 0;
> +
> +err:
> + close(sock);
> + return ret;
> +}
> +
> int parasite_dump_socket_info(struct parasite_ctl *ctl, struct cr_fdset *fdset,
> struct sk_queue *queue)
> {
> @@ -678,8 +750,8 @@ struct parasite_ctl *parasite_infect_seized(pid_t pid, struct list_head *vma_are
> goto err;
> }
>
> - ctl->pid = pid;
> - ctl->syscall_ip = vma_area->vma.start;
> + ctl->pid = pid;
> + ctl->syscall_ip = vma_area->vma.start;
>
> /*
> * Inject syscall instruction and remember original code,
> @@ -734,7 +806,7 @@ struct parasite_ctl *parasite_infect_seized(pid_t pid, struct list_head *vma_are
> ctl->addr_cmd = (void *)PARASITE_CMD_ADDR((unsigned long)ctl->local_map);
> ctl->addr_args = (void *)PARASITE_ARGS_ADDR((unsigned long)ctl->local_map);
>
> - ret = parasite_init(ctl, pid);
> + ret = parasite_init(ctl);
> if (ret) {
> pr_err("%d: Can't create a transport socket\n", pid);
> goto err_restore;
> @@ -742,7 +814,7 @@ struct parasite_ctl *parasite_infect_seized(pid_t pid, struct list_head *vma_are
>
> ctl->signals_blocked = 1;
>
> - ret = parasite_set_logfd(ctl, pid);
> + ret = parasite_set_logfd(ctl);
> if (ret) {
> pr_err("%d: Can't set a logging descriptor\n", pid);
> goto err_restore;
> diff --git a/parasite.c b/parasite.c
> index 53b34a3..2de2cd1 100644
> --- a/parasite.c
> +++ b/parasite.c
> @@ -85,6 +85,27 @@ static unsigned long builtin_strlen(char *str)
> return len;
> }
>
> +static int builtin_atoi(char *str)
> +{
> + int ret = str[0] - '0';
> + int sign = 1;
> +
> + while (*str == ' ')
> + str++;
> +
> + if (*str == '-')
> + sign = -1;
> +
> + while (*str++) {
> + int v = str[0] - '0';
> + if (v< 0 || v> 9)
> + break;
> + ret = ret * 10 + v;
> + }
> +
> + return ret * sign;
> +}
> +
> static const unsigned char hex[] = "0123456789abcdef";
> static char *long2hex(unsigned long v)
> {
> @@ -512,6 +533,47 @@ err_dmp:
> return ret;
> }
>
> +static int scm_fd(struct parasite_scm_fd *args)
> +{
> + parasite_status_t *st =&args->status;
> + struct scm_fdset *fdset =&args->fdset;
> + int __nr_fds = fdset->__nr_fds;
> + int ret;
> +
> + /*
> + * Need to reinit and bind the address, since
> + * this fdset came from a different address
> + * space. Note we don't touch the data it contains.
> + */
> + scm_fdset_init(fdset);
> + scm_fdset_set_addr(fdset,&args->saddr, args->sun_len);
> + scm_fdset_update(fdset, __nr_fds);
> +
> +#if 1
> + sys_write_msg("\tNRfds: ");
> + sys_write_msg(long2hex(fdset->__nr_fds));
> + sys_write_msg("\n");
> +
> + {
> + int i;
> + int *fd = scm_fdset_first(fdset);
> + for (i = 0; i< fdset->__nr_fds; i++) {
> + sys_write_msg("\tTX: ");
> + sys_write_msg(long2hex(fd[i]));
> + sys_write_msg("\n");
> + }
> + }
> +#endif
> +
> + ret = scm_fdset_send(tsock, fdset);
> + if (ret<= 0) {
> + sys_write_msg("scm_fdset_send failed\n");
> + SET_PARASITE_ERR(st, ret);
> + }
> +
> + return 0;
> +}
> +
> static int init(struct parasite_init_args *args)
> {
> parasite_status_t *st =&args->status;
> @@ -579,6 +641,7 @@ static int __used parasite_service(unsigned long cmd, void *args)
> BUILD_BUG_ON(sizeof(struct parasite_dump_misc)> PARASITE_ARG_SIZE);
> BUILD_BUG_ON(sizeof(struct parasite_dump_tid_addr)> PARASITE_ARG_SIZE);
> BUILD_BUG_ON(sizeof(struct parasite_dump_sk_queues)> PARASITE_ARG_SIZE);
> + BUILD_BUG_ON(sizeof(struct parasite_scm_fd)> PARASITE_ARG_SIZE);
>
> switch (cmd) {
> case PARASITE_CMD_INIT:
> @@ -603,6 +666,8 @@ static int __used parasite_service(unsigned long cmd, void *args)
> return dump_tid_addr((struct parasite_dump_tid_addr *)args);
> case PARASITE_CMD_DUMP_SK_QUEUES:
> return dump_skqueues((struct parasite_dump_sk_queues *)args);
> + case PARASITE_CMD_SCM_FD:
> + return scm_fd((struct parasite_scm_fd *)args);
> default:
> {
> parasite_status_t *st = (parasite_status_t *)args;
> diff --git a/util-net.c b/util-net.c
> index dad02c4..492bdda 100644
> --- a/util-net.c
> +++ b/util-net.c
> @@ -1,8 +1,110 @@
> #include<sys/socket.h>
> #include<sys/un.h>
> +#include<errno.h>
>
> +#include "compiler.h"
> +#include "types.h"
> #include "syscall.h"
>
> +#include "util-net.h"
> +
> +/*
> + * Setup the number of FDs placed into FDs array.
> + *
> + * The count is clamped to the [0, CR_SCM_MAX_FD] range; the
> + * clamped value is stored in the fdset and returned.
> + */
> +int scm_fdset_update(struct scm_fdset *fdset, int nr_fds)
> +{
> +	int min_fd = min(nr_fds, CR_SCM_MAX_FD);
> +	struct cmsghdr *cmsg;
> +
> +	/* A negative count would produce a bogus control length */
> +	if (min_fd < 0)
> +		min_fd = 0;
> +
> +	/*
> +	 * Set msg_controllen before CMSG_FIRSTHDR(): the macro yields
> +	 * NULL when the current control length is smaller than a
> +	 * cmsghdr, e.g. after a recvmsg() that delivered no control
> +	 * data.
> +	 */
> +	fdset->hdr.msg_controllen = CMSG_LEN(sizeof(int) * min_fd);
> +
> +	cmsg = CMSG_FIRSTHDR(&fdset->hdr);
> +	cmsg->cmsg_len = fdset->hdr.msg_controllen;
> +	fdset->__nr_fds = min_fd;
> +
> +	return min_fd;
> +}
> +
> +/* Gives the start of the FDs array inside the control message */
> +int *scm_fdset_first(struct scm_fdset *fdset)
> +{
> +	return (int *)CMSG_DATA(CMSG_FIRSTHDR(&fdset->hdr));
> +}
> +
> +/*
> + * Prepares the fdset for a transfer of up to CR_SCM_MAX_FD
> + * descriptors and returns a pointer to the place where the
> + * FDs should be stored.
> + */
> +int *scm_fdset_init(struct scm_fdset *fdset)
> +{
> +	struct msghdr *hdr = &fdset->hdr;
> +	struct cmsghdr *ch;
> +
> +	BUILD_BUG_ON(CR_SCM_MAX_FD > SCM_MAX_FD);
> +	BUILD_BUG_ON(sizeof(fdset->msg_buf) < (CMSG_SPACE(sizeof(int) * CR_SCM_MAX_FD)));
> +
> +	fdset->__pad = 0;
> +	fdset->__nr_fds = CR_SCM_MAX_FD;
> +
> +	/* The only regular payload is the pad field */
> +	fdset->iov.iov_len = sizeof(fdset->__pad);
> +	fdset->iov.iov_base = &fdset->__pad;
> +
> +	/* msg_name and msg_namelen are left for the caller to assign */
> +	hdr->msg_iovlen = 1;
> +	hdr->msg_iov = &fdset->iov;
> +
> +	hdr->msg_controllen = CMSG_LEN(sizeof(int) * CR_SCM_MAX_FD);
> +	hdr->msg_control = &fdset->msg_buf;
> +
> +	/* The control buffer must be attached before the header lookup */
> +	ch = CMSG_FIRSTHDR(hdr);
> +	ch->cmsg_type = SCM_RIGHTS;
> +	ch->cmsg_level = SOL_SOCKET;
> +	ch->cmsg_len = hdr->msg_controllen;
> +
> +	return scm_fdset_first(fdset);
> +}
> +
> +/*
> + * Binds a destination address to the fdset. Kept apart from
> + * scm_fdset_init() so the address can be assigned separately.
> + */
> +void scm_fdset_set_addr(struct scm_fdset *fdset, struct sockaddr_un *saddr, int saddr_len)
> +{
> +	struct msghdr *hdr = &fdset->hdr;
> +
> +	hdr->msg_namelen = saddr_len;
> +	hdr->msg_name = (struct sockaddr *)saddr;
> +}
> +
> +/*
> + * Sends the fdset over the socket. Gives back fdset->nr_fds_tx
> + * when sys_sendmsg() reports a positive result, otherwise the
> + * sys_sendmsg() result itself (zero or negative).
> + */
> +int scm_fdset_send(int sock, struct scm_fdset *fdset)
> +{
> +	int sent = sys_sendmsg(sock, &fdset->hdr, 0);
> +
> +	if (sent > 0)
> +		return fdset->nr_fds_tx;
> +
> +	return sent;
> +}
> +
> +/*
> + * Receives a set of FDs from the socket into the fdset, which
> + * should be already init'ed. The number of descriptors received
> + * is stored in fdset->nr_fds_rx.
> + *
> + * Returns 0 on success or a negative value on error.
> + */
> +int scm_fdset_recv(int sock, struct scm_fdset *fdset)
> +{
> +	struct cmsghdr *cmsg;
> +	int min_fd;
> +	int ret;
> +
> +	/* Offer room for the maximum number of descriptors */
> +	scm_fdset_update(fdset, CR_SCM_MAX_FD);
> +
> +	ret = sys_recvmsg(sock, &fdset->hdr, 0);
> +	if (ret < 0)
> +		return ret;
> +
> +	cmsg = CMSG_FIRSTHDR(&fdset->hdr);
> +	if (!cmsg || cmsg->cmsg_type != SCM_RIGHTS)
> +		return -EINVAL;
> +
> +	/* A length shorter than the header would wrap around below */
> +	if (cmsg->cmsg_len < CMSG_LEN(0))
> +		return -EINVAL;
> +
> +	/* CMSG_LEN(0) is the header size including alignment padding */
> +	min_fd = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
> +	min_fd = min(min_fd, CR_SCM_MAX_FD);
> +
> +	fdset->nr_fds_rx = min_fd;
> +
> +	return 0;
> +}
> +
> int send_fd(int sock, struct sockaddr_un *saddr, int len, int fd)
> {
> char cmsgbuf[CMSG_SPACE(sizeof(int))];
> @@ -57,7 +159,7 @@ int recv_fd(int sock)
> return ret;
>
> cmsg = CMSG_FIRSTHDR(&msg);
> - if (!cmsg || !cmsg->cmsg_type == SCM_RIGHTS)
> + if (!cmsg || cmsg->cmsg_type != SCM_RIGHTS)
> return -2;
>
> cmsg_data = (int *)CMSG_DATA(cmsg);
> --
> 1.7.7.6
>
> _______________________________________________
> CRIU mailing list
> CRIU at openvz.org
> https://openvz.org/mailman/listinfo/criu
--
Best regards,
Stanislav Kinsbursky
More information about the CRIU
mailing list