[CRIU] Re: [RFC] fds TX/RX
Pavel Emelyanov
xemul at parallels.com
Fri Mar 16 12:36:36 EDT 2012
The patch was corrupted by mailserver. Big patches should be sent as
attachments to avoid this.
> struct fd_parms {
> - unsigned long fd_name;
> - unsigned long pos;
> - unsigned int flags;
> - unsigned int type;
> + unsigned int scm_fd_locl; /* our fd number transferred from parasite */
> + unsigned int scm_fd_orig; /* corresponding original fd */
Simple names like fd and lfd are more than enough.
And the fd_parms is not a structure used to reduce the amount of arguments
walking up and down the stack. It should describe a descriptor parameters.
And nothing (like context things) more. The local instance of a descriptor
is not the former but the latter.
> + struct vma_entry *vma_entry; /* Used for VMA file maps */
What does this vma_entry do with this patch?
>
> - u64 id;
> - pid_t pid;
> + unsigned long pos;
> + unsigned int flags;
> + unsigned int type;
> +
> + u64 id;
> + pid_t pid;
No cleanups in the middle of the big patch.
> };
>
> static char big_buffer[PATH_MAX];
> @@ -97,16 +101,15 @@ err:
> return ret;
> }
>
> -static int dump_one_reg_file(const struct fd_parms *p, int lfd,
> - const struct cr_fdset *cr_fdset,
> - bool do_close_lfd)
> +static int dump_one_reg_file(const struct fd_parms *params,
> + const struct cr_fdset *cr_fdset)
If you add or remove args from fn don't rename other in the same patch.
> {
> struct fdinfo_entry e;
> char fd_str[128];
> int len;
> int ret = -1;
>
> - snprintf(fd_str, sizeof(fd_str), "/proc/self/fd/%d", lfd);
> + snprintf(fd_str, sizeof(fd_str), "/proc/self/fd/%d", params->scm_fd_locl);
> len = readlink(fd_str, big_buffer, sizeof(big_buffer) - 1);
> if (len < 0) {
> pr_perror("Can't readlink %s", fd_str);
> @@ -163,35 +168,41 @@ static int dump_task_special_files(pid_t pid, const struct cr_fdset *cr_fdset)
> int fd, ret;
>
> /* Dump /proc/pid/cwd */
> + fd = open_proc(pid, "cwd");
> + if (fd < 0)
> + return -1;
> params = (struct fd_parms) {
> + .scm_fd_orig = fd,
Should be some invalid value.
> + .scm_fd_locl = fd,
> .id = FD_ID_INVALID,
> .pid = FD_PID_INVALID,
> .type = FDINFO_CWD,
> };
>
> - fd = open_proc(pid, "cwd");
> - if (fd < 0)
> - return -1;
> - ret = dump_one_reg_file(¶ms, fd, cr_fdset, 1);
> + ret = dump_one_reg_file(¶ms, cr_fdset);
> + close(fd);
> if (ret)
> return ret;
>
> /* Dump /proc/pid/exe */
> + fd = open_proc(pid, "exe");
> + if (fd < 0)
> + return -1;
> params = (struct fd_parms) {
> + .scm_fd_orig = fd,
Same here.
> + .scm_fd_locl = fd,
> .id = FD_ID_INVALID,
> .pid = FD_PID_INVALID,
> .type = FDINFO_EXE,
> };
>
> - fd = open_proc(pid, "exe");
> - if (fd < 0)
> - return -1;
> - ret = dump_one_reg_file(¶ms, fd, cr_fdset, 1);
> + ret = dump_one_reg_file(¶ms, cr_fdset);
> + close(fd);
>
> return ret;
> }
>
> @@ -246,108 +257,86 @@ err:
>
> -static int dump_one_fd(pid_t pid, int pid_fd_dir, const char *d_name,
> +static int dump_one_fd(struct fd_parms *params,
> const struct cr_fdset *cr_fdset,
> struct sk_queue *sk_queue)
> {
> struct stat fd_stat;
> int err = -1;
> - struct fd_parms p;
> - int lfd;
>
> - if (read_fd_params(pid, d_name, &p))
> - return -1;
> -
> - lfd = openat(pid_fd_dir, d_name, O_RDONLY);
> - if (lfd < 0) {
> - err = try_dump_socket(pid, p.fd_name, cr_fdset, sk_queue);
> - if (err != 1)
> - return err;
> + pr_info("%d fdinfo %d (%d): pos: %16lx flags: %16o\n",
> + params->pid, params->scm_fd_orig, params->scm_fd_locl,
> + params->pos, params->flags);
>
> - pr_perror("Failed to open %d/%ld", pid_fd_dir, p.fd_name);
> - return -1;
> + /*
> + * Check if it's a socket.
> + */
> + {
Putting a { } block into the middle of a function is reprehensible.
Don't do this again, please.
> + int fd = open_proc_nocheck(params->pid, "fd/%d", params->scm_fd_orig);
> + if (fd < 0) {
> + err = try_dump_socket(params->pid, params->scm_fd_orig,
> + cr_fdset, sk_queue);
> + if (err != 1)
> + return err;
> +
> + pr_perror("Failed to open %d", params->scm_fd_orig);
> + return -1;
> + } else
> + close(fd);
> }
>
> /*
> + * Parasite should transfer file descriptors
> + * to our space.
> + */
Pointless comment.
> + ret = parasite_tx_fds_seized(ctl, fds_map->fd_orig, fds_map->fd_locl, fds_map->nr_fds);
Use word "drain" for anything related to getting fds from parasite.
In particular this fn should be called parasaite_drain_fds_seized.
> + if (ret)
> + return -1;
> +
> + /*
> * Dump special files at the beginning. We might need
> * to re-read them in restorer, so better to make it
> * fast.
> */
> - if (dump_task_special_files(pid, cr_fdset)) {
> + if (dump_task_special_files(ctl->pid, cr_fdset)) {
> pr_err("Can't dump special files\n");
> return -1;
> }
>
> - fd_dir = opendir_proc(pid, "fd");
> - if (!fd_dir)
> - return -1;
> + for (i = 0; i < fds_map->nr_fds; i++) {
>
> - while ((de = readdir(fd_dir))) {
> - if (!strcmp(de->d_name, "."))
> - continue;
> - if (!strcmp(de->d_name, ".."))
> - continue;
> - if (dump_one_fd(pid, dirfd(fd_dir), de->d_name, cr_fdset,
> - sk_queue))
> + params.scm_fd_locl = fds_map->fd_locl[i];
> + params.scm_fd_orig = fds_map->fd_orig[i];
> + params.pos = lseek(params.scm_fd_locl, 0, SEEK_CUR);
> + params.flags = fcntl(params.scm_fd_locl, F_GETFL);
> + params.pid = ctl->pid;
> + params.id = FD_ID_INVALID;
There used to be a plain logic over this code.
* open local fd
* read its params
* write it into file
each was written in its own function. Now the step #2 is moved outside and inlined here.
Plz, just patch the read_fd_parms fn for reading pos, flags, etc in a new manner.
> + ret = dump_one_fd(¶ms, cr_fdset, sk_queue);
> + if (ret)
> return -1;
> }
>
> pr_info("----------------------------------------\n");
>
> - closedir(fd_dir);
> return 0;
> }
>
> +void fds_scm_map_destroy(struct fds_scm_map **map)
> +{
> + if (!map || !*map)
> + return;
> +
> + if ((*map)->fd_locl != MAP_FAILED)
> + munmap((*map)->fd_locl, (*map)->size_fd_locl);
> +
> + if ((*map)->fd_orig != MAP_FAILED)
> + munmap((*map)->fd_orig, (*map)->size_fd_orig);
> +
> + xfree(*map);
> + *map = NULL;
> +}
> +
> +int fds_scm_map_expand(struct fds_scm_map *map)
Have you ever seen an app holding more than 1024 descriptors open? Me not.
Plz, just allocate a single page and abort dumping if it's not enough.
> +{
> + int *new;
> +
> + new = mremap(map->fd_locl, map->size_fd_locl,
> + map->size_fd_locl + PAGE_SIZE, MREMAP_MAYMOVE);
> + if (new == MAP_FAILED)
> + goto err_nomem;
> +
> + map->fd_locl = new;
> + map->size_fd_locl += PAGE_SIZE;
> +
> + new = mremap(map->fd_orig, map->size_fd_orig,
> + map->size_fd_orig + PAGE_SIZE, MREMAP_MAYMOVE);
> + if (new == MAP_FAILED)
> + goto err_nomem;
> +
> + map->fd_orig = new;
> + map->size_fd_orig += PAGE_SIZE;
> +
> + return 0;
> +err_nomem:
> + pr_perror("Can't expand memory for SCM fd map");
> + return -ENOMEM;
> +}
> +
> +int fds_scm_collect_orig(pid_t pid, struct fds_scm_map **map)
This code has nothing to do with scm.
> +{
> + unsigned int n = 0;
> + struct dirent *d;
> + DIR *dir;
> +
> + BUG_ON(*map);
> +
> + pr_info("\n");
> + pr_info("Collecting fds (pid: %d)\n", pid);
> + pr_info("----------------------------------------\n");
> +
> + *map = fds_scm_map_create();
> + if (!*map)
> + return -1;
> +
> + dir = opendir_proc(pid, "fd");
> + if (!dir)
> + return -1;
> +
> + while ((d = readdir(dir))) {
> + if (d->d_name[0] == '.')
> + continue;
> +
> + if (SCM_MAP_NEED_EXPAND((*map)->size_fd_orig, n + 1)) {
> + if (fds_scm_map_expand(*map))
> + return -ENOMEM;
> + }
> +
> + (*map)->fd_orig[n] = atoi(d->d_name);
> + n++;
> + }
> +
> + pr_info("Found %d file descriptors\n", n);
> +
> + (*map)->nr_fds = n;
> +
> + pr_info("----------------------------------------\n");
> + return 0;
> +
> +err_nomem:
> + pr_perror("Can't allocate memory for %d\n", pid);
> + return -ENOMEM;
> +}
> diff --git a/include/fds-scm-map.h b/include/fds-scm-map.h
> new file mode 100644
> index 0000000..393d28f
> --- /dev/null
> +++ b/include/fds-scm-map.h
> @@ -0,0 +1,22 @@
> +#ifndef FDS_SCM_MAP
> +#define FDS_SCM_MAP
> +
> +#include <sys/types.h>
> +
> +extern struct fds_scm_map *fds_scm_map_create(void);
static
> +extern void fds_scm_map_destroy(struct fds_scm_map **map);
> +extern int fds_scm_map_expand(struct fds_scm_map *map);
static
> +extern int fds_scm_collect_orig(pid_t pid, struct fds_scm_map **map);
> +
> +struct fds_scm_map {
> + int *fd_locl;
> + int *fd_orig;
> +
> + unsigned int nr_fds;
> + unsigned long size_fd_locl;
> + unsigned long size_fd_orig;
size_fd_locl == size_fd_orig = ROUND_UP(nr_fds * sizeof(int), PAGE_SIZE)
IOW -- you only need one variable.
> +};
> +
> +#define SCM_MAP_NEED_EXPAND(size, nr) (((size) / sizeof(int)) <= (nr))
Local to respective .c file
The macros name is longer than the code it covers -> useless
> +
> +#endif /* FDSET_SCM_MAP */
> diff --git a/include/parasite-syscall.h b/include/parasite-syscall.h
> index 5ae1554..1319f2e 100644
> --- a/include/parasite-syscall.h
> +++ b/include/parasite-syscall.h
> @@ -31,6 +31,7 @@ struct parasite_ctl {
>
> extern int parasite_dump_sigacts_seized(struct parasite_ctl *ctl, struct cr_fdset *cr_fdset);
> extern int parasite_dump_itimers_seized(struct parasite_ctl *ctl, struct cr_fdset *cr_fdset);
> +extern int parasite_tx_fds_seized(struct parasite_ctl *ctl, int *fds_tx, int *fds_rx, unsigned int nr_fds);
>
> struct parasite_dump_misc;
> extern int parasite_dump_misc_seized(struct parasite_ctl *ctl, struct parasite_dump_misc *misc);
> diff --git a/include/parasite.h b/include/parasite.h
> @@ -678,8 +750,8 @@ struct parasite_ctl *parasite_infect_seized(pid_t pid, struct list_head *vma_are
> goto err;
> }
>
> - ctl->pid = pid;
> - ctl->syscall_ip = vma_area->vma.start;
> + ctl->pid = pid;
> + ctl->syscall_ip = vma_area->vma.start;
No cleanups in the middle of the big patch.
>
> /*
> * Inject syscall instruction and remember original code,
> @@ -734,7 +806,7 @@ struct parasite_ctl *parasite_infect_seized(pid_t pid, struct list_head *vma_are
> ctl->addr_cmd = (void *)PARASITE_CMD_ADDR((unsigned long)ctl->local_map);
> ctl->addr_args = (void *)PARASITE_ARGS_ADDR((unsigned long)ctl->local_map);
>
> - ret = parasite_init(ctl, pid);
> + ret = parasite_init(ctl);
No cleanups in the middle of the big patch.
> if (ret) {
> pr_err("%d: Can't create a transport socket\n", pid);
> goto err_restore;
> @@ -742,7 +814,7 @@ struct parasite_ctl *parasite_infect_seized(pid_t pid, struct list_head *vma_are
>
> ctl->signals_blocked = 1;
>
> - ret = parasite_set_logfd(ctl, pid);
> + ret = parasite_set_logfd(ctl);
No cleanups in the middle of the big patch.
I'm sick and tired of reminding this again and again.
> if (ret) {
> pr_err("%d: Can't set a logging descriptor\n", pid);
> goto err_restore;
> diff --git a/parasite.c b/parasite.c
> index 53b34a3..2de2cd1 100644
> --- a/parasite.c
> +++ b/parasite.c
> @@ -85,6 +85,27 @@ static unsigned long builtin_strlen(char *str)
> return len;
> }
>
> +static int builtin_atoi(char *str)
> +{
> + int ret = str[0] - '0';
> + int sign = 1;
> +
> + while (*str == ' ')
> + str++;
> +
> + if (*str == '-')
> + sign = -1;
> +
> + while (*str++) {
> + int v = str[0] - '0';
> + if (v < 0 || v > 9)
> + break;
> + ret = ret * 10 + v;
> + }
> +
> + return ret * sign;
> +}
We removed this one several days ago and you don't use it in this patch...
> +
> static const unsigned char hex[] = "0123456789abcdef";
> static char *long2hex(unsigned long v)
> {
> @@ -512,6 +533,47 @@ err_dmp:
> return ret;
> }
>
> +static int scm_fd(struct parasite_scm_fd *args)
Bad name.
> +{
> + parasite_status_t *st = &args->status;
> + struct scm_fdset *fdset = &args->fdset;
> + int __nr_fds = fdset->__nr_fds;
> + int ret;
> +
> + /*
> + * Need to reinit and bind the address,
> + * this fdset came from different address
> + * space. Note we don't poke data it consist.
> + */
> + scm_fdset_init(fdset);
> + scm_fdset_set_addr(fdset, &args->saddr, args->sun_len);
> + scm_fdset_update(fdset, __nr_fds);
> +
> +#if 1
> + sys_write_msg("\tNRfds: ");
> + sys_write_msg(long2hex(fdset->__nr_fds));
> + sys_write_msg("\n");
> +
> + {
> + int i;
> + int *fd = scm_fdset_first(fdset);
> + for (i = 0; i < fdset->__nr_fds; i++) {
> + sys_write_msg("\tTX: ");
> + sys_write_msg(long2hex(fd[i]));
> + sys_write_msg("\n");
> + }
> + }
> +#endif
What's this? A garbage?
> +
> + ret = scm_fdset_send(tsock, fdset);
> + if (ret <= 0) {
> + sys_write_msg("scm_fdset_send failed\n");
> + SET_PARASITE_ERR(st, ret);
> + }
> +
> + return 0;
> +}
> +
> static int init(struct parasite_init_args *args)
> {
> parasite_status_t *st = &args->status;
> @@ -579,6 +641,7 @@ static int __used parasite_service(unsigned long cmd, void *args)
> BUILD_BUG_ON(sizeof(struct parasite_dump_misc) > PARASITE_ARG_SIZE);
> BUILD_BUG_ON(sizeof(struct parasite_dump_tid_addr) > PARASITE_ARG_SIZE);
> BUILD_BUG_ON(sizeof(struct parasite_dump_sk_queues) > PARASITE_ARG_SIZE);
> + BUILD_BUG_ON(sizeof(struct parasite_scm_fd) > PARASITE_ARG_SIZE);
>
> switch (cmd) {
> case PARASITE_CMD_INIT:
> @@ -603,6 +666,8 @@ static int __used parasite_service(unsigned long cmd, void *args)
> return dump_tid_addr((struct parasite_dump_tid_addr *)args);
> case PARASITE_CMD_DUMP_SK_QUEUES:
> return dump_skqueues((struct parasite_dump_sk_queues *)args);
> + case PARASITE_CMD_SCM_FD:
> + return scm_fd((struct parasite_scm_fd *)args);
> default:
> {
> parasite_status_t *st = (parasite_status_t *)args;
> diff --git a/util-net.c b/util-net.c
> index dad02c4..492bdda 100644
> --- a/util-net.c
> +++ b/util-net.c
> @@ -1,8 +1,110 @@
> #include <sys/socket.h>
> #include <sys/un.h>
[snip]
> + cmsg = CMSG_FIRSTHDR(&fdset->hdr);
> + if (!cmsg || cmsg->cmsg_type != SCM_RIGHTS)
> + return -EINVAL;
> +
> + min_fd = (cmsg->cmsg_len - sizeof(struct cmsghdr)) / sizeof(int);
> + min_fd = min(min_fd, CR_SCM_MAX_FD);
> +
> + fdset->nr_fds_rx = min_fd;
> +
> + return 0;
> +}
> +
The above is beyond good and evil, sorry :(
Just expand the existing send_fd and recv_fd to work on arbitrary set of fds and
make the former call theirs new versions with an array of lengh 1.
And send this in a separate patch.
> int send_fd(int sock, struct sockaddr_un *saddr, int len, int fd)
> {
> char cmsgbuf[CMSG_SPACE(sizeof(int))];
> @@ -57,7 +159,7 @@ int recv_fd(int sock)
> return ret;
>
> cmsg = CMSG_FIRSTHDR(&msg);
> - if (!cmsg || !cmsg->cmsg_type == SCM_RIGHTS)
> + if (!cmsg || cmsg->cmsg_type != SCM_RIGHTS)
> return -2;
>
> cmsg_data = (int *)CMSG_DATA(cmsg);
> --
> 1.7.7.6
>
> .
>
More information about the CRIU
mailing list