[CRIU] Re: [RFC] fds TX/RX

Pavel Emelyanov xemul at parallels.com
Fri Mar 16 12:36:36 EDT 2012


The patch was corrupted by mailserver. Big patches should be sent as
attachments to avoid this.

>  struct fd_parms {
> -       unsigned long   fd_name;
> -       unsigned long   pos;
> -       unsigned int    flags;
> -       unsigned int    type;
> +       unsigned int            scm_fd_locl;    /* our fd number transferred from parasite */
> +       unsigned int            scm_fd_orig;    /* corresponding original fd */

Simple names like fd and lfd are more than enough.

And the fd_parms is not a structure used to reduce the amount of arguments
walking up and down the stack. It should describe a descriptor parameters.
And nothing (like context things) more. The local instance of a descriptor
is not the former but the latter.

> +       struct vma_entry        *vma_entry;     /* Used for VMA file maps */

What does this vma_entry do with this patch?

> 
> -       u64             id;
> -       pid_t           pid;
> +       unsigned long           pos;
> +       unsigned int            flags;
> +       unsigned int            type;
> +
> +       u64                     id;
> +       pid_t                   pid;

No cleanups in the middle of the big patch.

>  };
> 
>  static char big_buffer[PATH_MAX];
> @@ -97,16 +101,15 @@ err:
>         return ret;
>  }
> 
> -static int dump_one_reg_file(const struct fd_parms *p, int lfd,
> -                            const struct cr_fdset *cr_fdset,
> -                            bool do_close_lfd)
> +static int dump_one_reg_file(const struct fd_parms *params,
> +                            const struct cr_fdset *cr_fdset)

If you add or remove args from fn don't rename other in the same patch.

>  {
>         struct fdinfo_entry e;
>         char fd_str[128];
>         int len;
>         int ret = -1;
> 
> -       snprintf(fd_str, sizeof(fd_str), "/proc/self/fd/%d", lfd);
> +       snprintf(fd_str, sizeof(fd_str), "/proc/self/fd/%d", params->scm_fd_locl);
>         len = readlink(fd_str, big_buffer, sizeof(big_buffer) - 1);
>         if (len < 0) {
>                 pr_perror("Can't readlink %s", fd_str);
> @@ -163,35 +168,41 @@ static int dump_task_special_files(pid_t pid, const struct cr_fdset *cr_fdset)
>         int fd, ret;
> 
>         /* Dump /proc/pid/cwd */
> +       fd = open_proc(pid, "cwd");
> +       if (fd < 0)
> +               return -1;
>         params = (struct fd_parms) {
> +               .scm_fd_orig    = fd,

Should be some invalid value.

> +               .scm_fd_locl    = fd,
>                 .id             = FD_ID_INVALID,
>                 .pid            = FD_PID_INVALID,
>                 .type           = FDINFO_CWD,
>         };
> 
> -       fd = open_proc(pid, "cwd");
> -       if (fd < 0)
> -               return -1;
> -       ret = dump_one_reg_file(&params, fd, cr_fdset, 1);
> +       ret = dump_one_reg_file(&params, cr_fdset);
> +       close(fd);
>         if (ret)
>                 return ret;
> 
>         /* Dump /proc/pid/exe */
> +       fd = open_proc(pid, "exe");
> +       if (fd < 0)
> +               return -1;
>         params = (struct fd_parms) {
> +               .scm_fd_orig    = fd,

Same here.

> +               .scm_fd_locl    = fd,
>                 .id             = FD_ID_INVALID,
>                 .pid            = FD_PID_INVALID,
>                 .type           = FDINFO_EXE,
>         };
> 
> -       fd = open_proc(pid, "exe");
> -       if (fd < 0)
> -               return -1;
> -       ret = dump_one_reg_file(&params, fd, cr_fdset, 1);
> +       ret = dump_one_reg_file(&params, cr_fdset);
> +       close(fd);
> 
>         return ret;
>  }
> 
> @@ -246,108 +257,86 @@ err:
> 
> -static int dump_one_fd(pid_t pid, int pid_fd_dir, const char *d_name,
> +static int dump_one_fd(struct fd_parms *params,
>                        const struct cr_fdset *cr_fdset,
>                        struct sk_queue *sk_queue)
>  {
>         struct stat fd_stat;
>         int err = -1;
> -       struct fd_parms p;
> -       int lfd;
> 
> -       if (read_fd_params(pid, d_name, &p))
> -               return -1;
> -
> -       lfd = openat(pid_fd_dir, d_name, O_RDONLY);
> -       if (lfd < 0) {
> -               err = try_dump_socket(pid, p.fd_name, cr_fdset, sk_queue);
> -               if (err != 1)
> -                       return err;
> +       pr_info("%d fdinfo %d (%d): pos: %16lx flags: %16o\n",
> +               params->pid, params->scm_fd_orig, params->scm_fd_locl,
> +               params->pos, params->flags);
> 
> -               pr_perror("Failed to open %d/%ld", pid_fd_dir, p.fd_name);
> -               return -1;
> +       /*
> +        * Check if it's a socket.
> +        */
> +       {

Putting a { } block into the middle of a function is reprehensible.
Don't do this again, please.

> +               int fd = open_proc_nocheck(params->pid, "fd/%d", params->scm_fd_orig);
> +               if (fd < 0) {
> +                       err = try_dump_socket(params->pid, params->scm_fd_orig,
> +                                             cr_fdset, sk_queue);
> +                       if (err != 1)
> +                               return err;
> +
> +                       pr_perror("Failed to open %d", params->scm_fd_orig);
> +                       return -1;
> +               } else
> +                       close(fd);
>         }
> 
>         /*
> +        * Parasite should transfer file descriptors
> +        * to our space.
> +        */

Pointless comment.

> +       ret = parasite_tx_fds_seized(ctl, fds_map->fd_orig, fds_map->fd_locl, fds_map->nr_fds);

Use word "drain" for anything related to getting fds from parasite.
In particular this fn should be called parasaite_drain_fds_seized.

> +       if (ret)
> +               return -1;
> +
> +       /*
>          * Dump special files at the beginning. We might need
>          * to re-read them in restorer, so better to make it
>          * fast.
>          */
> -       if (dump_task_special_files(pid, cr_fdset)) {
> +       if (dump_task_special_files(ctl->pid, cr_fdset)) {
>                 pr_err("Can't dump special files\n");
>                 return -1;
>         }
> 
> -       fd_dir = opendir_proc(pid, "fd");
> -       if (!fd_dir)
> -               return -1;
> +       for (i = 0; i < fds_map->nr_fds; i++) {
> 
> -       while ((de = readdir(fd_dir))) {
> -               if (!strcmp(de->d_name, "."))
> -                       continue;
> -               if (!strcmp(de->d_name, ".."))
> -                       continue;
> -               if (dump_one_fd(pid, dirfd(fd_dir), de->d_name, cr_fdset,
> -                               sk_queue))
> +               params.scm_fd_locl      = fds_map->fd_locl[i];
> +               params.scm_fd_orig      = fds_map->fd_orig[i];
> +               params.pos              = lseek(params.scm_fd_locl, 0, SEEK_CUR);
> +               params.flags            = fcntl(params.scm_fd_locl, F_GETFL);
> +               params.pid              = ctl->pid;
> +               params.id               = FD_ID_INVALID;

There used to be a plain logic over this code.

* open local fd
* read its params
* write it into file

each was written in its own function. Now the step #2 is moved outside and inlined here.

Plz, just patch the read_fd_parms fn for reading pos, flags, etc in a new manner.

> +               ret = dump_one_fd(&params, cr_fdset, sk_queue);
> +               if (ret)
>                         return -1;
>         }
> 
>         pr_info("----------------------------------------\n");
> 
> -       closedir(fd_dir);
>         return 0;
>  }
> 
> +void fds_scm_map_destroy(struct fds_scm_map **map)
> +{
> +       if (!map || !*map)
> +               return;
> +
> +       if ((*map)->fd_locl != MAP_FAILED)
> +               munmap((*map)->fd_locl, (*map)->size_fd_locl);
> +
> +       if ((*map)->fd_orig != MAP_FAILED)
> +               munmap((*map)->fd_orig, (*map)->size_fd_orig);
> +
> +       xfree(*map);
> +       *map = NULL;
> +}
> +
> +int fds_scm_map_expand(struct fds_scm_map *map)

Have you ever seen an app holding more than 1024 descriptors open? Me not.
Plz, just allocate a single page and abort dumping if it's not enough.

> +{
> +       int *new;
> +
> +       new = mremap(map->fd_locl, map->size_fd_locl,
> +                    map->size_fd_locl + PAGE_SIZE, MREMAP_MAYMOVE);
> +       if (new == MAP_FAILED)
> +               goto err_nomem;
> +
> +       map->fd_locl = new;
> +       map->size_fd_locl += PAGE_SIZE;
> +
> +       new = mremap(map->fd_orig, map->size_fd_orig,
> +                    map->size_fd_orig + PAGE_SIZE, MREMAP_MAYMOVE);
> +       if (new == MAP_FAILED)
> +               goto err_nomem;
> +
> +       map->fd_orig = new;
> +       map->size_fd_orig += PAGE_SIZE;
> +
> +       return 0;
> +err_nomem:
> +       pr_perror("Can't expand memory for SCM fd map");
> +       return -ENOMEM;
> +}
> +
> +int fds_scm_collect_orig(pid_t pid, struct fds_scm_map **map)

This code has nothing to do with scm.

> +{
> +       unsigned int n = 0;
> +       struct dirent *d;
> +       DIR *dir;
> +
> +       BUG_ON(*map);
> +
> +       pr_info("\n");
> +       pr_info("Collecting fds (pid: %d)\n", pid);
> +       pr_info("----------------------------------------\n");
> +
> +       *map = fds_scm_map_create();
> +       if (!*map)
> +               return -1;
> +
> +       dir = opendir_proc(pid, "fd");
> +       if (!dir)
> +               return -1;
> +
> +       while ((d = readdir(dir))) {
> +               if (d->d_name[0] == '.')
> +                       continue;
> +
> +               if (SCM_MAP_NEED_EXPAND((*map)->size_fd_orig, n + 1)) {
> +                       if (fds_scm_map_expand(*map))
> +                               return -ENOMEM;
> +               }
> +
> +               (*map)->fd_orig[n] = atoi(d->d_name);
> +               n++;
> +       }
> +
> +       pr_info("Found %d file descriptors\n", n);
> +
> +       (*map)->nr_fds = n;
> +
> +       pr_info("----------------------------------------\n");
> +       return 0;
> +
> +err_nomem:
> +       pr_perror("Can't allocate memory for %d\n", pid);
> +       return -ENOMEM;
> +}
> diff --git a/include/fds-scm-map.h b/include/fds-scm-map.h
> new file mode 100644
> index 0000000..393d28f
> --- /dev/null
> +++ b/include/fds-scm-map.h
> @@ -0,0 +1,22 @@
> +#ifndef FDS_SCM_MAP
> +#define FDS_SCM_MAP
> +
> +#include <sys/types.h>
> +
> +extern struct fds_scm_map *fds_scm_map_create(void);

static

> +extern void fds_scm_map_destroy(struct fds_scm_map **map);
> +extern int fds_scm_map_expand(struct fds_scm_map *map);

static 

> +extern int fds_scm_collect_orig(pid_t pid, struct fds_scm_map **map);
> +
> +struct fds_scm_map {
> +       int             *fd_locl;
> +       int             *fd_orig;
> +
> +       unsigned int    nr_fds;
> +       unsigned long   size_fd_locl;
> +       unsigned long   size_fd_orig;

size_fd_locl == size_fd_orig = ROUND_UP(nr_fds * sizeof(int), PAGE_SIZE)

IOW -- you only need one variable.

> +};
> +
> +#define SCM_MAP_NEED_EXPAND(size, nr)  (((size) / sizeof(int)) <= (nr))

Local to respective .c file
The macros name is longer than the code it covers -> useless

> +
> +#endif /* FDSET_SCM_MAP */
> diff --git a/include/parasite-syscall.h b/include/parasite-syscall.h
> index 5ae1554..1319f2e 100644
> --- a/include/parasite-syscall.h
> +++ b/include/parasite-syscall.h
> @@ -31,6 +31,7 @@ struct parasite_ctl {
> 
>  extern int parasite_dump_sigacts_seized(struct parasite_ctl *ctl, struct cr_fdset *cr_fdset);
>  extern int parasite_dump_itimers_seized(struct parasite_ctl *ctl, struct cr_fdset *cr_fdset);
> +extern int parasite_tx_fds_seized(struct parasite_ctl *ctl, int *fds_tx, int *fds_rx, unsigned int nr_fds);
> 
>  struct parasite_dump_misc;
>  extern int parasite_dump_misc_seized(struct parasite_ctl *ctl, struct parasite_dump_misc *misc);
> diff --git a/include/parasite.h b/include/parasite.h
> @@ -678,8 +750,8 @@ struct parasite_ctl *parasite_infect_seized(pid_t pid, struct list_head *vma_are
>                 goto err;
>         }
> 
> -       ctl->pid        = pid;
> -       ctl->syscall_ip = vma_area->vma.start;
> +       ctl->pid                = pid;
> +       ctl->syscall_ip         = vma_area->vma.start;

No cleanups in the middle of the big patch.

> 
>         /*
>          * Inject syscall instruction and remember original code,
> @@ -734,7 +806,7 @@ struct parasite_ctl *parasite_infect_seized(pid_t pid, struct list_head *vma_are
>         ctl->addr_cmd           = (void *)PARASITE_CMD_ADDR((unsigned long)ctl->local_map);
>         ctl->addr_args          = (void *)PARASITE_ARGS_ADDR((unsigned long)ctl->local_map);
> 
> -       ret = parasite_init(ctl, pid);
> +       ret = parasite_init(ctl);

No cleanups in the middle of the big patch.

>         if (ret) {
>                 pr_err("%d: Can't create a transport socket\n", pid);
>                 goto err_restore;
> @@ -742,7 +814,7 @@ struct parasite_ctl *parasite_infect_seized(pid_t pid, struct list_head *vma_are
> 
>         ctl->signals_blocked = 1;
> 
> -       ret = parasite_set_logfd(ctl, pid);
> +       ret = parasite_set_logfd(ctl);

No cleanups in the middle of the big patch.
I'm sick and tired of reminding this again and again.

>         if (ret) {
>                 pr_err("%d: Can't set a logging descriptor\n", pid);
>                 goto err_restore;
> diff --git a/parasite.c b/parasite.c
> index 53b34a3..2de2cd1 100644
> --- a/parasite.c
> +++ b/parasite.c
> @@ -85,6 +85,27 @@ static unsigned long builtin_strlen(char *str)
>         return len;
>  }
> 
> +static int builtin_atoi(char *str)
> +{
> +       int ret = str[0] - '0';
> +       int sign = 1;
> +
> +       while (*str == ' ')
> +               str++;
> +
> +       if (*str == '-')
> +               sign = -1;
> +
> +       while (*str++) {
> +               int v = str[0] - '0';
> +               if (v < 0 || v > 9)
> +                       break;
> +               ret = ret * 10 + v;
> +       }
> +
> +       return ret * sign;
> +}

We removed this one several days ago and you don't use it in this patch...

> +
>  static const unsigned char hex[] = "0123456789abcdef";
>  static char *long2hex(unsigned long v)
>  {
> @@ -512,6 +533,47 @@ err_dmp:
>         return ret;
>  }
> 
> +static int scm_fd(struct parasite_scm_fd *args)

Bad name.

> +{
> +       parasite_status_t *st   = &args->status;
> +       struct scm_fdset *fdset = &args->fdset;
> +       int __nr_fds            = fdset->__nr_fds;
> +       int ret;
> +
> +       /*
> +        * Need to reinit and bind the address,
> +        * this fdset came from different address
> +        * space. Note we don't poke data it consist.
> +        */
> +       scm_fdset_init(fdset);
> +       scm_fdset_set_addr(fdset, &args->saddr, args->sun_len);
> +       scm_fdset_update(fdset, __nr_fds);
> +
> +#if 1
> +       sys_write_msg("\tNRfds: ");
> +       sys_write_msg(long2hex(fdset->__nr_fds));
> +       sys_write_msg("\n");
> +
> +       {
> +               int i;
> +               int *fd = scm_fdset_first(fdset);
> +               for (i = 0; i < fdset->__nr_fds; i++) {
> +                       sys_write_msg("\tTX:    ");
> +                       sys_write_msg(long2hex(fd[i]));
> +                       sys_write_msg("\n");
> +               }
> +       }
> +#endif

What's this? A garbage?

> +
> +       ret = scm_fdset_send(tsock, fdset);
> +       if (ret <= 0) {
> +               sys_write_msg("scm_fdset_send failed\n");
> +               SET_PARASITE_ERR(st, ret);
> +       }
> +
> +       return 0;
> +}
> +
>  static int init(struct parasite_init_args *args)
>  {
>         parasite_status_t *st = &args->status;
> @@ -579,6 +641,7 @@ static int __used parasite_service(unsigned long cmd, void *args)
>         BUILD_BUG_ON(sizeof(struct parasite_dump_misc) > PARASITE_ARG_SIZE);
>         BUILD_BUG_ON(sizeof(struct parasite_dump_tid_addr) > PARASITE_ARG_SIZE);
>         BUILD_BUG_ON(sizeof(struct parasite_dump_sk_queues) > PARASITE_ARG_SIZE);
> +       BUILD_BUG_ON(sizeof(struct parasite_scm_fd) > PARASITE_ARG_SIZE);
> 
>         switch (cmd) {
>         case PARASITE_CMD_INIT:
> @@ -603,6 +666,8 @@ static int __used parasite_service(unsigned long cmd, void *args)
>                 return dump_tid_addr((struct parasite_dump_tid_addr *)args);
>         case PARASITE_CMD_DUMP_SK_QUEUES:
>                 return dump_skqueues((struct parasite_dump_sk_queues *)args);
> +       case PARASITE_CMD_SCM_FD:
> +               return scm_fd((struct parasite_scm_fd *)args);
>         default:
>                 {
>                         parasite_status_t *st = (parasite_status_t *)args;
> diff --git a/util-net.c b/util-net.c
> index dad02c4..492bdda 100644
> --- a/util-net.c
> +++ b/util-net.c
> @@ -1,8 +1,110 @@
>  #include <sys/socket.h>
>  #include <sys/un.h>

[snip]

> +       cmsg = CMSG_FIRSTHDR(&fdset->hdr);
> +       if (!cmsg || cmsg->cmsg_type != SCM_RIGHTS)
> +               return -EINVAL;
> +
> +       min_fd = (cmsg->cmsg_len - sizeof(struct cmsghdr)) / sizeof(int);
> +       min_fd = min(min_fd, CR_SCM_MAX_FD);
> +
> +       fdset->nr_fds_rx = min_fd;
> +
> +       return 0;
> +}
> +

The above is beyond good and evil, sorry :(

Just expand the existing send_fd and recv_fd to work on arbitrary set of fds and
make the former call theirs new versions with an array of lengh 1.

And send this in a separate patch.

>  int send_fd(int sock, struct sockaddr_un *saddr, int len, int fd)
>  {
>         char cmsgbuf[CMSG_SPACE(sizeof(int))];
> @@ -57,7 +159,7 @@ int recv_fd(int sock)
>                 return ret;
> 
>         cmsg = CMSG_FIRSTHDR(&msg);
> -       if (!cmsg || !cmsg->cmsg_type == SCM_RIGHTS)
> +       if (!cmsg || cmsg->cmsg_type != SCM_RIGHTS)
>                 return -2;
> 
>         cmsg_data = (int *)CMSG_DATA(cmsg);
> --
> 1.7.7.6
> 
> .
> 



More information about the CRIU mailing list