[CRIU] [PATCH] vma: Fix badly inherited FD in filemap_open
Andrei Vagin
avagin at virtuozzo.com
Wed Jun 14 01:45:45 MSK 2017
Applied, thanks
On Fri, Jun 09, 2017 at 07:23:37PM +0300, Pavel Emelyanov wrote:
> Previous patch (5a1e1aac) tried to minimize the amount of
> open()s called when mmap()ing the files. Unfortunatley, there
> was a mistake and wrong flags were compared which resulted in
> the whole optimization working randomly (typically not
> working).
>
> Fixing the flags comparison revealed another problem. The
> patch in question correllated with the 03e8c417 one, which
> caused some vmas to be opened and mmaped much later than the
> premap. When hitting the situation when vmas sharing their
> fds are partially premapped and partially not, the whole
> vm_open sharing became broken in multiple places -- either
> needed fd was not opened, or the not needed left un-closed.
>
> To fix this the context, that tracks whether the fd should
> be shared or not, should be moved from collect stage to
> the real opening loop. In this case we need to explicitly
> know which vmas _may_ share fds (file private and shared)
> with each other, so the sharing knowledge becomes spread
> between open_filemap() and its callers. Oh, well...
>
> Signed-off-by: Pavel Emelyanov <xemul at virtuozzo.com>
> ---
> criu/files-reg.c | 89 +++++++++++++++++++++++++++++++++++-------------
> criu/include/files-reg.h | 9 ++---
> criu/include/image.h | 2 +-
> criu/include/vma.h | 1 -
> criu/mem.c | 24 +++++++++----
> criu/pie/restorer.c | 2 +-
> 6 files changed, 88 insertions(+), 39 deletions(-)
>
> diff --git a/criu/files-reg.c b/criu/files-reg.c
> index e8ff68f..0a5fd83 100644
> --- a/criu/files-reg.c
> +++ b/criu/files-reg.c
> @@ -1626,14 +1626,58 @@ int open_reg_by_id(u32 id)
> return open_reg_fd(fd);
> }
>
> -static int borrow_filemap(int pid, struct vma_area *vma)
> -{
> - struct vma_area *fvma = vma->fvma;
> +struct filemap_ctx {
> + u32 flags;
> + struct file_desc *desc;
> + int fd;
> + /*
> + * Whether or not to close the fd when we're about to
> + * put a new one into ctx.
> + *
> + * True is used by premap, so that it just calls vm_open
> + * in sequence, immediatelly mmap()s the file, then it
> + * can be closed.
> + *
> + * False is used by open_vmas() which pre-opens the files
> + * for restorer, and the latter mmap()s them and closes.
> + *
> + * ...
> + */
> + bool close;
> + /* ...
> + *
> + * but closing all vmas won't work, as some of them share
> + * the descriptor, so only the ones that terminate the
> + * fd-sharing chain are marked with VMA_CLOSE flag, saying
> + * restorer to close the vma's fd.
> + *
> + * Said that, this vma pointer references the previously
> + * seen vma, so that once fd changes, this one gets the
> + * closing flag.
> + */
> + struct vma_area *vma;
> +};
>
> - BUG_ON(!(fvma->e->status & VMA_NO_CLOSE));
> - vma->e->fd = fvma->e->fd;
> +static struct filemap_ctx ctx;
>
> - return 0;
> +void filemap_ctx_init(bool auto_close)
> +{
> + ctx.desc = NULL; /* to fail the first comparison in open_ */
> + ctx.fd = -1; /* not to close random fd in _fini */
> + ctx.vma = NULL; /* not to put spurious VMA_CLOSE in _fini */
> + /* flags may remain any */
> + ctx.close = auto_close;
> +}
> +
> +void filemap_ctx_fini(void)
> +{
> + if (ctx.close) {
> + if (ctx.fd >= 0)
> + close(ctx.fd);
> + } else {
> + if (ctx.vma)
> + ctx.vma->e->status |= VMA_CLOSE;
> + }
> }
>
> static int open_filemap(int pid, struct vma_area *vma)
> @@ -1650,15 +1694,24 @@ static int open_filemap(int pid, struct vma_area *vma)
> BUG_ON((vma->vmfd == NULL) || !vma->e->has_fdflags);
> flags = vma->e->fdflags;
>
> - ret = open_path(vma->vmfd, do_open_reg_noseek_flags, &flags);
> - if (ret < 0)
> - return ret;
> + if (ctx.flags != flags || ctx.desc != vma->vmfd) {
> + ret = open_path(vma->vmfd, do_open_reg_noseek_flags, &flags);
> + if (ret < 0)
> + return ret;
>
> - vma->e->fd = ret;
> + filemap_ctx_fini();
> +
> + ctx.flags = flags;
> + ctx.desc = vma->vmfd;
> + ctx.fd = ret;
> + }
> +
> + ctx.vma = vma;
> + vma->e->fd = ctx.fd;
> return 0;
> }
>
> -int collect_filemap(struct vma_area *vma, struct vma_file_ctx *ctx)
> +int collect_filemap(struct vma_area *vma)
> {
> struct file_desc *fd;
>
> @@ -1677,19 +1730,7 @@ int collect_filemap(struct vma_area *vma, struct vma_file_ctx *ctx)
> return -1;
>
> vma->vmfd = fd;
> - if (ctx->vma && ctx->flags == vma->e->flags && ctx->fd == fd) {
> - vma->vm_open = borrow_filemap;
> - vma->fvma = ctx->vma;
> - ctx->vma->e->status |= VMA_NO_CLOSE;
> - /* Change VMA so that next borrower sets NO_CLOSE on us */
> - ctx->vma = vma;
> - } else {
> - vma->vm_open = open_filemap;
> - ctx->flags = vma->e->fdflags;
> - ctx->fd = fd;
> - ctx->vma = vma;
> - }
> -
> + vma->vm_open = open_filemap;
> return 0;
> }
>
> diff --git a/criu/include/files-reg.h b/criu/include/files-reg.h
> index afae362..1c49be9 100644
> --- a/criu/include/files-reg.h
> +++ b/criu/include/files-reg.h
> @@ -40,12 +40,9 @@ extern struct file_remap *lookup_ghost_remap(u32 dev, u32 ino);
>
> extern struct file_desc *try_collect_special_file(u32 id, int optional);
> #define collect_special_file(id) try_collect_special_file(id, 0)
> -struct vma_file_ctx {
> - u32 flags;
> - struct file_desc *fd;
> - struct vma_area *vma;
> -};
> -extern int collect_filemap(struct vma_area *, struct vma_file_ctx *ctx);
> +extern int collect_filemap(struct vma_area *);
> +extern void filemap_ctx_init(bool auto_close);
> +extern void filemap_ctx_fini(void);
>
> extern int collect_remaps_and_regfiles(void);
>
> diff --git a/criu/include/image.h b/criu/include/image.h
> index 9846512..d9c4bdb 100644
> --- a/criu/include/image.h
> +++ b/criu/include/image.h
> @@ -89,7 +89,7 @@
> #define VMA_AREA_VVAR (1 << 12)
> #define VMA_AREA_AIORING (1 << 13)
>
> -#define VMA_NO_CLOSE (1 << 28)
> +#define VMA_CLOSE (1 << 28)
> #define VMA_NO_PROT_WRITE (1 << 29)
> #define VMA_PREMMAPED (1 << 30)
> #define VMA_UNSUPP (1 << 31)
> diff --git a/criu/include/vma.h b/criu/include/vma.h
> index a1f914c..779466c 100644
> --- a/criu/include/vma.h
> +++ b/criu/include/vma.h
> @@ -55,7 +55,6 @@ struct vma_area {
> int (*vm_open)(int pid, struct vma_area *vma);
> struct file_desc *vmfd;
> struct vma_area *pvma; /* parent for inherited VMAs */
> - struct vma_area *fvma; /* vma from which to borrow a file */
> unsigned long *page_bitmap; /* existent pages */
> unsigned long premmaped_addr; /* restore only */
>
> diff --git a/criu/mem.c b/criu/mem.c
> index 0b3c900..3c7186a 100644
> --- a/criu/mem.c
> +++ b/criu/mem.c
> @@ -481,7 +481,6 @@ int prepare_mm_pid(struct pstree_item *i)
> int ret = -1, vn = 0;
> struct cr_img *img;
> struct rst_info *ri = rsti(i);
> - struct vma_file_ctx ctx = {};
>
> img = open_image(CR_FD_MM, O_RSTR, pid);
> if (!img)
> @@ -541,7 +540,7 @@ int prepare_mm_pid(struct pstree_item *i)
> ret = collect_shmem(pid, vma);
> else if (vma_area_is(vma, VMA_FILE_PRIVATE) ||
> vma_area_is(vma, VMA_FILE_SHARED))
> - ret = collect_filemap(vma, &ctx);
> + ret = collect_filemap(vma);
> else if (vma_area_is(vma, VMA_AREA_SOCKET))
> ret = collect_socket_map(vma);
> else
> @@ -707,10 +706,6 @@ static int premap_private_vma(struct pstree_item *t, struct vma_area *vma, void
> pr_perror("Unable to map ANON_VMA");
> return -1;
> }
> -
> - if (vma_area_is(vma, VMA_FILE_PRIVATE) &&
> - !vma_area_is(vma, VMA_NO_CLOSE))
> - close(vma->e->fd);
> } else {
> void *paddr;
>
> @@ -784,6 +779,8 @@ static int premap_priv_vmas(struct pstree_item *t, struct vm_area_list *vmas,
> int ret = 0;
> LIST_HEAD(empty);
>
> + filemap_ctx_init(true);
> +
> list_for_each_entry(vma, &vmas->h, list) {
> if (pstart > vma->e->start) {
> ret = -1;
> @@ -819,6 +816,8 @@ static int premap_priv_vmas(struct pstree_item *t, struct vm_area_list *vmas,
> break;
> }
>
> + filemap_ctx_fini();
> +
> return ret;
> }
>
> @@ -1128,6 +1127,8 @@ int open_vmas(struct pstree_item *t)
> struct vma_area *vma;
> struct vm_area_list *vmas = &rsti(t)->vmas;
>
> + filemap_ctx_init(false);
> +
> list_for_each_entry(vma, &vmas->h, list) {
> if (!vma_area_is(vma, VMA_AREA_REGULAR) || !vma->vm_open)
> continue;
> @@ -1140,8 +1141,19 @@ int open_vmas(struct pstree_item *t)
> pr_err("`- Can't open vma\n");
> return -1;
> }
> +
> + /*
> + * File mappings have vm_open set to open_filemap which, in
> + * turn, puts the VMA_CLOSE bit itself. For all the rest we
> + * need to put it by hads, so that the restorer closes the fd
> + */
> + if (!(vma_area_is(vma, VMA_FILE_PRIVATE) ||
> + vma_area_is(vma, VMA_FILE_SHARED)))
> + vma->e->status |= VMA_CLOSE;
> }
>
> + filemap_ctx_fini();
> +
> return 0;
> }
>
> diff --git a/criu/pie/restorer.c b/criu/pie/restorer.c
> index 3e61822..e8c1118 100644
> --- a/criu/pie/restorer.c
> +++ b/criu/pie/restorer.c
> @@ -650,7 +650,7 @@ static unsigned long restore_mapping(VmaEntry *vma_entry)
> vma_entry->pgoff);
>
> if ((vma_entry->fd != -1) &&
> - !(vma_entry->status & VMA_NO_CLOSE))
> + (vma_entry->status & VMA_CLOSE))
> sys_close(vma_entry->fd);
>
> return addr;
> --
> 2.1.4
> _______________________________________________
> CRIU mailing list
> CRIU at openvz.org
> https://lists.openvz.org/mailman/listinfo/criu
More information about the CRIU
mailing list