[CRIU] [PATCH] vma: Fix badly inherited FD in filemap_open

Andrei Vagin avagin at virtuozzo.com
Wed Jun 14 01:45:45 MSK 2017


Applied, thanks
On Fri, Jun 09, 2017 at 07:23:37PM +0300, Pavel Emelyanov wrote:
> Previous patch (5a1e1aac) tried to minimize the amount of
> open()s called when mmap()ing the files. Unfortunatley, there
> was a mistake and wrong flags were compared which resulted in
> the whole optimization working randomly (typically not
> working).
> 
> Fixing the flags comparison revealed another problem. The
> patch in question correllated with the 03e8c417 one, which
> caused some vmas to be opened and mmaped much later than the
> premap. When hitting the situation when vmas sharing their
> fds are partially premapped and partially not, the whole
> vm_open sharing became broken in multiple places -- either
> needed fd was not opened, or the not needed left un-closed.
> 
> To fix this the context, that tracks whether the fd should
> be shared or not, should be moved from collect stage to
> the real opening loop. In this case we need to explicitly
> know which vmas _may_ share fds (file private and shared)
> with each other, so the sharing knowledge becomes spread
> between open_filemap() and its callers. Oh, well...
> 
> Signed-off-by: Pavel Emelyanov <xemul at virtuozzo.com>
> ---
>  criu/files-reg.c         | 89 +++++++++++++++++++++++++++++++++++-------------
>  criu/include/files-reg.h |  9 ++---
>  criu/include/image.h     |  2 +-
>  criu/include/vma.h       |  1 -
>  criu/mem.c               | 24 +++++++++----
>  criu/pie/restorer.c      |  2 +-
>  6 files changed, 88 insertions(+), 39 deletions(-)
> 
> diff --git a/criu/files-reg.c b/criu/files-reg.c
> index e8ff68f..0a5fd83 100644
> --- a/criu/files-reg.c
> +++ b/criu/files-reg.c
> @@ -1626,14 +1626,58 @@ int open_reg_by_id(u32 id)
>  	return open_reg_fd(fd);
>  }
>  
> -static int borrow_filemap(int pid, struct vma_area *vma)
> -{
> -	struct vma_area *fvma = vma->fvma;
> +struct filemap_ctx {
> +	u32 flags;
> +	struct file_desc *desc;
> +	int fd;
> +	/*
> +	 * Whether or not to close the fd when we're about to
> +	 * put a new one into ctx.
> +	 *
> +	 * True is used by premap, so that it just calls vm_open
> +	 * in sequence, immediatelly mmap()s the file, then it
> +	 * can be closed.
> +	 *
> +	 * False is used by open_vmas() which pre-opens the files
> +	 * for restorer, and the latter mmap()s them and closes.
> +	 *
> +	 * ...
> +	 */
> +	bool close;
> +	/* ...
> +	 *
> +	 * but closing all vmas won't work, as some of them share
> +	 * the descriptor, so only the ones that terminate the
> +	 * fd-sharing chain are marked with VMA_CLOSE flag, saying
> +	 * restorer to close the vma's fd.
> +	 *
> +	 * Said that, this vma pointer references the previously
> +	 * seen vma, so that once fd changes, this one gets the
> +	 * closing flag.
> +	 */
> +	struct vma_area *vma;
> +};
>  
> -	BUG_ON(!(fvma->e->status & VMA_NO_CLOSE));
> -	vma->e->fd = fvma->e->fd;
> +static struct filemap_ctx ctx;
>  
> -	return 0;
> +void filemap_ctx_init(bool auto_close)
> +{
> +	ctx.desc = NULL;	/* to fail the first comparison in open_ */
> +	ctx.fd = -1;		/* not to close random fd in _fini */
> +	ctx.vma = NULL;		/* not to put spurious VMA_CLOSE in _fini */
> +				/* flags may remain any */
> +	ctx.close = auto_close;
> +}
> +
> +void filemap_ctx_fini(void)
> +{
> +	if (ctx.close) {
> +		if (ctx.fd >= 0)
> +			close(ctx.fd);
> +	} else {
> +		if (ctx.vma)
> +			ctx.vma->e->status |= VMA_CLOSE;
> +	}
>  }
>  
>  static int open_filemap(int pid, struct vma_area *vma)
> @@ -1650,15 +1694,24 @@ static int open_filemap(int pid, struct vma_area *vma)
>  	BUG_ON((vma->vmfd == NULL) || !vma->e->has_fdflags);
>  	flags = vma->e->fdflags;
>  
> -	ret = open_path(vma->vmfd, do_open_reg_noseek_flags, &flags);
> -	if (ret < 0)
> -		return ret;
> +	if (ctx.flags != flags || ctx.desc != vma->vmfd) {
> +		ret = open_path(vma->vmfd, do_open_reg_noseek_flags, &flags);
> +		if (ret < 0)
> +			return ret;
>  
> -	vma->e->fd = ret;
> +		filemap_ctx_fini();
> +
> +		ctx.flags = flags;
> +		ctx.desc = vma->vmfd;
> +		ctx.fd = ret;
> +	}
> +
> +	ctx.vma = vma;
> +	vma->e->fd = ctx.fd;
>  	return 0;
>  }
>  
> -int collect_filemap(struct vma_area *vma, struct vma_file_ctx *ctx)
> +int collect_filemap(struct vma_area *vma)
>  {
>  	struct file_desc *fd;
>  
> @@ -1677,19 +1730,7 @@ int collect_filemap(struct vma_area *vma, struct vma_file_ctx *ctx)
>  		return -1;
>  
>  	vma->vmfd = fd;
> -	if (ctx->vma && ctx->flags == vma->e->flags && ctx->fd == fd) {
> -		vma->vm_open = borrow_filemap;
> -		vma->fvma = ctx->vma;
> -		ctx->vma->e->status |= VMA_NO_CLOSE;
> -		/* Change VMA so that next borrower sets NO_CLOSE on us */
> -		ctx->vma = vma;
> -	} else {
> -		vma->vm_open = open_filemap;
> -		ctx->flags = vma->e->fdflags;
> -		ctx->fd = fd;
> -		ctx->vma = vma;
> -	}
> -
> +	vma->vm_open = open_filemap;
>  	return 0;
>  }
>  
> diff --git a/criu/include/files-reg.h b/criu/include/files-reg.h
> index afae362..1c49be9 100644
> --- a/criu/include/files-reg.h
> +++ b/criu/include/files-reg.h
> @@ -40,12 +40,9 @@ extern struct file_remap *lookup_ghost_remap(u32 dev, u32 ino);
>  
>  extern struct file_desc *try_collect_special_file(u32 id, int optional);
>  #define collect_special_file(id)	try_collect_special_file(id, 0)
> -struct vma_file_ctx {
> -	u32 flags;
> -	struct file_desc *fd;
> -	struct vma_area *vma;
> -};
> -extern int collect_filemap(struct vma_area *, struct vma_file_ctx *ctx);
> +extern int collect_filemap(struct vma_area *);
> +extern void filemap_ctx_init(bool auto_close);
> +extern void filemap_ctx_fini(void);
>  
>  extern int collect_remaps_and_regfiles(void);
>  
> diff --git a/criu/include/image.h b/criu/include/image.h
> index 9846512..d9c4bdb 100644
> --- a/criu/include/image.h
> +++ b/criu/include/image.h
> @@ -89,7 +89,7 @@
>  #define VMA_AREA_VVAR		(1 <<  12)
>  #define VMA_AREA_AIORING	(1 <<  13)
>  
> -#define VMA_NO_CLOSE		(1 <<  28)
> +#define VMA_CLOSE		(1 <<  28)
>  #define VMA_NO_PROT_WRITE	(1 <<  29)
>  #define VMA_PREMMAPED		(1 <<  30)
>  #define VMA_UNSUPP		(1 <<  31)
> diff --git a/criu/include/vma.h b/criu/include/vma.h
> index a1f914c..779466c 100644
> --- a/criu/include/vma.h
> +++ b/criu/include/vma.h
> @@ -55,7 +55,6 @@ struct vma_area {
>  			int (*vm_open)(int pid, struct vma_area *vma);
>  			struct file_desc *vmfd;
>  			struct vma_area	*pvma;		/* parent for inherited VMAs */
> -			struct vma_area	*fvma;		/* vma from which to borrow a file */
>  			unsigned long	*page_bitmap;	/* existent pages */
>  			unsigned long	premmaped_addr;	/* restore only */
>  
> diff --git a/criu/mem.c b/criu/mem.c
> index 0b3c900..3c7186a 100644
> --- a/criu/mem.c
> +++ b/criu/mem.c
> @@ -481,7 +481,6 @@ int prepare_mm_pid(struct pstree_item *i)
>  	int ret = -1, vn = 0;
>  	struct cr_img *img;
>  	struct rst_info *ri = rsti(i);
> -	struct vma_file_ctx ctx = {};
>  
>  	img = open_image(CR_FD_MM, O_RSTR, pid);
>  	if (!img)
> @@ -541,7 +540,7 @@ int prepare_mm_pid(struct pstree_item *i)
>  			ret = collect_shmem(pid, vma);
>  		else if (vma_area_is(vma, VMA_FILE_PRIVATE) ||
>  				vma_area_is(vma, VMA_FILE_SHARED))
> -			ret = collect_filemap(vma, &ctx);
> +			ret = collect_filemap(vma);
>  		else if (vma_area_is(vma, VMA_AREA_SOCKET))
>  			ret = collect_socket_map(vma);
>  		else
> @@ -707,10 +706,6 @@ static int premap_private_vma(struct pstree_item *t, struct vma_area *vma, void
>  			pr_perror("Unable to map ANON_VMA");
>  			return -1;
>  		}
> -
> -		if (vma_area_is(vma, VMA_FILE_PRIVATE) &&
> -				!vma_area_is(vma, VMA_NO_CLOSE))
> -			close(vma->e->fd);
>  	} else {
>  		void *paddr;
>  
> @@ -784,6 +779,8 @@ static int premap_priv_vmas(struct pstree_item *t, struct vm_area_list *vmas,
>  	int ret = 0;
>  	LIST_HEAD(empty);
>  
> +	filemap_ctx_init(true);
> +
>  	list_for_each_entry(vma, &vmas->h, list) {
>  		if (pstart > vma->e->start) {
>  			ret = -1;
> @@ -819,6 +816,8 @@ static int premap_priv_vmas(struct pstree_item *t, struct vm_area_list *vmas,
>  			break;
>  	}
>  
> +	filemap_ctx_fini();
> +
>  	return ret;
>  }
>  
> @@ -1128,6 +1127,8 @@ int open_vmas(struct pstree_item *t)
>  	struct vma_area *vma;
>  	struct vm_area_list *vmas = &rsti(t)->vmas;
>  
> +	filemap_ctx_init(false);
> +
>  	list_for_each_entry(vma, &vmas->h, list) {
>  		if (!vma_area_is(vma, VMA_AREA_REGULAR) || !vma->vm_open)
>  			continue;
> @@ -1140,8 +1141,19 @@ int open_vmas(struct pstree_item *t)
>  			pr_err("`- Can't open vma\n");
>  			return -1;
>  		}
> +
> +		/*
> +		 * File mappings have vm_open set to open_filemap which, in
> +		 * turn, puts the VMA_CLOSE bit itself. For all the rest we
> +		 * need to put it by hads, so that the restorer closes the fd
> +		 */
> +		if (!(vma_area_is(vma, VMA_FILE_PRIVATE) ||
> +					vma_area_is(vma, VMA_FILE_SHARED)))
> +			vma->e->status |= VMA_CLOSE;
>  	}
>  
> +	filemap_ctx_fini();
> +
>  	return 0;
>  }
>  
> diff --git a/criu/pie/restorer.c b/criu/pie/restorer.c
> index 3e61822..e8c1118 100644
> --- a/criu/pie/restorer.c
> +++ b/criu/pie/restorer.c
> @@ -650,7 +650,7 @@ static unsigned long restore_mapping(VmaEntry *vma_entry)
>  			vma_entry->pgoff);
>  
>  	if ((vma_entry->fd != -1) &&
> -			!(vma_entry->status & VMA_NO_CLOSE))
> +			(vma_entry->status & VMA_CLOSE))
>  		sys_close(vma_entry->fd);
>  
>  	return addr;
> -- 
> 2.1.4
> _______________________________________________
> CRIU mailing list
> CRIU at openvz.org
> https://lists.openvz.org/mailman/listinfo/criu


More information about the CRIU mailing list