[CRIU] [PATCH] vma: Fix badly inherited FD in filemap_open

Pavel Emelyanov xemul at virtuozzo.com
Fri Jun 9 19:23:37 MSK 2017


Previous patch (5a1e1aac) tried to minimize the amount of
open()s called when mmap()ing the files. Unfortunatley, there
was a mistake and wrong flags were compared which resulted in
the whole optimization working randomly (typically not
working).

Fixing the flags comparison revealed another problem. The
patch in question correllated with the 03e8c417 one, which
caused some vmas to be opened and mmaped much later than the
premap. When hitting the situation when vmas sharing their
fds are partially premapped and partially not, the whole
vm_open sharing became broken in multiple places -- either
needed fd was not opened, or the not needed left un-closed.

To fix this the context, that tracks whether the fd should
be shared or not, should be moved from collect stage to
the real opening loop. In this case we need to explicitly
know which vmas _may_ share fds (file private and shared)
with each other, so the sharing knowledge becomes spread
between open_filemap() and its callers. Oh, well...

Signed-off-by: Pavel Emelyanov <xemul at virtuozzo.com>
---
 criu/files-reg.c         | 89 +++++++++++++++++++++++++++++++++++-------------
 criu/include/files-reg.h |  9 ++---
 criu/include/image.h     |  2 +-
 criu/include/vma.h       |  1 -
 criu/mem.c               | 24 +++++++++----
 criu/pie/restorer.c      |  2 +-
 6 files changed, 88 insertions(+), 39 deletions(-)

diff --git a/criu/files-reg.c b/criu/files-reg.c
index e8ff68f..0a5fd83 100644
--- a/criu/files-reg.c
+++ b/criu/files-reg.c
@@ -1626,14 +1626,58 @@ int open_reg_by_id(u32 id)
 	return open_reg_fd(fd);
 }
 
-static int borrow_filemap(int pid, struct vma_area *vma)
-{
-	struct vma_area *fvma = vma->fvma;
+struct filemap_ctx {
+	u32 flags;
+	struct file_desc *desc;
+	int fd;
+	/*
+	 * Whether or not to close the fd when we're about to
+	 * put a new one into ctx.
+	 *
+	 * True is used by premap, so that it just calls vm_open
+	 * in sequence, immediatelly mmap()s the file, then it
+	 * can be closed.
+	 *
+	 * False is used by open_vmas() which pre-opens the files
+	 * for restorer, and the latter mmap()s them and closes.
+	 *
+	 * ...
+	 */
+	bool close;
+	/* ...
+	 *
+	 * but closing all vmas won't work, as some of them share
+	 * the descriptor, so only the ones that terminate the
+	 * fd-sharing chain are marked with VMA_CLOSE flag, saying
+	 * restorer to close the vma's fd.
+	 *
+	 * Said that, this vma pointer references the previously
+	 * seen vma, so that once fd changes, this one gets the
+	 * closing flag.
+	 */
+	struct vma_area *vma;
+};
 
-	BUG_ON(!(fvma->e->status & VMA_NO_CLOSE));
-	vma->e->fd = fvma->e->fd;
+static struct filemap_ctx ctx;
 
-	return 0;
+void filemap_ctx_init(bool auto_close)
+{
+	ctx.desc = NULL;	/* to fail the first comparison in open_ */
+	ctx.fd = -1;		/* not to close random fd in _fini */
+	ctx.vma = NULL;		/* not to put spurious VMA_CLOSE in _fini */
+				/* flags may remain any */
+	ctx.close = auto_close;
+}
+
+void filemap_ctx_fini(void)
+{
+	if (ctx.close) {
+		if (ctx.fd >= 0)
+			close(ctx.fd);
+	} else {
+		if (ctx.vma)
+			ctx.vma->e->status |= VMA_CLOSE;
+	}
 }
 
 static int open_filemap(int pid, struct vma_area *vma)
@@ -1650,15 +1694,24 @@ static int open_filemap(int pid, struct vma_area *vma)
 	BUG_ON((vma->vmfd == NULL) || !vma->e->has_fdflags);
 	flags = vma->e->fdflags;
 
-	ret = open_path(vma->vmfd, do_open_reg_noseek_flags, &flags);
-	if (ret < 0)
-		return ret;
+	if (ctx.flags != flags || ctx.desc != vma->vmfd) {
+		ret = open_path(vma->vmfd, do_open_reg_noseek_flags, &flags);
+		if (ret < 0)
+			return ret;
 
-	vma->e->fd = ret;
+		filemap_ctx_fini();
+
+		ctx.flags = flags;
+		ctx.desc = vma->vmfd;
+		ctx.fd = ret;
+	}
+
+	ctx.vma = vma;
+	vma->e->fd = ctx.fd;
 	return 0;
 }
 
-int collect_filemap(struct vma_area *vma, struct vma_file_ctx *ctx)
+int collect_filemap(struct vma_area *vma)
 {
 	struct file_desc *fd;
 
@@ -1677,19 +1730,7 @@ int collect_filemap(struct vma_area *vma, struct vma_file_ctx *ctx)
 		return -1;
 
 	vma->vmfd = fd;
-	if (ctx->vma && ctx->flags == vma->e->flags && ctx->fd == fd) {
-		vma->vm_open = borrow_filemap;
-		vma->fvma = ctx->vma;
-		ctx->vma->e->status |= VMA_NO_CLOSE;
-		/* Change VMA so that next borrower sets NO_CLOSE on us */
-		ctx->vma = vma;
-	} else {
-		vma->vm_open = open_filemap;
-		ctx->flags = vma->e->fdflags;
-		ctx->fd = fd;
-		ctx->vma = vma;
-	}
-
+	vma->vm_open = open_filemap;
 	return 0;
 }
 
diff --git a/criu/include/files-reg.h b/criu/include/files-reg.h
index afae362..1c49be9 100644
--- a/criu/include/files-reg.h
+++ b/criu/include/files-reg.h
@@ -40,12 +40,9 @@ extern struct file_remap *lookup_ghost_remap(u32 dev, u32 ino);
 
 extern struct file_desc *try_collect_special_file(u32 id, int optional);
 #define collect_special_file(id)	try_collect_special_file(id, 0)
-struct vma_file_ctx {
-	u32 flags;
-	struct file_desc *fd;
-	struct vma_area *vma;
-};
-extern int collect_filemap(struct vma_area *, struct vma_file_ctx *ctx);
+extern int collect_filemap(struct vma_area *);
+extern void filemap_ctx_init(bool auto_close);
+extern void filemap_ctx_fini(void);
 
 extern int collect_remaps_and_regfiles(void);
 
diff --git a/criu/include/image.h b/criu/include/image.h
index 9846512..d9c4bdb 100644
--- a/criu/include/image.h
+++ b/criu/include/image.h
@@ -89,7 +89,7 @@
 #define VMA_AREA_VVAR		(1 <<  12)
 #define VMA_AREA_AIORING	(1 <<  13)
 
-#define VMA_NO_CLOSE		(1 <<  28)
+#define VMA_CLOSE		(1 <<  28)
 #define VMA_NO_PROT_WRITE	(1 <<  29)
 #define VMA_PREMMAPED		(1 <<  30)
 #define VMA_UNSUPP		(1 <<  31)
diff --git a/criu/include/vma.h b/criu/include/vma.h
index a1f914c..779466c 100644
--- a/criu/include/vma.h
+++ b/criu/include/vma.h
@@ -55,7 +55,6 @@ struct vma_area {
 			int (*vm_open)(int pid, struct vma_area *vma);
 			struct file_desc *vmfd;
 			struct vma_area	*pvma;		/* parent for inherited VMAs */
-			struct vma_area	*fvma;		/* vma from which to borrow a file */
 			unsigned long	*page_bitmap;	/* existent pages */
 			unsigned long	premmaped_addr;	/* restore only */
 
diff --git a/criu/mem.c b/criu/mem.c
index 0b3c900..3c7186a 100644
--- a/criu/mem.c
+++ b/criu/mem.c
@@ -481,7 +481,6 @@ int prepare_mm_pid(struct pstree_item *i)
 	int ret = -1, vn = 0;
 	struct cr_img *img;
 	struct rst_info *ri = rsti(i);
-	struct vma_file_ctx ctx = {};
 
 	img = open_image(CR_FD_MM, O_RSTR, pid);
 	if (!img)
@@ -541,7 +540,7 @@ int prepare_mm_pid(struct pstree_item *i)
 			ret = collect_shmem(pid, vma);
 		else if (vma_area_is(vma, VMA_FILE_PRIVATE) ||
 				vma_area_is(vma, VMA_FILE_SHARED))
-			ret = collect_filemap(vma, &ctx);
+			ret = collect_filemap(vma);
 		else if (vma_area_is(vma, VMA_AREA_SOCKET))
 			ret = collect_socket_map(vma);
 		else
@@ -707,10 +706,6 @@ static int premap_private_vma(struct pstree_item *t, struct vma_area *vma, void
 			pr_perror("Unable to map ANON_VMA");
 			return -1;
 		}
-
-		if (vma_area_is(vma, VMA_FILE_PRIVATE) &&
-				!vma_area_is(vma, VMA_NO_CLOSE))
-			close(vma->e->fd);
 	} else {
 		void *paddr;
 
@@ -784,6 +779,8 @@ static int premap_priv_vmas(struct pstree_item *t, struct vm_area_list *vmas,
 	int ret = 0;
 	LIST_HEAD(empty);
 
+	filemap_ctx_init(true);
+
 	list_for_each_entry(vma, &vmas->h, list) {
 		if (pstart > vma->e->start) {
 			ret = -1;
@@ -819,6 +816,8 @@ static int premap_priv_vmas(struct pstree_item *t, struct vm_area_list *vmas,
 			break;
 	}
 
+	filemap_ctx_fini();
+
 	return ret;
 }
 
@@ -1128,6 +1127,8 @@ int open_vmas(struct pstree_item *t)
 	struct vma_area *vma;
 	struct vm_area_list *vmas = &rsti(t)->vmas;
 
+	filemap_ctx_init(false);
+
 	list_for_each_entry(vma, &vmas->h, list) {
 		if (!vma_area_is(vma, VMA_AREA_REGULAR) || !vma->vm_open)
 			continue;
@@ -1140,8 +1141,19 @@ int open_vmas(struct pstree_item *t)
 			pr_err("`- Can't open vma\n");
 			return -1;
 		}
+
+		/*
+		 * File mappings have vm_open set to open_filemap which, in
+		 * turn, puts the VMA_CLOSE bit itself. For all the rest we
+		 * need to put it by hads, so that the restorer closes the fd
+		 */
+		if (!(vma_area_is(vma, VMA_FILE_PRIVATE) ||
+					vma_area_is(vma, VMA_FILE_SHARED)))
+			vma->e->status |= VMA_CLOSE;
 	}
 
+	filemap_ctx_fini();
+
 	return 0;
 }
 
diff --git a/criu/pie/restorer.c b/criu/pie/restorer.c
index 3e61822..e8c1118 100644
--- a/criu/pie/restorer.c
+++ b/criu/pie/restorer.c
@@ -650,7 +650,7 @@ static unsigned long restore_mapping(VmaEntry *vma_entry)
 			vma_entry->pgoff);
 
 	if ((vma_entry->fd != -1) &&
-			!(vma_entry->status & VMA_NO_CLOSE))
+			(vma_entry->status & VMA_CLOSE))
 		sys_close(vma_entry->fd);
 
 	return addr;
-- 
2.1.4


More information about the CRIU mailing list