[CRIU] [PATCH 1/2 v2] criu: dump filemap as soon as possible

Andrey Vagin avagin at openvz.org
Tue May 17 17:06:03 PDT 2016


From: Andrew Vagin <avagin at virtuozzo.com>

A process can have many VMAs, so we can hit the rlimit on open files.

I suggest dumping the VMA files as soon as possible and closing them right away.

https://jira.sw.ru/browse/PSBM-46355

v2: remove vm_file_fd from the vma_area structure

Cc: Cyrill Gorcunov <gorcunov at openvz.org>
Signed-off-by: Andrew Vagin <avagin at virtuozzo.com>
---
 criu/cr-dump.c             | 32 +++++++-------------------------
 criu/cr-exec.c             |  2 +-
 criu/include/proc_parse.h  |  4 +++-
 criu/include/sysfs_parse.h |  2 +-
 criu/include/vma.h         |  6 ++++--
 criu/proc_parse.c          | 46 +++++++++++++++++++++++++++++-----------------
 criu/sysfs_parse.c         |  4 ++--
 criu/util.c                |  1 -
 8 files changed, 47 insertions(+), 50 deletions(-)

diff --git a/criu/cr-dump.c b/criu/cr-dump.c
index 8ef46a4..1fa2bfd 100644
--- a/criu/cr-dump.c
+++ b/criu/cr-dump.c
@@ -86,26 +86,11 @@
 
 static char loc_buf[PAGE_SIZE];
 
-static void close_vma_file(struct vma_area *vma)
-{
-	if (vma->vm_file_fd < 0)
-		return;
-	if (vma->e->status & VMA_AREA_SOCKET)
-		return;
-	if (vma->file_borrowed)
-		return;
-	if (vma_area_is(vma, VMA_AREA_AIORING))
-		return;
-
-	close(vma->vm_file_fd);
-}
-
 void free_mappings(struct vm_area_list *vma_area_list)
 {
 	struct vma_area *vma_area, *p;
 
 	list_for_each_entry_safe(vma_area, p, &vma_area_list->h, list) {
-		close_vma_file(vma_area);
 		if (!vma_area->file_borrowed)
 			free(vma_area->vmst);
 		free(vma_area);
@@ -115,7 +100,8 @@ void free_mappings(struct vm_area_list *vma_area_list)
 	vma_area_list->nr = 0;
 }
 
-int collect_mappings(pid_t pid, struct vm_area_list *vma_area_list)
+int collect_mappings(pid_t pid, struct vm_area_list *vma_area_list,
+						dump_filemap_t dump_file)
 {
 	int ret = -1;
 
@@ -123,7 +109,7 @@ int collect_mappings(pid_t pid, struct vm_area_list *vma_area_list)
 	pr_info("Collecting mappings (pid: %d)\n", pid);
 	pr_info("----------------------------------------\n");
 
-	ret = parse_smaps(pid, vma_area_list);
+	ret = parse_smaps(pid, vma_area_list, dump_file);
 	if (ret < 0)
 		goto err;
 
@@ -365,8 +351,7 @@ static int dump_pid_misc(pid_t pid, TaskCoreEntry *tc)
 	return 0;
 }
 
-static int dump_filemap(pid_t pid, struct vma_area *vma_area,
-		const struct cr_imgset *imgset)
+static int dump_filemap(struct vma_area *vma_area, int fd)
 {
 	struct fd_parms p = FD_PARMS_INIT;
 	VmaEntry *vma = vma_area->e;
@@ -397,7 +382,7 @@ static int dump_filemap(pid_t pid, struct vma_area *vma_area,
 	/* Flags will be set during restore in get_filemap_fd() */
 
 	if (fd_id_generate_special(&p, &id))
-		ret = dump_one_reg_file(vma_area->vm_file_fd, id, &p);
+		ret = dump_one_reg_file(fd, id, &p);
 
 	vma->shmid = id;
 	return ret;
@@ -470,9 +455,6 @@ static int dump_task_mm(pid_t pid, const struct proc_pid_stat *stat,
 			ret = check_sysvipc_map_dump(pid, vma);
 		else if (vma_entry_is(vma, VMA_ANON_SHARED))
 			ret = add_shmem_area(pid, vma);
-		else if (vma_entry_is(vma, VMA_FILE_PRIVATE) ||
-				vma_entry_is(vma, VMA_FILE_SHARED))
-			ret = dump_filemap(pid, vma_area, imgset);
 		else if (vma_entry_is(vma, VMA_AREA_SOCKET))
 			ret = dump_socket_map(vma_area);
 		else
@@ -1127,7 +1109,7 @@ static int pre_dump_one_task(struct pstree_item *item, struct list_head *ctls)
 	if (item->pid.state == TASK_DEAD)
 		return 0;
 
-	ret = collect_mappings(pid, &vmas);
+	ret = collect_mappings(pid, &vmas, NULL);
 	if (ret) {
 		pr_err("Collect mappings (pid: %d) failed with %d\n", pid, ret);
 		goto err;
@@ -1207,7 +1189,7 @@ static int dump_one_task(struct pstree_item *item)
 	if (ret < 0)
 		goto err;
 
-	ret = collect_mappings(pid, &vmas);
+	ret = collect_mappings(pid, &vmas, dump_filemap);
 	if (ret) {
 		pr_err("Collect mappings (pid: %d) failed with %d\n", pid, ret);
 		goto err;
diff --git a/criu/cr-exec.c b/criu/cr-exec.c
index e595ec1..a4c07e2 100644
--- a/criu/cr-exec.c
+++ b/criu/cr-exec.c
@@ -147,7 +147,7 @@ int cr_exec(int pid, char **opt)
 	 */
 	free(creds);
 
-	ret = collect_mappings(pid, &vmas);
+	ret = collect_mappings(pid, &vmas, NULL);
 	if (ret) {
 		pr_err("Can't collect vmas for %d\n", pid);
 		goto out_unseize;
diff --git a/criu/include/proc_parse.h b/criu/include/proc_parse.h
index 5de5c86..5208ba2 100644
--- a/criu/include/proc_parse.h
+++ b/criu/include/proc_parse.h
@@ -128,7 +128,9 @@ extern int parse_pid_stat(pid_t pid, struct proc_pid_stat *s);
 extern unsigned int parse_pid_loginuid(pid_t pid, int *err, bool ignore_noent);
 extern int parse_pid_oom_score_adj(pid_t pid, int *err);
 extern int prepare_loginuid(unsigned int value, unsigned int loglevel);
-extern int parse_smaps(pid_t pid, struct vm_area_list *vma_area_list);
+struct vma_area;
+typedef int (*dump_filemap_t)(struct vma_area *vma_area, int fd);
+extern int parse_smaps(pid_t pid, struct vm_area_list *vma_area_list, dump_filemap_t cb);
 extern int parse_self_maps_lite(struct vm_area_list *vms);
 extern int parse_pid_status(pid_t pid, struct proc_status_creds *);
 
diff --git a/criu/include/sysfs_parse.h b/criu/include/sysfs_parse.h
index 4d74c4e..3d49b4c 100644
--- a/criu/include/sysfs_parse.h
+++ b/criu/include/sysfs_parse.h
@@ -7,7 +7,7 @@
 #define AUFSBR_PATH_LEN	(SBINFO_PATH_LEN + 6 + 1)	/* /sys/fs/aufs/<sbinfo>/br%3d */
 
 extern int parse_aufs_branches(struct mount_info *mi);
-extern int fixup_aufs_vma_fd(struct vma_area *vma);
+extern int fixup_aufs_vma_fd(struct vma_area *vma, int vm_file_fd);
 extern void free_aufs_branches(void);
 
 #endif /* __CR_SYSFS_PARSE_H__ */
diff --git a/criu/include/vma.h b/criu/include/vma.h
index d69f5f0..7db5f4c 100644
--- a/criu/include/vma.h
+++ b/criu/include/vma.h
@@ -44,7 +44,6 @@ struct vma_area {
 				 *
 				 * The aio_nr_req is only for aio rings.
 				 */
-				int	vm_file_fd;
 				int	vm_socket_id;
 				unsigned int aio_nr_req;
 			};
@@ -71,8 +70,11 @@ struct vma_area {
 	};
 };
 
+typedef int (*dump_filemap_t)(struct vma_area *vma_area, int fd);
+
 extern struct vma_area *alloc_vma_area(void);
-extern int collect_mappings(pid_t pid, struct vm_area_list *vma_area_list);
+extern int collect_mappings(pid_t pid,
+		struct vm_area_list *vma_area_list, dump_filemap_t cb);
 extern void free_mappings(struct vm_area_list *vma_area_list);
 
 #define vma_area_is(vma_area, s)	vma_entry_is((vma_area)->e, s)
diff --git a/criu/proc_parse.c b/criu/proc_parse.c
index cebf21c..8806fc5 100644
--- a/criu/proc_parse.c
+++ b/criu/proc_parse.c
@@ -177,7 +177,9 @@ static inline int vfi_equal(struct vma_file_info *a, struct vma_file_info *b)
 }
 
 static int vma_get_mapfile(char *fname, struct vma_area *vma, DIR *mfd,
-		struct vma_file_info *vfi, struct vma_file_info *prev_vfi)
+			   struct vma_file_info *vfi,
+			   struct vma_file_info *prev_vfi,
+			   int *vm_file_fd)
 {
 	char path[32];
 	int flags;
@@ -189,12 +191,11 @@ static int vma_get_mapfile(char *fname, struct vma_area *vma, DIR *mfd,
 		 * If vfi is equal (!) and negative @vm_file_fd --
 		 * we have nothing to borrow for sure.
 		 */
-		if (prev->vm_file_fd < 0)
+		if (*vm_file_fd < 0)
 			return 0;
 
 		pr_debug("vma %"PRIx64" borrows vfi from previous %"PRIx64"\n",
 				vma->e->start, prev->e->start);
-		vma->vm_file_fd = prev->vm_file_fd;
 		if (prev->e->status & VMA_AREA_SOCKET)
 			vma->e->status |= VMA_AREA_SOCKET | VMA_AREA_REGULAR;
 
@@ -209,6 +210,7 @@ static int vma_get_mapfile(char *fname, struct vma_area *vma, DIR *mfd,
 
 		return 0;
 	}
+	close_safe(vm_file_fd);
 
 	/* Figure out if it's file mapping */
 	snprintf(path, sizeof(path), "%"PRIx64"-%"PRIx64, vma->e->start, vma->e->end);
@@ -229,8 +231,8 @@ static int vma_get_mapfile(char *fname, struct vma_area *vma, DIR *mfd,
 		 */
 		flags = O_RDONLY;
 
-	vma->vm_file_fd = openat(dirfd(mfd), path, flags);
-	if (vma->vm_file_fd < 0) {
+	*vm_file_fd = openat(dirfd(mfd), path, flags);
+	if (*vm_file_fd < 0) {
 		if (errno == ENOENT)
 			/* Just mapping w/o map_files link */
 			return 0;
@@ -250,7 +252,7 @@ static int vma_get_mapfile(char *fname, struct vma_area *vma, DIR *mfd,
 
 			if ((buf.st_mode & S_IFMT) == 0 && !strncmp(fname, AIO_FNAME, sizeof(AIO_FNAME) - 1)) {
 				/* AIO ring, let's try */
-				close(vma->vm_file_fd);
+				close_safe(vm_file_fd);
 				vma->aio_nr_req = -1;
 				vma->e->status = VMA_AREA_AIORING;
 				return 0;
@@ -341,7 +343,7 @@ static int vma_get_mapfile(char *fname, struct vma_area *vma, DIR *mfd,
 				return -1;
 			}
 
-			vma->vm_file_fd = fd;
+			*vm_file_fd = fd;
 			return 0;
 		}
 
@@ -365,14 +367,14 @@ static int vma_get_mapfile(char *fname, struct vma_area *vma, DIR *mfd,
 	if (opts.aufs) {
 		int ret;
 
-		ret = fixup_aufs_vma_fd(vma);
+		ret = fixup_aufs_vma_fd(vma, *vm_file_fd);
 		if (ret < 0)
 			return -1;
 		if (ret > 0)
 			return 0;
 	}
 
-	if (fstat(vma->vm_file_fd, vma->vmst) < 0) {
+	if (fstat(*vm_file_fd, vma->vmst) < 0) {
 		pr_perror("Failed fstat on map %"PRIx64"", vma->e->start);
 		return -1;
 	}
@@ -448,9 +450,11 @@ static int handle_vma(pid_t pid, struct vma_area *vma_area,
 			char *file_path, DIR *map_files_dir,
 			struct vma_file_info *vfi,
 			struct vma_file_info *prev_vfi,
-			struct vm_area_list *vma_area_list)
+			struct vm_area_list *vma_area_list,
+			int *vm_file_fd)
 {
-	if (vma_get_mapfile(file_path, vma_area, map_files_dir, vfi, prev_vfi))
+	if (vma_get_mapfile(file_path, vma_area, map_files_dir,
+					vfi, prev_vfi, vm_file_fd))
 		goto err_bogus_mapfile;
 
 	if (vma_area->e->status != 0) {
@@ -489,7 +493,7 @@ static int handle_vma(pid_t pid, struct vma_area *vma_area,
 		vma_area->e->shmid = prev->e->shmid;
 		vma_area->vmst = prev->vmst;
 		vma_area->mnt_id = prev->mnt_id;
-	} else if (vma_area->vm_file_fd >= 0) {
+	} else if (*vm_file_fd >= 0) {
 		struct stat *st_buf = vma_area->vmst;
 
 		if (S_ISREG(st_buf->st_mode))
@@ -531,7 +535,7 @@ static int handle_vma(pid_t pid, struct vma_area *vma_area,
 		 * have mnt_id.
 		 */
 		if (vma_area->mnt_id != -1 &&
-		    get_fd_mntid(vma_area->vm_file_fd, &vma_area->mnt_id))
+		    get_fd_mntid(*vm_file_fd, &vma_area->mnt_id))
 			return -1;
 	} else {
 		/*
@@ -552,7 +556,7 @@ err:
 err_bogus_mapping:
 	pr_err("Bogus mapping 0x%"PRIx64"-0x%"PRIx64" (flags: %#x vm_file_fd: %d)\n",
 	       vma_area->e->start, vma_area->e->end,
-	       vma_area->e->flags, vma_area->vm_file_fd);
+	       vma_area->e->flags, *vm_file_fd);
 	goto err;
 
 err_bogus_mapfile:
@@ -593,12 +597,13 @@ static int vma_list_add(struct vma_area *vma_area,
 	return 0;
 }
 
-int parse_smaps(pid_t pid, struct vm_area_list *vma_area_list)
+int parse_smaps(pid_t pid, struct vm_area_list *vma_area_list,
+					dump_filemap_t dump_filemap)
 {
 	struct vma_area *vma_area = NULL;
 	unsigned long start, end, pgoff, prev_end = 0;
 	char r, w, x, s;
-	int ret = -1;
+	int ret = -1, vm_file_fd = -1;
 	struct vma_file_info vfi;
 	struct vma_file_info prev_vfi = {};
 
@@ -693,8 +698,14 @@ int parse_smaps(pid_t pid, struct vm_area_list *vma_area_list)
 		}
 
 		if (handle_vma(pid, vma_area, str + path_off, map_files_dir,
-					&vfi, &prev_vfi, vma_area_list))
+				&vfi, &prev_vfi, vma_area_list, &vm_file_fd))
 			goto err;
+
+		if (vma_entry_is(vma_area->e, VMA_FILE_PRIVATE) ||
+				vma_entry_is(vma_area->e, VMA_FILE_SHARED)) {
+			if (dump_filemap && dump_filemap(vma_area, vm_file_fd))
+				goto err;
+		}
 	}
 
 	vma_area = NULL;
@@ -703,6 +714,7 @@ int parse_smaps(pid_t pid, struct vm_area_list *vma_area_list)
 err:
 	bclose(&f);
 err_n:
+	close_safe(&vm_file_fd);
 	if (map_files_dir)
 		closedir(map_files_dir);
 
diff --git a/criu/sysfs_parse.c b/criu/sysfs_parse.c
index a947db6..9ec148c 100644
--- a/criu/sysfs_parse.c
+++ b/criu/sysfs_parse.c
@@ -272,13 +272,13 @@ err:
  * globl root (/) for later use in dump_filemap()
  * and parse_smaps().
  */
-int fixup_aufs_vma_fd(struct vma_area *vma)
+int fixup_aufs_vma_fd(struct vma_area *vma, int vm_file_fd)
 {
 	char path[PATH_MAX];
 	int len;
 
 	path[0] = '.';
-	len = read_fd_link(vma->vm_file_fd, &path[1], sizeof path - 1);
+	len = read_fd_link(vm_file_fd, &path[1], sizeof path - 1);
 	if (len < 0)
 		return -1;
 
diff --git a/criu/util.c b/criu/util.c
index e8ebe61..eab0ac2 100644
--- a/criu/util.c
+++ b/criu/util.c
@@ -800,7 +800,6 @@ struct vma_area *alloc_vma_area(void)
 	if (p) {
 		p->e = (VmaEntry *)(p + 1);
 		vma_entry__init(p->e);
-		p->vm_file_fd = -1;
 		p->e->fd = -1;
 	}
 
-- 
2.7.4



More information about the CRIU mailing list