[CRIU] [PATCH] mm: Dump vmas into separate image file

Pavel Emelyanov xemul at parallels.com
Wed Mar 21 06:22:17 EDT 2012


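The core image now contains only the core per-task state; the VMA entries
are dumped into a separate per-task image file (vmas-%d.img) instead.
The new file resurrects the Tula magic number that was removed earlier.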

Signed-off-by: Pavel Emelyanov <xemul at parallels.com>

---


-------------- next part --------------
diff --git a/cr-dump.c b/cr-dump.c
index a71f1fa..0264b88 100644
--- a/cr-dump.c
+++ b/cr-dump.c
@@ -490,16 +490,19 @@ static int dump_task_mappings(pid_t pid, const struct list_head *vma_area_list,
 			      const struct cr_fdset *cr_fdset)
 {
 	struct vma_area *vma_area;
-	int ret = -1;
+	int ret = -1, fd = cr_fdset->fds[CR_FD_VMAS];
 
 	pr_info("\n");
 	pr_info("Dumping mappings (pid: %d)\n", pid);
 	pr_info("----------------------------------------\n");
 
 	list_for_each_entry(vma_area, vma_area_list, list) {
-
 		struct vma_entry *vma = &vma_area->vma;
 
+		ret = write_img(fd, vma);
+		if (ret < 0)
+			goto err;
+
 		if (!vma_entry_is(vma, VMA_AREA_REGULAR))
 			continue;
 		if (vma_entry_is(vma, VMA_AREA_SYSVIPC))
@@ -512,15 +515,15 @@ static int dump_task_mappings(pid_t pid, const struct list_head *vma_area_list,
 		else if (vma_entry_is(vma, VMA_FILE_PRIVATE) ||
 				vma_entry_is(vma, VMA_FILE_SHARED))
 			ret = dump_filemap(pid, vma, vma_area->vm_file_fd, cr_fdset);
+		else
+			ret = 0;
 
 		if (ret)
 			goto err;
 	}
 
 	ret = 0;
-
 	pr_info("----------------------------------------\n");
-
 err:
 	return ret;
 }
@@ -1185,53 +1188,6 @@ static struct vma_area *find_vma_by_addr(const struct list_head *vma_area_list,
 	return NULL;
 }
 
-/* kernel expects a special format in core file */
-static int finalize_core(pid_t pid, const struct list_head *vma_area_list,
-			 const struct cr_fdset *cr_fdset)
-{
-	int fd_core;
-	unsigned long num;
-	struct vma_area *vma_area;
-	struct vma_entry ve;
-	ssize_t bytes;
-
-	pr_info("\n");
-	pr_info("Finalizing core (pid: %d)\n", pid);
-	pr_info("----------------------------------------\n");
-
-	fd_core		= cr_fdset->fds[CR_FD_CORE];
-
-	lseek(fd_core,		GET_FILE_OFF_AFTER(struct core_entry), SEEK_SET);
-
-	num = 0;
-	pr_info("Appending VMAs ... ");
-
-	/* All VMAs first */
-
-	list_for_each_entry(vma_area, vma_area_list, list) {
-		bytes = write(fd_core, &vma_area->vma, sizeof(vma_area->vma));
-		if (bytes != sizeof(vma_area->vma)) {
-			pr_perror("\nUnable to write vma entry (%li written)", num);
-			goto err;
-		}
-		num++;
-	}
-
-	/* Ending marker */
-	memzero_p(&ve);
-	if (write_img(fd_core, &ve))
-		goto err;
-
-	pr_info("OK (%li written)\n", num);
-
-	pr_info("----------------------------------------\n");
-	return 0;
-
-err:
-	pr_perror("Error catched");
-	return -1;
-}
-
 static int dump_task_thread(struct parasite_ctl *parasite_ctl,
 			    pid_t pid, const struct cr_fdset *cr_fdset)
 {
@@ -1281,7 +1237,6 @@ static int dump_one_zombie(const struct pstree_item *item,
 {
 	struct core_entry *core;
 	int ret;
-	LIST_HEAD(vma_area_list);
 
 	cr_fdset = cr_dump_fdset_open(item->pid, CR_FD_DESC_CORE, cr_fdset);
 	if (cr_fdset == NULL)
@@ -1294,10 +1249,7 @@ static int dump_one_zombie(const struct pstree_item *item,
 	core->tc.task_state = TASK_DEAD;
 	core->tc.exit_code = pps->exit_code;
 
-	if (dump_task_core(core, cr_fdset) < 0)
-		return -1;
-
-	return finalize_core(item->pid, &vma_area_list, cr_fdset);
+	return dump_task_core(core, cr_fdset);
 }
 
 static struct proc_pid_stat pps_buf;
@@ -1442,12 +1394,6 @@ static int dump_one_task(const struct pstree_item *item, struct cr_fdset *cr_fds
 		goto err;
 	}
 
-	ret = finalize_core(pid, &vma_area_list, cr_fdset);
-	if (ret) {
-		pr_err("Finalizing core (pid: %d) failed with %d\n", pid, ret);
-		goto err;
-	}
-
 	free_mappings(&vma_area_list);
 
 err:
diff --git a/cr-restore.c b/cr-restore.c
index b750b9e..083724c 100644
--- a/cr-restore.c
+++ b/cr-restore.c
@@ -226,24 +226,21 @@ static int prepare_shmem_pid(int pid)
 	struct task_core_entry tc;
 	struct image_header hdr;
 
-	fd = open_image_ro(CR_FD_CORE, pid);
-	if (fd < 0)
-		return -1;
-
-	lseek(fd, GET_FILE_OFF_AFTER(struct core_entry), SEEK_SET);
+	fd = open_image_ro(CR_FD_VMAS, pid);
+	if (fd < 0) {
+		if (errno == ENOENT)
+			return 0;
+		else
+			return -1;
+	}
 
 	while (1) {
-		ret = read_img(fd, &vi);
-		if (ret < 0) {
-			pr_perror("%d: Can't read vma_entry", pid);
-			goto out;
-		}
+		ret = read_img_eof(fd, &vi);
+		if (ret <= 0)
+			break;
 
 		pr_info("%d: vma %lx %lx\n", pid, vi.start, vi.end);
 
-		if (final_vma_entry(&vi))
-			break;
-
 		if (!vma_entry_is(&vi, VMA_ANON_SHARED))
 			continue;
 
@@ -565,26 +562,25 @@ write_fd:
 
 static int fixup_vma_fds(int pid, int fd)
 {
-	int offset = GET_FILE_OFF_AFTER(struct core_entry);
-
-	lseek(fd, offset, SEEK_SET);
-
 	while (1) {
 		struct vma_entry vi;
 		int ret = 0;
 
 		ret = read(fd, &vi, sizeof(vi));
+		if (ret == 0)
+			return 0;
+
 		if (ret < 0) {
 			pr_perror("%d: Can't read vma_entry", pid);
-		} else if (ret != sizeof(vi)) {
+			return -1;
+		}
+
+		if (ret != sizeof(vi)) {
 			pr_err("%d: Incomplete vma_entry (%d != %ld)\n",
 			       pid, ret, sizeof(vi));
 			return -1;
 		}
 
-		if (final_vma_entry(&vi))
-			return 0;
-
 		if (!(vma_entry_is(&vi, VMA_AREA_REGULAR)))
 			continue;
 
@@ -626,7 +622,7 @@ static int prepare_and_sigreturn(int pid)
 	int fd = -1, err = -1;
 	struct stat buf;
 
-	fd = open_image(CR_FD_CORE, O_RDWR, pid);
+	fd = open_image(CR_FD_VMAS, O_RDWR, pid);
 	if (fd < 0)
 		return -1;
 
@@ -1377,13 +1373,12 @@ static long restorer_get_vma_hint(pid_t pid, struct list_head *self_vma_list, lo
 	 * better to stick with it.
 	 */
 
-	fd = open_image_ro_nocheck(FMT_FNAME_CORE, pid);
+	fd = open_image_ro_nocheck(FMT_FNAME_VMAS, pid);
 	if (fd < 0)
 		return -1;
 
 	prev_vma_end = 0;
-
-	lseek(fd, GET_FILE_OFF_AFTER(struct core_entry), SEEK_SET);
+	lseek(fd, sizeof(u32), SEEK_SET);
 
 	while (1) {
 		ret = read(fd, &vma, sizeof(vma));
@@ -1533,6 +1528,7 @@ static void sigreturn_restore(pid_t pid)
 	int fd_fdinfo = -1;
 	int fd_core = -1;
 	int fd_pages = -1;
+	int fd_vmas = -1;
 	int i;
 
 	int *fd_core_threads;
@@ -1575,6 +1571,12 @@ static void sigreturn_restore(pid_t pid)
 		goto err;
 	}
 
+	fd_vmas = open_image_ro(CR_FD_VMAS, pid);
+	if (fd_vmas < 0) {
+		pr_perror("Can't open vmas-%d", pid);
+		goto err;
+	}
+
 	restore_code_len	= sizeof(restorer_blob);
 	restore_code_len	= round_up(restore_code_len, 16);
 
@@ -1692,6 +1694,7 @@ static void sigreturn_restore(pid_t pid)
 	 */
 	task_args->pid		= pid;
 	task_args->fd_core	= fd_core;
+	task_args->fd_vmas	= fd_vmas;
 	task_args->logfd	= log_get_fd();
 	task_args->sigchld_act	= sigchld_act;
 	task_args->fd_fdinfo	= fd_fdinfo;
diff --git a/cr-show.c b/cr-show.c
index 7eafdbd..7483e35 100644
--- a/cr-show.c
+++ b/cr-show.c
@@ -111,23 +111,26 @@ out:
 	pr_img_tail(CR_FD_PIPES);
 }
 
-static void show_vma(int fd_vma)
+static void show_vmas(int fd_vma)
 {
 	struct vma_area vma_area = {};
 	struct vma_entry ve;
 
-	pr_msg("\n\t---[VMA areas]---\n");
+	pr_img_head(CR_FD_VMAS);
+
 	while (1) {
-		if (read_img(fd_vma, &ve) < 0)
-			break;
+		int ret;
 
-		if (final_vma_entry(&ve))
+		ret = read_img_eof(fd_vma, &ve);
+		if (ret <= 0)
 			break;
 
 		/* Simply in a sake of fancy printing */
 		vma_area.vma = ve;
 		pr_msg_vma(&vma_area);
 	}
+
+	pr_img_tail(CR_FD_VMAS);
 }
 
 void print_data(unsigned long addr, unsigned char *data, size_t size)
@@ -431,18 +434,6 @@ static void show_core(int fd_core, bool show_content)
 
 	show_core_regs(fd_core);
 	show_core_rest(fd_core);
-	if (is_thread)
-		goto out;
-
-	lseek(fd_core, GET_FILE_OFF_AFTER(struct core_entry), SEEK_SET);
-	/*
-	 * If this is thread code -- we should jump out once
-	 * we reach EOF.
-	 */
-	if (is_thread)
-		goto out;
-
-	show_vma(fd_core);
 out:
 	pr_img_tail(CR_FD_CORE);
 }
@@ -472,6 +463,9 @@ static int cr_parse_file(struct cr_options *opts)
 	case CORE_MAGIC:
 		show_core(fd, opts->show_pages_content);
 		break;
+	case VMAS_MAGIC:
+		show_vmas(fd);
+		break;
 	case PSTREE_MAGIC:
 		show_pstree(fd, NULL);
 		break;
@@ -580,6 +574,8 @@ static int cr_show_all(unsigned long pid, struct cr_options *opts)
 			}
 		}
 
+		show_vmas(cr_fdset->fds[CR_FD_VMAS]);
+
 		show_pipes(cr_fdset->fds[CR_FD_PIPES]);
 
 		show_files(cr_fdset->fds[CR_FD_FDINFO]);
diff --git a/crtools.c b/crtools.c
index 6205019..b461679 100644
--- a/crtools.c
+++ b/crtools.c
@@ -57,6 +57,11 @@ struct cr_fd_desc_tmpl fdset_template[CR_FD_MAX] = {
 		.magic	= CORE_MAGIC,
 	},
 
+	[CR_FD_VMAS] = {
+		.fmt	= FMT_FNAME_VMAS,
+		.magic	= VMAS_MAGIC,
+	},
+
 	/* info about pipes - fds, pipe id and pipe data */
 	[CR_FD_PIPES] = {
 		.fmt	= FMT_FNAME_PIPES,
diff --git a/include/crtools.h b/include/crtools.h
index 015fcfe..90eec0d 100644
--- a/include/crtools.h
+++ b/include/crtools.h
@@ -22,6 +22,7 @@ enum {
 	CR_FD_FDINFO,
 	CR_FD_PAGES,
 	CR_FD_CORE,
+	CR_FD_VMAS,
 	CR_FD_PIPES,
 	CR_FD_SIGACT,
 	CR_FD_UNIXSK,
@@ -77,6 +78,7 @@ extern struct cr_fd_desc_tmpl fdset_template[CR_FD_MAX];
 #define FMT_FNAME_PAGES		"pages-%d.img"
 #define FMT_FNAME_SHMEM_PAGES	"pages-shmem-%ld.img"
 #define FMT_FNAME_CORE		"core-%d.img"
+#define FMT_FNAME_VMAS		"vmas-%d.img"
 #define FMT_FNAME_PIPES		"pipes-%d.img"
 #define FMT_FNAME_PSTREE	"pstree-%d.img"
 #define FMT_FNAME_SIGACTS	"sigacts-%d.img"
@@ -118,6 +120,7 @@ struct cr_fdset {
 	CR_FD_DESC_USE(CR_FD_FDINFO)		|\
 	CR_FD_DESC_USE(CR_FD_PAGES)		|\
 	CR_FD_DESC_USE(CR_FD_CORE)		|\
+	CR_FD_DESC_USE(CR_FD_VMAS)		|\
 	CR_FD_DESC_USE(CR_FD_PIPES)		|\
 	CR_FD_DESC_USE(CR_FD_SIGACT)		|\
 	CR_FD_DESC_USE(CR_FD_UNIXSK)		|\
diff --git a/include/image.h b/include/image.h
index cdc4af8..5bcfffd 100644
--- a/include/image.h
+++ b/include/image.h
@@ -13,6 +13,7 @@
 #define FDINFO_MAGIC	0x56213732 /* Dmitrov */
 #define PAGES_MAGIC	0x56084025 /* Vladimir */
 #define CORE_MAGIC	0x55053847 /* Kolomna */
+#define VMAS_MAGIC	0x54123737 /* Tula */
 #define PIPES_MAGIC	0x56513555 /* Tver */
 #define SIGACT_MAGIC	0x55344201 /* Murom */
 #define UNIXSK_MAGIC	0x54373943 /* Ryazan */
@@ -180,7 +181,6 @@ struct ipc_sem_entry {
 
 #define vma_entry_is(vma, s)	(((vma)->status & (s)) == (s))
 #define vma_entry_len(vma)	((vma)->end - (vma)->start)
-#define final_vma_entry(vma)	((vma)->start == 0 && (vma)->end == 0)
 
 struct page_entry {
 	u64	va;
diff --git a/include/restorer.h b/include/restorer.h
index 2ea465f..1ce7ad2 100644
--- a/include/restorer.h
+++ b/include/restorer.h
@@ -65,6 +65,7 @@ struct task_restore_core_args {
 
 	int				pid;			/* task pid */
 	int				fd_core;		/* opened core file */
+	int				fd_vmas;		/* opened vmas file */
 	int				fd_fdinfo;		/* opened files dump file */
 	int				fd_pages;		/* opened pages dump file */
 	int				logfd;
diff --git a/restorer.c b/restorer.c
index 1f44196..78b0584 100644
--- a/restorer.c
+++ b/restorer.c
@@ -398,9 +398,8 @@ long restore_task(struct task_restore_core_args *args)
 	 * OK, lets try to map new one.
 	 */
 	vma_entry = next_on_heap(vma_entry, core_entry);
-	sys_lseek(args->fd_core, GET_FILE_OFF_AFTER(struct core_entry), SEEK_SET);
 	while (1) {
-		ret = sys_read(args->fd_core, vma_entry, sizeof(*vma_entry));
+		ret = sys_read(args->fd_vmas, vma_entry, sizeof(*vma_entry));
 		if (!ret)
 			break;
 		if (ret != sizeof(*vma_entry)) {
@@ -409,9 +408,6 @@ long restore_task(struct task_restore_core_args *args)
 			goto core_restore_end;
 		}
 
-		if (final_vma_entry(vma_entry))
-			break;
-
 		if (!vma_entry_is(vma_entry, VMA_AREA_REGULAR))
 			continue;
 
@@ -459,9 +455,9 @@ long restore_task(struct task_restore_core_args *args)
 	 * Walk though all VMAs again to drop PROT_WRITE
 	 * if it was not there.
 	 */
-	sys_lseek(args->fd_core, GET_FILE_OFF_AFTER(struct core_entry), SEEK_SET);
+	sys_lseek(args->fd_vmas, MAGIC_OFFSET, SEEK_SET);
 	while (1) {
-		ret = sys_read(args->fd_core, vma_entry, sizeof(*vma_entry));
+		ret = sys_read(args->fd_vmas, vma_entry, sizeof(*vma_entry));
 		if (!ret)
 			break;
 		if (ret != sizeof(*vma_entry)) {
@@ -470,9 +466,6 @@ long restore_task(struct task_restore_core_args *args)
 			goto core_restore_end;
 		}
 
-		if (final_vma_entry(vma_entry))
-			break;
-
 		if (!(vma_entry_is(vma_entry, VMA_AREA_REGULAR)))
 			continue;
 
@@ -494,6 +487,7 @@ long restore_task(struct task_restore_core_args *args)
 			     vma_entry->prot);
 	}
 
+	sys_close(args->fd_vmas);
 	sys_close(args->fd_core);
 
 	ret = sys_munmap(args->shmems, SHMEMS_SIZE);


More information about the CRIU mailing list