[CRIU] [PATCH 10/16] restorer: remap private vmas to correct places (v3)

Andrey Vagin avagin at openvz.org
Wed Nov 14 12:19:19 EST 2012


All private vmas are placed in a premmapped region and
they are sorted by start addresses, so they should be shifted apart.

Here is one more problem with overlapped temporary and target regions,
mremap could not remap such cases directly, so for such cases a vma is
remapped away and then remapped on a target place.

v2: fix accoding with Pavel's comments
v3: add a huge comment with pictures

Signed-off-by: Andrey Vagin <avagin at openvz.org>
---
 cr-restore.c       |   1 +
 include/restorer.h |   1 +
 restorer.c         | 110 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 112 insertions(+)

diff --git a/cr-restore.c b/cr-restore.c
index 136d0c5..df77d88 100644
--- a/cr-restore.c
+++ b/cr-restore.c
@@ -1614,6 +1614,7 @@ static int sigreturn_restore(pid_t pid, CoreEntry *core)
 
 	mem += self_vmas_len;
 	task_args->tgt_vmas = vma_list_remap(mem, vmas_len, &rst_vma_list);
+	task_args->nr_vmas = rst_nr_vmas;
 	task_args->premmapped_addr = (unsigned long) premmapped_addr;
 	task_args->premmapped_len = premmapped_len;
 	if (!task_args->tgt_vmas)
diff --git a/include/restorer.h b/include/restorer.h
index a88e4d9..4ef007f 100644
--- a/include/restorer.h
+++ b/include/restorer.h
@@ -99,6 +99,7 @@ struct task_restore_core_args {
 	struct task_entries		*task_entries;
 	VmaEntry			*self_vmas;
 	VmaEntry			*tgt_vmas;
+	unsigned int			nr_vmas;
 	unsigned long			premmapped_addr;
 	unsigned long			premmapped_len;
 	rt_sigaction_t			sigchld_act;
diff --git a/restorer.c b/restorer.c
index e126912..bf01f66 100644
--- a/restorer.c
+++ b/restorer.c
@@ -321,6 +321,80 @@ static void rst_tcp_socks_all(int *arr, int size)
 	sys_munmap(arr, size);
 }
 
+static int vma_remap(unsigned long src, unsigned long dst, unsigned long len)
+{
+	unsigned long guard = 0, tmp;
+
+	pr_info("Remap %lx->%lx len %lx\n", src, dst, len);
+
+	if (src - dst < len)
+		guard = dst;
+	else if (dst - src < len)
+		guard = dst + len - PAGE_SIZE;
+
+	if (src == dst)
+		return 0;
+
+	if (guard != 0) {
+		/* mremap() returns an error if a target and source vma-s are
+		 * overlapped. In this case the source vma are remapped in
+		 * a temporary place and then remapped to the target address.
+		 * Here is one hack to find non-ovelapped temporary place.
+		 * 1. initial placement. We need to move src -> tgt.
+		 * |       |+++++src+++++|
+		 * |-----tgt-----|       |
+		 *
+		 * 2. map a guard page at the non-ovelapped border of a target vma.
+		 * |       |+++++src+++++|
+		 * |G|----tgt----|       |
+		 *
+		 * 3. remap src to any other place.
+		 *    G prevents src from being remaped on tgt again
+		 * |       |-------------| -> |+++++src+++++|
+		 * |G|---tgt-----|                          |
+		 *
+		 * 4. remap src to tgt, no overlapping any longer
+		 * |+++++src+++++|   <----    |-------------|
+		 * |G|---tgt-----|                          |
+		 */
+
+		unsigned long addr;
+
+		/* Prevent overlapping with a temporary place */
+		tmp = sys_mmap((void *) guard, PAGE_SIZE, PROT_NONE,
+					MAP_PRIVATE | MAP_ANONYMOUS, 0, 0);
+		if (tmp != guard) {
+			pr_err("Unable to map a guard page %lx (%lx)\n", guard, tmp);
+			return -1;
+		}
+
+		/* Choose a temporary place */
+		addr = sys_mmap(NULL, len, PROT_NONE,
+					MAP_PRIVATE | MAP_ANONYMOUS, 0, 0);
+		if (addr == (unsigned long) MAP_FAILED) {
+			pr_err("Unable to reserve memory (%lx)\n", addr);
+			return -1;
+		}
+
+		tmp = sys_mremap(src, len, len,
+					MREMAP_MAYMOVE | MREMAP_FIXED, addr);
+		if (tmp != addr) {
+			pr_err("Unable to remap %lx -> %lx (%lx)\n", src, addr, tmp);
+			return -1;
+		}
+
+		src = addr;
+	}
+
+	tmp = sys_mremap(src, len, len, MREMAP_MAYMOVE | MREMAP_FIXED, dst);
+	if (tmp != dst) {
+		pr_err("Unable to remap %lx -> %lx\n", src, dst);
+		return -1;
+	}
+
+	return 0;
+}
+
 /*
  * The main routine to restore task via sigreturn.
  * This one is very special, we never return there
@@ -384,6 +458,39 @@ long __export_restore_task(struct task_restore_core_args *args)
 	sys_munmap(args->self_vmas,
 			((void *)(vma_entry + 1) - ((void *)args->self_vmas)));
 
+	/* Shift private vma-s to the left */
+	for (vma_entry = args->tgt_vmas; vma_entry->start != 0; vma_entry++) {
+		if (!vma_entry_is(vma_entry, VMA_AREA_REGULAR))
+			continue;
+
+		if (!vma_priv(vma_entry))
+			continue;
+
+		if (vma_entry->start > vma_entry->shmid)
+			break;
+
+		if (vma_remap(vma_premmaped_start(vma_entry),
+				vma_entry->start, vma_entry_len(vma_entry)))
+			goto core_restore_end;
+	}
+
+	/* Shift private vma-s to the right */
+	for (vma_entry = args->tgt_vmas + args->nr_vmas -1;
+				vma_entry >= args->tgt_vmas; vma_entry--) {
+		if (!vma_entry_is(vma_entry, VMA_AREA_REGULAR))
+			continue;
+
+		if (!vma_priv(vma_entry))
+			continue;
+
+		if (vma_entry->start < vma_entry->shmid)
+			break;
+
+		if (vma_remap(vma_premmaped_start(vma_entry),
+				vma_entry->start, vma_entry_len(vma_entry)))
+			goto core_restore_end;
+	}
+
 	/*
 	 * OK, lets try to map new one.
 	 */
@@ -391,6 +498,9 @@ long __export_restore_task(struct task_restore_core_args *args)
 		if (!vma_entry_is(vma_entry, VMA_AREA_REGULAR))
 			continue;
 
+		if (vma_priv(vma_entry))
+			continue;
+
 		va = restore_mapping(vma_entry);
 
 		if (va != vma_entry->start) {
-- 
1.7.11.7



More information about the CRIU mailing list