[CRIU] [PATCH cr] [RFC] cr-restore: remove unshared pages from inherited private mappings

Andrey Vagin avagin at openvz.org
Thu Oct 18 07:39:16 EDT 2012


A parent process can change a few pages after forking a child, and
none of these pages should be available to the child.

Each vma has a bitmap of existing pages. The parent's and the child's
bitmaps are compared, and all pages which are not present in the child's
bitmap are dropped.

Signed-off-by: Andrey Vagin <avagin at openvz.org>
---
 cr-restore.c      |   43 +++++++++++++++++++++++++++++++++++++++++--
 include/crtools.h |    2 ++
 2 files changed, 43 insertions(+), 2 deletions(-)

diff --git a/cr-restore.c b/cr-restore.c
index 4f17956..327e93f 100644
--- a/cr-restore.c
+++ b/cr-restore.c
@@ -178,6 +178,7 @@ static int map_private_vma(pid_t pid, struct vma_area *vma,
 			struct vma_area **pvma, struct list_head *pvma_list)
 {
 	int ret;
+	unsigned long nr_pages;
 	struct vma_area *p = *pvma;
 
 	if (!vma_priv(&vma->vma))
@@ -194,6 +195,11 @@ static int map_private_vma(pid_t pid, struct vma_area *vma,
 		vma->vma.shmid = 0;
 	}
 
+	nr_pages = vma_entry_len(&vma->vma) / PAGE_SIZE;
+	vma->page_bitmap = xzalloc(BITS_TO_LONGS(nr_pages) * sizeof(long));
+	if (vma->page_bitmap == NULL)
+		return -1;
+
 	list_for_each_entry_continue(p, pvma_list, list) {
 		if (p->vma.start > vma->vma.start)
 			 break;
@@ -203,6 +209,7 @@ static int map_private_vma(pid_t pid, struct vma_area *vma,
 			pr_info("COW 0x%016lx-0x%016lx 0x%016lx vma\n",
 				vma->vma.start, vma->vma.end, vma->vma.pgoff);
 			vma->vma.shmid = p->vma.shmid;
+			break;
 		}
 
 	}
@@ -224,7 +231,8 @@ static int map_private_vma(pid_t pid, struct vma_area *vma,
 			return -1;
 		}
 		vma->vma.shmid = (unsigned long) addr;
-	}
+	} else
+		vma->ppage_bitmap = p->page_bitmap;
 
 	if (vma_entry_is(&vma->vma, VMA_FILE_PRIVATE))
 		close(vma->vma.fd);
@@ -247,7 +255,7 @@ static int restore_anon_vma_content(pid_t pid)
 	 * Read page contents.
 	 */
 	while (1) {
-		u64 va;
+		u64 va, page_offset;
 		char buf[PAGE_SIZE];
 		void *p;
 
@@ -267,6 +275,12 @@ static int restore_anon_vma_content(pid_t pid)
 			vma = list_entry(vma->list.next, struct vma_area, list);
 		}
 
+		page_offset = (va - vma->vma.start) / PAGE_SIZE;
+		if (vma->page_bitmap)
+			set_bit(page_offset, vma->page_bitmap);
+		if (vma->ppage_bitmap)
+			clear_bit(page_offset, vma->ppage_bitmap);
+
 		ret = read(fd, buf, PAGE_SIZE);
 		if (ret != PAGE_SIZE) {
 			pr_err("Can'r read mapping page %d\n", ret);
@@ -281,6 +295,31 @@ static int restore_anon_vma_content(pid_t pid)
 	}
 	close(fd);
 
+	/* Remove pages that were not shared with the child */
+	list_for_each_entry(vma, &vma_list, list) {
+		unsigned long size, i = 0;
+
+		if (vma->ppage_bitmap == NULL)
+			continue;
+
+		size = vma_entry_len(&vma->vma) / PAGE_SIZE;
+		while (1) {
+			/* Find all pages which are available only to the parent */
+			i = find_next_bit(vma->ppage_bitmap, size, i);
+
+			if ( i >= size)
+				break;
+
+			madvise((void *)(vma->vma.shmid + PAGE_SIZE * i),
+						PAGE_SIZE, MADV_DONTNEED);
+			if (ret < 0) {
+				pr_perror("madvise failed\n");
+				return -1;
+			}
+			i++;
+		}
+	}
+
 	return 0;
 }
 
diff --git a/include/crtools.h b/include/crtools.h
index 146524d..c3e9e5b 100644
--- a/include/crtools.h
+++ b/include/crtools.h
@@ -213,6 +213,8 @@ struct vma_area {
 	struct list_head	list;
 	VmaEntry		vma;
 	int			vm_file_fd;
+	unsigned long		*page_bitmap;  /* existent pages */
+	unsigned long		*ppage_bitmap; /* parent's existent pages */
 };
 
 #define vma_area_is(vma_area, s)	vma_entry_is(&((vma_area)->vma), s)
-- 
1.7.1



More information about the CRIU mailing list