[CRIU] [PATCHv0 7/8] shmem: implement anon shared memory autodedup

Eugene Batalov eabatalov89 at gmail.com
Sun Dec 6 05:15:23 PST 2015


From: Fyodor <bocharovfedor at gmail.com>

Dedup works as follows: when dumping an anon shmem vma we check every one
of its pages for dirtiness (this info is stored in a bitmap with each anon
shmem vma). We dump each anon shmem vma using page-xfer. page-xfer does all
the magic of storing the "in_parent" field in the pagemap-entry.
When restoring from a deduplicated image we simply read every page with
page-read, and it does all the magic of reading from the right place in the
image hierarchy.
Auto-dedup on restore is already implemented by page-read, so we don't need
to code it.

Signed-off-by: Fyodor Bocharov <fbocharov at yandex.ru>
Signed-off-by: Eugene Batalov <eabatalov89 at gmail.com>
---
 shmem.c | 62 ++++++++++++++++++++++++--------------------------------------
 1 file changed, 24 insertions(+), 38 deletions(-)

diff --git a/shmem.c b/shmem.c
index 8563415..792317d 100644
--- a/shmem.c
+++ b/shmem.c
@@ -148,15 +148,13 @@ static int shmem_wait_and_open(int pid, struct shmem_info *si)
 
 static int restore_shmem_content(void *addr, struct shmem_info *si)
 {
-	int ret = 0, fd_pg;
+	int ret = 0;
 	struct page_read pr;
-	unsigned long off_real;
 
 	ret = open_page_read(si->shmid, &pr, PR_SHMEM);
 	if (ret <= 0)
 		return -1;
 
-	fd_pg = img_raw_fd(pr.pi);
 	while (1) {
 		unsigned long vaddr;
 		unsigned nr_pages;
@@ -172,25 +170,15 @@ static int restore_shmem_content(void *addr, struct shmem_info *si)
 		if (vaddr + nr_pages * PAGE_SIZE > si->size)
 			break;
 
-		off_real = lseek(fd_pg, 0, SEEK_CUR);
-
-		ret = read(fd_pg, addr + vaddr, nr_pages * PAGE_SIZE);
-		if (ret != nr_pages * PAGE_SIZE) {
-			ret = -1;
-			break;
-		}
-
-		if (opts.auto_dedup) {
-			ret = punch_hole(&pr, off_real, nr_pages * PAGE_SIZE, false);
-			if (ret == -1) {
-				break;
-			}
-		}
+		ret = pr.read_pages(&pr, vaddr, nr_pages, addr + vaddr);
+		if (ret < 0)
+			goto err;
 
 		if (pr.put_pagemap)
 			pr.put_pagemap(&pr);
 	}
 
+err:
 	pr.close(&pr);
 	return ret;
 }
@@ -386,23 +374,29 @@ static int dump_pages(struct page_pipe *pp, struct page_xfer *xfer, void *addr)
 	return page_xfer_dump_pages(xfer, pp, (unsigned long)addr);
 }
 
+/* Implementation comes from mem.c */
+
+static inline bool page_in_parent(unsigned long dirty)
+{
+	/*
+	 * If we do memory tracking, but w/o parent images,
+	 * then we have to dump all memory
+	 */
+
+	return opts.track_mem && opts.img_parent && !dirty;
+}
+
 static int dump_one_shmem(struct shmem_info_dump *si)
 {
 	struct iovec *iovs;
 	struct page_pipe *pp;
 	struct page_xfer xfer;
 	int err, ret = -1, fd;
-	unsigned char *map = NULL;
 	void *addr = NULL;
 	unsigned long pfn, nrpages;
 
 	pr_info("Dumping shared memory %ld\n", si->shmid);
 
-	nrpages = (si->size + PAGE_SIZE - 1) / PAGE_SIZE;
-	map = xmalloc(nrpages * sizeof(*map));
-	if (!map)
-		goto err;
-
 	fd = open_proc(si->pid, "map_files/%lx-%lx", si->start, si->end);
 	if (fd < 0)
 		goto err;
@@ -415,17 +409,7 @@ static int dump_one_shmem(struct shmem_info_dump *si)
 		goto err;
 	}
 
-	/*
-	 * We can't use pagemap here, because this vma is
-	 * not mapped to us at all, but mincore reports the
-	 * pagecache status of a file, which is correct in
-	 * this case.
-	 */
-
-	err = mincore(addr, si->size, map);
-	if (err)
-		goto err_unmap;
-
+	nrpages = BLOCKS_CNT(si->size, PAGE_SIZE);
 	iovs = xmalloc(((nrpages + 1) / 2) * sizeof(struct iovec));
 	if (!iovs)
 		goto err_unmap;
@@ -439,10 +423,13 @@ static int dump_one_shmem(struct shmem_info_dump *si)
 		goto err_pp;
 
 	for (pfn = 0; pfn < nrpages; pfn++) {
-		if (!(map[pfn] & PAGE_RSS))
-			continue;
+		unsigned long dirty = test_bit(pfn, si->pdirty_map);
 again:
-		ret = page_pipe_add_page(pp, (unsigned long)addr + pfn * PAGE_SIZE);
+		if (xfer.parent && page_in_parent(dirty))
+			ret = page_pipe_add_hole(pp, (unsigned long)addr + pfn * PAGE_SIZE);
+		else
+			ret = page_pipe_add_page(pp, (unsigned long)addr + pfn * PAGE_SIZE);
+
 		if (ret == -EAGAIN) {
 			ret = dump_pages(pp, &xfer, addr);
 			if (ret)
@@ -464,7 +451,6 @@ err_iovs:
 err_unmap:
 	munmap(addr,  si->size);
 err:
-	xfree(map);
 	return ret;
 }
 
-- 
1.9.1



More information about the CRIU mailing list