[CRIU] [PATCH v5 5/8] criu: pagemap: add entries for zero pages

Mike Rapoport rppt at linux.vnet.ibm.com
Tue Jun 28 22:55:10 PDT 2016


The pages that are mapped to zero_page_pfn are not dumped but information
where are they located is required for lazy restore.
Note that get_pagemap users presumed that zero pages are not a part of the
pagemap and these pages were just silently skipped during memory restore.
At the moment I preserve this semantics and force get_pagemap to skip zero
pages.

Signed-off-by: Mike Rapoport <rppt at linux.vnet.ibm.com>
---
 criu/include/page-pipe.h | 13 +++++++++-
 criu/include/page-xfer.h |  2 +-
 criu/include/stats.h     |  1 +
 criu/mem.c               | 22 +++++++++++------
 criu/page-pipe.c         | 15 +++++++++---
 criu/page-xfer.c         | 63 +++++++++++++++++++++++++++++++++---------------
 criu/pagemap.c           | 27 ++++++++++++++-------
 criu/stats.c             |  1 +
 images/pagemap.proto     |  1 +
 images/stats.proto       |  2 ++
 10 files changed, 106 insertions(+), 41 deletions(-)

diff --git a/criu/include/page-pipe.h b/criu/include/page-pipe.h
index f11071c..ed6067c 100644
--- a/criu/include/page-pipe.h
+++ b/criu/include/page-pipe.h
@@ -97,6 +97,7 @@ struct page_pipe {
 					   have their iov-s in there */
 
 	struct page_pipe_iovs holes;	/* iovs for holes */
+	struct page_pipe_iovs zeros;	/* iovs for zero pages */
 
 	bool chunk_mode;	/* Restrict the maximum buffer size of pipes
 				   and dump memory for a few iterations */
@@ -108,7 +109,17 @@ extern struct page_pipe *create_page_pipe(unsigned int nr,
 extern void destroy_page_pipe(struct page_pipe *p);
 extern int page_pipe_add_page(struct page_pipe *p, unsigned long addr,
 			      unsigned int flags);
-extern int page_pipe_add_hole(struct page_pipe *p, unsigned long addr);
+extern int __page_pipe_add_hole(struct page_pipe_iovs *i, unsigned long addr);
+
+static inline int page_pipe_add_hole(struct page_pipe *p, unsigned long addr)
+{
+	return __page_pipe_add_hole(&p->holes, addr);
+}
+
+static inline int page_pipe_add_zero(struct page_pipe *p, unsigned long addr)
+{
+	return __page_pipe_add_hole(&p->zeros, addr);
+}
 
 extern void debug_show_page_pipe(struct page_pipe *pp);
 void page_pipe_reinit(struct page_pipe *pp);
diff --git a/criu/include/page-xfer.h b/criu/include/page-xfer.h
index d19671b..3ba61ed 100644
--- a/criu/include/page-xfer.h
+++ b/criu/include/page-xfer.h
@@ -16,7 +16,7 @@ struct page_xfer {
 	/* transfers pages related to previous pagemap */
 	int (*write_pages)(struct page_xfer *self, int pipe, unsigned long len);
 	/* transfers one hole -- vaddr:len entry w/o pages */
-	int (*write_hole)(struct page_xfer *self, struct iovec *iov);
+	int (*write_hole)(struct page_xfer *self, struct iovec *iov, int type);
 	void (*close)(struct page_xfer *self);
 
 	/* private data for every page-xfer engine */
diff --git a/criu/include/stats.h b/criu/include/stats.h
index e417636..c0effa7 100644
--- a/criu/include/stats.h
+++ b/criu/include/stats.h
@@ -25,6 +25,7 @@ enum {
 	CNT_PAGES_SCANNED,
 	CNT_PAGES_SKIPPED_PARENT,
 	CNT_PAGES_WRITTEN,
+	CNT_PAGES_ZERO,
 
 	DUMP_CNT_NR_STATS,
 };
diff --git a/criu/mem.c b/criu/mem.c
index d619a72..c4af568 100644
--- a/criu/mem.c
+++ b/criu/mem.c
@@ -107,14 +107,17 @@ static inline bool should_dump_page(VmaEntry *vmae, u64 pme)
 		return false;
 	if (vma_entry_is(vmae, VMA_AREA_AIORING))
 		return true;
-	if (pme & PME_SWAP)
-		return true;
-	if ((pme & PME_PRESENT) && ((pme & PME_PFRAME_MASK) != kdat.zero_page_pfn))
+	if (pme & (PME_PRESENT | PME_SWAP))
 		return true;
 
 	return false;
 }
 
+static inline bool page_is_zero(u64 pme)
+{
+	return (pme & PME_PFRAME_MASK) == kdat.zero_page_pfn;
+}
+
 static inline bool page_in_parent(u64 pme)
 {
 	/*
@@ -138,7 +141,7 @@ static int generate_iovs(struct vma_area *vma, struct page_pipe *pp, u64 *map, u
 {
 	u64 *at = &map[PAGE_PFN(*off)];
 	unsigned long pfn, nr_to_scan;
-	unsigned long pages[2] = {};
+	unsigned long pages[3] = {};
 
 	nr_to_scan = (vma_area_len(vma) - *off) / PAGE_SIZE;
 
@@ -165,9 +168,12 @@ static int generate_iovs(struct vma_area *vma, struct page_pipe *pp, u64 *map, u
 		if (has_parent && page_in_parent(at[pfn])) {
 			ret = page_pipe_add_hole(pp, vaddr);
 			pages[0]++;
+		} else if (page_is_zero(at[pfn])) {
+			ret = page_pipe_add_zero(pp, vaddr);
+			pages[1]++;
 		} else {
 			ret = page_pipe_add_page(pp, vaddr, ppb_flags);
-			pages[1]++;
+			pages[2]++;
 		}
 
 		if (ret) {
@@ -180,9 +186,11 @@ static int generate_iovs(struct vma_area *vma, struct page_pipe *pp, u64 *map, u
 
 	cnt_add(CNT_PAGES_SCANNED, nr_to_scan);
 	cnt_add(CNT_PAGES_SKIPPED_PARENT, pages[0]);
-	cnt_add(CNT_PAGES_WRITTEN, pages[1]);
+	cnt_add(CNT_PAGES_ZERO, pages[1]);
+	cnt_add(CNT_PAGES_WRITTEN, pages[2]);
 
-	pr_info("Pagemap generated: %lu pages %lu holes\n", pages[1], pages[0]);
+	pr_info("Pagemap generated: %lu pages %lu zeros %lu holes\n",
+		pages[2], pages[1], pages[0]);
 	return 0;
 }
 
diff --git a/criu/page-pipe.c b/criu/page-pipe.c
index d002c3a..93a6b65 100644
--- a/criu/page-pipe.c
+++ b/criu/page-pipe.c
@@ -151,6 +151,11 @@ struct page_pipe *create_page_pipe(unsigned int nr_segs,
 	pp->pages.free_iov = 0;
 	pp->pages.busy_iov = 0;
 
+	pp->zeros.nr_iovs = 0;
+	pp->zeros.free_iov = 0;
+	pp->zeros.busy_iov = 0;
+	pp->zeros.iovs = NULL;
+
 	pp->holes.nr_iovs = 0;
 	pp->holes.free_iov = 0;
 	pp->holes.iovs = NULL;
@@ -267,10 +272,8 @@ int page_pipe_add_page(struct page_pipe *pp, unsigned long addr,
 
 #define PP_HOLES_BATCH	32
 
-int page_pipe_add_hole(struct page_pipe *pp, unsigned long addr)
+int __page_pipe_add_hole(struct page_pipe_iovs *hole, unsigned long addr)
 {
-	struct page_pipe_iovs *hole = &pp->holes;
-
 	if (hole->free_iov >= hole->nr_iovs) {
 		hole->iovs = xrealloc(hole->iovs,
 				(hole->nr_iovs + PP_HOLES_BATCH) * sizeof(struct iovec));
@@ -476,4 +479,10 @@ void debug_show_page_pipe(struct page_pipe *pp)
 		iov = &pp->holes.iovs[i];
 		pr_debug("\t%p %lu\n", iov->iov_base, iov->iov_len / PAGE_SIZE);
 	}
+
+	pr_debug("* %u zeros:\n", pp->zeros.free_iov);
+	for (i = 0; i < pp->zeros.free_iov; i++) {
+		iov = &pp->zeros.iovs[i];
+		pr_debug("\t%p %lu\n", iov->iov_base, iov->iov_len / PAGE_SIZE);
+	}
 }
diff --git a/criu/page-xfer.c b/criu/page-xfer.c
index 32b50f7..1164efc 100644
--- a/criu/page-xfer.c
+++ b/criu/page-xfer.c
@@ -37,6 +37,7 @@ static void psi2iovec(struct page_server_iov *ps, struct iovec *iov)
 #define PS_IOV_OPEN	3
 #define PS_IOV_OPEN2	4
 #define PS_IOV_PARENT	5
+#define PS_IOV_ZERO	6
 
 #define PS_IOV_FLUSH		0x1023
 #define PS_IOV_FLUSH_N_CLOSE	0x1024
@@ -104,9 +105,10 @@ static int write_pages_to_server(struct page_xfer *xfer,
 	return 0;
 }
 
-static int write_hole_to_server(struct page_xfer *xfer, struct iovec *iov)
+static int write_hole_to_server(struct page_xfer *xfer, struct iovec *iov,
+				int type)
 {
-	return send_iov(xfer->sk, PS_IOV_HOLE, xfer->dst_id, iov);
+	return send_iov(xfer->sk, type, xfer->dst_id, iov);
 }
 
 static void close_server_xfer(struct page_xfer *xfer)
@@ -223,24 +225,35 @@ static int check_pagehole_in_parent(struct page_read *p, struct iovec *iov)
 	}
 }
 
-static int write_pagehole_loc(struct page_xfer *xfer, struct iovec *iov)
+static int write_hole_loc(struct page_xfer *xfer, struct iovec *iov, int type)
 {
 	PagemapEntry pe = PAGEMAP_ENTRY__INIT;
 
-	if (xfer->parent != NULL) {
-		int ret;
+	iovec2pagemap(iov, &pe);
 
-		ret = check_pagehole_in_parent(xfer->parent, iov);
-		if (ret) {
-			pr_err("Hole %p/%zu not found in parent\n",
-					iov->iov_base, iov->iov_len);
-			return -1;
+	switch (type) {
+	case PS_IOV_HOLE:
+		if (xfer->parent != NULL) {
+			int ret;
+
+			ret = check_pagehole_in_parent(xfer->parent, iov);
+			if (ret) {
+				pr_err("Hole %p/%zu not found in parent\n",
+				       iov->iov_base, iov->iov_len);
+				return -1;
+			}
 		}
-	}
 
-	iovec2pagemap(iov, &pe);
-	pe.has_in_parent = true;
-	pe.in_parent = true;
+		pe.has_in_parent = true;
+		pe.in_parent = true;
+		break;
+	case PS_IOV_ZERO:
+		pe.has_zero = true;
+		pe.zero = true;
+		break;
+	default:
+		return -1;
+	}
 
 	if (pb_write_one(xfer->pmi, &pe, PB_PAGEMAP) < 0)
 		return -1;
@@ -307,7 +320,7 @@ static int open_page_local_xfer(struct page_xfer *xfer, int fd_type, long id)
 out:
 	xfer->write_pagemap = write_pagemap_loc;
 	xfer->write_pages = write_pages_loc;
-	xfer->write_hole = write_pagehole_loc;
+	xfer->write_hole = write_hole_loc;
 	xfer->close = close_page_xfer;
 	return 0;
 }
@@ -321,7 +334,7 @@ int open_page_xfer(struct page_xfer *xfer, int fd_type, long id)
 }
 
 static int dump_hole(struct page_xfer *xfer, struct page_pipe_iovs *iov,
-		     void *limit, unsigned long off)
+		     void *limit, unsigned long off, int type)
 {
 	while (iov->busy_iov < iov->free_iov) {
 		struct iovec *hole = &iov->iovs[iov->busy_iov];
@@ -333,7 +346,7 @@ static int dump_hole(struct page_xfer *xfer, struct page_pipe_iovs *iov,
 		hole->iov_base -= off;
 		pr_debug("\th %p [%u]\n", hole->iov_base,
 			 (unsigned int)(hole->iov_len / PAGE_SIZE));
-		if (xfer->write_hole(xfer, hole))
+		if (xfer->write_hole(xfer, hole, type))
 			return -1;
 
 		iov->busy_iov++;
@@ -342,6 +355,15 @@ static int dump_hole(struct page_xfer *xfer, struct page_pipe_iovs *iov,
 	return 0;
 }
 
+static int dump_holes(struct page_xfer *xfer, struct page_pipe *pp,
+		      void *limit, unsigned long off)
+{
+	if (dump_hole(xfer, &pp->holes, limit, off, PS_IOV_HOLE))
+		return -1;
+
+	return dump_hole(xfer, &pp->zeros, limit, off, PS_IOV_ZERO);
+}
+
 int page_xfer_dump_pages(struct page_xfer *xfer, struct page_pipe *pp,
 		unsigned long off)
 {
@@ -357,7 +379,7 @@ int page_xfer_dump_pages(struct page_xfer *xfer, struct page_pipe *pp,
 		for (i = 0; i < ppb->nr_segs; i++) {
 			struct iovec *iov = &ppb->iov[i];
 
-			if (dump_hole(xfer, &pp->holes, iov->iov_base, off))
+			if (dump_holes(xfer, pp, iov->iov_base, off))
 				return -1;
 
 			BUG_ON(iov->iov_base < (void *)off);
@@ -372,7 +394,7 @@ int page_xfer_dump_pages(struct page_xfer *xfer, struct page_pipe *pp,
 		}
 	}
 
-	if (dump_hole(xfer, &pp->holes, NULL, off))
+	if (dump_holes(xfer, pp, NULL, off))
 		return -1;
 
 	return 0;
@@ -562,7 +584,7 @@ static int page_server_hole(int sk, struct page_server_iov *pi)
 		return -1;
 
 	psi2iovec(pi, &iov);
-	if (lxfer->write_hole(lxfer, &iov))
+	if (lxfer->write_hole(lxfer, &iov, pi->cmd))
 		return -1;
 
 	return 0;
@@ -618,6 +640,7 @@ static int page_server_serve(int sk)
 			ret = page_server_add(sk, &pi);
 			break;
 		case PS_IOV_HOLE:
+		case PS_IOV_ZERO:
 			ret = page_server_hole(sk, &pi);
 			break;
 		case PS_IOV_FLUSH:
diff --git a/criu/pagemap.c b/criu/pagemap.c
index fded268..2416259 100644
--- a/criu/pagemap.c
+++ b/criu/pagemap.c
@@ -121,14 +121,25 @@ int dedup_one_iovec(struct page_read *pr, struct iovec *iov)
 	return 0;
 }
 
+static void put_pagemap(struct page_read *pr)
+{
+	pr->curr_pme++;
+}
+
 static int get_pagemap(struct page_read *pr, struct iovec *iov)
 {
 	PagemapEntry *pe;
 
-	if (pr->curr_pme >= pr->nr_pmes)
-		return 0;
+	for (;;) {
+		if (pr->curr_pme >= pr->nr_pmes)
+			return 0;
+
+		pe = pr->pmes[pr->curr_pme];
 
-	pe = pr->pmes[pr->curr_pme];
+		if (!pe->zero)
+			break;
+		put_pagemap(pr);
+	}
 
 	pagemap2iovec(pe, iov);
 
@@ -143,18 +154,13 @@ static int get_pagemap(struct page_read *pr, struct iovec *iov)
 	return 1;
 }
 
-static void put_pagemap(struct page_read *pr)
-{
-	pr->curr_pme++;
-}
-
 static void skip_pagemap_pages(struct page_read *pr, unsigned long len)
 {
 	if (!len)
 		return;
 
 	pr_debug("\tpr%u Skip %lu bytes from page-dump\n", pr->id, len);
-	if (!pr->pe->in_parent)
+	if (!pr->pe->in_parent && !pr->pe->zero)
 		pr->pi_off += len;
 	pr->cvaddr += len;
 }
@@ -256,6 +262,9 @@ static int read_pagemap_page(struct page_read *pr, unsigned long vaddr, int nr,
 			vaddr += p_nr * PAGE_SIZE;
 			buf += p_nr * PAGE_SIZE;
 		} while (nr);
+	} else if (pr->pe->zero) {
+		/* zero mappings should be skipped by get_pagemap */
+		BUG();
 	} else {
 		int fd = img_raw_fd(pr->pi);
 		off_t current_vaddr = lseek(fd, pr->pi_off, SEEK_SET);
diff --git a/criu/stats.c b/criu/stats.c
index 12b7d05..c01e010 100644
--- a/criu/stats.c
+++ b/criu/stats.c
@@ -122,6 +122,7 @@ void write_stats(int what)
 		ds_entry.pages_scanned = dstats->counts[CNT_PAGES_SCANNED];
 		ds_entry.pages_skipped_parent = dstats->counts[CNT_PAGES_SKIPPED_PARENT];
 		ds_entry.pages_written = dstats->counts[CNT_PAGES_WRITTEN];
+		ds_entry.pages_zero = dstats->counts[CNT_PAGES_ZERO];
 
 		name = "dump";
 	} else if (what == RESTORE_STATS) {
diff --git a/images/pagemap.proto b/images/pagemap.proto
index b492488..f80afeb 100644
--- a/images/pagemap.proto
+++ b/images/pagemap.proto
@@ -8,4 +8,5 @@ message pagemap_entry {
 	required uint64 vaddr		= 1 [(criu).hex = true];
 	required uint32 nr_pages	= 2;
 	optional bool	in_parent	= 3;
+	optional bool	zero		= 4;
 }
diff --git a/images/stats.proto b/images/stats.proto
index 16acf34..307bb29 100644
--- a/images/stats.proto
+++ b/images/stats.proto
@@ -10,6 +10,8 @@ message dump_stats_entry {
 	required uint64			pages_written		= 7;
 
 	optional uint32			irmap_resolve		= 8;
+
+	required uint64			pages_zero		= 9;
 }
 
 message restore_stats_entry {
-- 
1.9.1



More information about the CRIU mailing list