[CRIU] [PATCH] mm: don't dump the zero page

Andrey Vagin avagin at openvz.org
Wed Jan 29 11:36:19 PST 2014


If a process reads an untouched page, the kernel maps the zero page
at that address. Such a page does not have the SOFT_DIRTY bit set, and it
must not be dumped.

Signed-off-by: Andrey Vagin <avagin at openvz.org>
---
 include/kerndat.h |  1 +
 kerndat.c         | 53 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 mem.c             |  6 +++++-
 3 files changed, 59 insertions(+), 1 deletion(-)

diff --git a/include/kerndat.h b/include/kerndat.h
index 738ad04..de86dba 100644
--- a/include/kerndat.h
+++ b/include/kerndat.h
@@ -19,5 +19,6 @@ extern int tcp_max_wshare;
 extern int tcp_max_rshare;
 
 extern int kern_last_cap;
+extern unsigned long zero_page_pfn;
 
 #endif /* __CR_KERNDAT_H__ */
diff --git a/kerndat.c b/kerndat.c
index 78b22f8..56581db 100644
--- a/kerndat.c
+++ b/kerndat.c
@@ -7,6 +7,7 @@
 #include <errno.h>
 
 #include "log.h"
+#include "bug.h"
 #include "kerndat.h"
 #include "mem.h"
 #include "compiler.h"
@@ -151,6 +152,56 @@ out:
 	return 0;
 }
 
+/* PFN of the kernel's shared zero page; stays 0 until the lookup succeeds */
+unsigned long zero_page_pfn;
+
+/*
+ * Read a fresh anonymous page so that the kernel maps the zero page there,
+ * then fetch its PFN from /proc/self/pagemap. Returns 0 on success, else -1.
+ */
+static int init_zero_page_pfn(void)
+{
+	void *addr;
+	loff_t off;
+	u64 pfn;
+	int fd;
+
+	addr = mmap(NULL, PAGE_SIZE, PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+	if (addr == MAP_FAILED) {
+		pr_perror("Unable to map zero page");
+		return -1;
+	}
+
+	if (*((int *) addr) != 0) {
+		BUG();
+		goto err;
+	}
+
+	fd = open("/proc/self/pagemap", O_RDONLY);
+	if (fd < 0) {
+		pr_perror("Unable to open /proc/self/pagemap");
+		goto err;
+	}
+
+	off = (unsigned long) addr / PAGE_SIZE * sizeof(pfn);
+	if (pread(fd, &pfn, sizeof(pfn), off) != sizeof(pfn)) {
+		pr_perror("Can't read pagemap file");
+		goto err_close;
+	}
+
+	if (!(pfn & PME_PRESENT)) {
+		pr_err("The zero page isn't present\n");
+		goto err_close;
+	}
+
+	zero_page_pfn = pfn & PME_PFRAME_MASK;
+err_close:
+	close(fd);
+err:
+	munmap(addr, PAGE_SIZE);
+	return zero_page_pfn ? 0 : -1;
+}
+
 int kerndat_init(void)
 {
 	int ret;
@@ -158,6 +209,8 @@ int kerndat_init(void)
 	ret = kerndat_get_shmemdev();
 	if (!ret)
 		ret = kerndat_get_dirty_track();
+	if (!ret)
+		ret = init_zero_page_pfn();
 
 	return ret;
 }
diff --git a/mem.c b/mem.c
index 4e21d20..81ebef4 100644
--- a/mem.c
+++ b/mem.c
@@ -74,7 +74,9 @@ static inline bool should_dump_page(VmaEntry *vmae, u64 pme)
 	 */
 	if (vma_entry_is(vmae, VMA_FILE_PRIVATE) && (pme & PME_FILE))
 		return false;
-	if (pme & (PME_PRESENT | PME_SWAP))
+	if (pme & PME_SWAP)
+		return true;
+	if ((pme & PME_PRESENT) && ((pme & PME_PFRAME_MASK) != zero_page_pfn))
 		return true;
 
 	return false;
@@ -199,6 +201,8 @@ static int __parasite_dump_pages_seized(struct parasite_ctl *ctl,
 	pr_info("Dumping pages (type: %d pid: %d)\n", CR_FD_PAGES, ctl->pid.real);
 	pr_info("----------------------------------------\n");
 
+	BUG_ON(zero_page_pfn == 0);
+
 	timing_start(TIME_MEMDUMP);
 
 	pr_debug("   Private vmas %lu/%lu pages\n",
-- 
1.8.3.1



More information about the CRIU mailing list