[CRIU] [PATCH] kdat: Handle pagemaps with zeroed pfns

Pavel Emelyanov xemul at parallels.com
Wed Dec 30 02:45:54 PST 2015


Recent kernels allow a user to read the proc pagemap file, but zero the
pfns in it. Support this mode for user dumps.

https://github.com/xemul/criu/issues/101

Signed-off-by: Pavel Emelyanov <xemul at virtuozzo.com>

---

diff --git a/include/kerndat.h b/include/kerndat.h
index 23d9104..a02d15b 100644
--- a/include/kerndat.h
+++ b/include/kerndat.h
@@ -16,6 +16,13 @@ extern int kerndat_get_dirty_track(void);
 extern int kerndat_fdinfo_has_lock(void);
 extern int kerndat_loginuid(bool only_dump);
 
+enum pagemap_func {
+	PM_UNKNOWN,	/* presumably "not probed yet" (first enumerator, value 0) -- TODO confirm */
+	PM_DISABLED,	/* /proc/pid/pagemap doesn't open (user mode) */
+	PM_FLAGS_ONLY,	/* pagemap zeroes pfn part (user mode) */
+	PM_FULL,	/* pagemap returned a non-zero pfn for a present page */
+};
+
 struct kerndat_s {
 	dev_t shmem_dev;
 	int tcp_max_rshare;
@@ -27,6 +34,7 @@ struct kerndat_s {
 	unsigned long task_size;
 	bool ipv6;
 	bool has_loginuid;
+	enum pagemap_func pmap;
 };
 
 extern struct kerndat_s kdat;
diff --git a/kerndat.c b/kerndat.c
index a7c72a2..696701d 100644
--- a/kerndat.c
+++ b/kerndat.c
@@ -41,6 +41,42 @@ struct kerndat_s kdat = {
 	.tcp_max_rshare = 87380,
 };
 
+static int check_pagemap(void)
+{
+	int ret, fd;
+	u64 pfn = 0;
+
+	/* Probe own pagemap, record the result in kdat.pmap; 0 on success, -1 on error. */
+	fd = __open_proc(PROC_SELF, EPERM, O_RDONLY, "pagemap");
+	if (fd < 0) {
+		if (errno != EPERM)
+			return -1;
+		pr_info("Pagemap disabled\n");
+		kdat.pmap = PM_DISABLED;
+		return 0;
+	}
+
+	/* Get the PFN of some present page. Stack is here, so try it :) */
+	ret = pread(fd, &pfn, sizeof(pfn), (((unsigned long)&ret) / page_size()) * sizeof(pfn));
+	if (ret != sizeof(pfn)) {
+		pr_perror("Can't read pagemap");
+		close(fd);	/* release the descriptor on the error path too */
+		return -1;
+	}
+
+	close(fd);
+	/* A zeroed pfn field for a present page: only the flag bits are usable. */
+	if ((pfn & PME_PFRAME_MASK) == 0) {
+		pr_info("Pagemap provides flags only\n");
+		kdat.pmap = PM_FLAGS_ONLY;
+	} else {
+		pr_info("Pagemap is fully functional\n");
+		kdat.pmap = PM_FULL;
+	}
+
+	return 0;
+}
+
 /*
  * Anonymous shared mappings are backed by hidden tmpfs
  * mount. Find out its dev to distinguish such mappings
@@ -322,13 +358,15 @@ static int init_zero_page_pfn()
 		return -1;
 	}
 
+	if (kdat.pmap != PM_FULL) {
+		pr_info("Zero page detection failed, optimization turns off.\n");
+		return 0;
+	}
+
 	ret = vaddr_to_pfn((unsigned long)addr, &kdat.zero_page_pfn);
 	munmap(addr, PAGE_SIZE);
 
-	if (ret == 1) {
-		pr_info("Zero page detection failed, optimization turns off.\n");
-		ret = 0;
-	} else if (kdat.zero_page_pfn == 0)
+	if (kdat.zero_page_pfn == 0)
 		ret = -1;
 
 	return ret;
@@ -456,7 +494,9 @@ int kerndat_init(void)
 {
 	int ret;
 
-	ret = kerndat_get_shmemdev();
+	ret = check_pagemap();
+	if (!ret)
+		ret = kerndat_get_shmemdev();
 	if (!ret)
 		ret = kerndat_get_dirty_track();
 	if (!ret)
@@ -487,7 +527,9 @@ int kerndat_init_rst(void)
 	 * not available inside namespaces.
 	 */
 
-	ret = tcp_read_sysctl_limits();
+	ret = check_pagemap();
+	if (!ret)
+		ret = tcp_read_sysctl_limits();
 	if (!ret)
 		ret = get_last_cap();
 	if (!ret)
diff --git a/pagemap-cache.c b/pagemap-cache.c
index 5420586..c2e467b 100644
--- a/pagemap-cache.c
+++ b/pagemap-cache.c
@@ -46,13 +46,14 @@ int pmc_init(pmc_t *pmc, pid_t pid, const struct list_head *vma_head, size_t siz
 	if (!pmc->map)
 		goto err;
 
-	pmc->fd = __open_proc(pid, EPERM, O_RDONLY, "pagemap");
-	if (pmc->fd < 0) {
-		if (errno != EPERM)
-			goto err;
-
+	if (kdat.pmap == PM_DISABLED) {
+		pmc->fd = -1;
 		pr_warn("No pagemap for %d available, "
 				"switching to greedy mode\n", pid);
+	} else {
+		pmc->fd = open_proc(pid, "pagemap");
+		if (pmc->fd < 0)
+			goto err;
 	}
 
 	pr_debug("created for pid %d (takes %zu bytes)\n", pid, pmc->map_len);
diff --git a/util.c b/util.c
index 31ef539..0633500 100644
--- a/util.c
+++ b/util.c
@@ -763,9 +763,9 @@ int vaddr_to_pfn(unsigned long vaddr, u64 *pfn)
 	int fd, ret = -1;
 	off_t off;
 
-	fd = __open_proc(getpid(), EPERM, O_RDONLY, "pagemap");
+	fd = open_proc(getpid(), "pagemap");
 	if (fd < 0)
-		return errno == EPERM ? 1 : -1;
+		return -1;
 
 	off = (vaddr / page_size()) * sizeof(u64);
 	if (lseek(fd, off, SEEK_SET) != off) {
diff --git a/vdso.c b/vdso.c
index a6a9b30..c547cf5 100644
--- a/vdso.c
+++ b/vdso.c
@@ -43,20 +43,19 @@ int parasite_fixup_vdso(struct parasite_ctl *ctl, pid_t pid,
 	struct vma_area *proxy_vdso_marked = NULL;
 	struct vma_area *proxy_vvar_marked = NULL;
 	struct parasite_vdso_vma_entry *args;
-	int fd, ret, exit_code = -1;
+	int fd = -1, ret, exit_code = -1;
 	u64 pfn = VDSO_BAD_PFN;
 	struct vma_area *vma;
 	off_t off;
 
 	args = parasite_args(ctl, struct parasite_vdso_vma_entry);
-	fd = __open_proc(pid, EPERM, O_RDONLY, "pagemap");
-	if (fd < 0) {
-		if (errno == EPERM) {
-			pr_info("Pagemap is unavailable, trying a slow way\n");
-		} else
+	if (kdat.pmap == PM_FULL) {
+		BUG_ON(vdso_pfn == VDSO_BAD_PFN);
+		fd = open_proc(pid, "pagemap");
+		if (fd < 0)
 			return -1;
 	} else
-		BUG_ON(vdso_pfn == VDSO_BAD_PFN);
+		pr_info("Pagemap is unavailable, trying a slow way\n");
 
 	list_for_each_entry(vma, &vma_area_list->h, list) {
 		if (!vma_area_is(vma, VMA_AREA_REGULAR))
@@ -311,8 +310,11 @@ int vdso_init(void)
 {
 	if (vdso_fill_self_symtable(&vdso_sym_rt))
 		return -1;
-	if (vaddr_to_pfn(vdso_sym_rt.vma_start, &vdso_pfn) != 0)
+
+	if (kdat.pmap != PM_FULL)
 		pr_info("VDSO detection turned off\n");
+	else if (vaddr_to_pfn(vdso_sym_rt.vma_start, &vdso_pfn))
+		return -1;
 
 	return 0;
 }



More information about the CRIU mailing list