[CRIU] [PATCH] vdso: Don't fail if pagemap is not accessible

Cyrill Gorcunov gorcunov at openvz.org
Mon Sep 28 08:07:00 PDT 2015


We use the page frame number to detect a vDSO which has been remapped
in-place from the runtime vDSO during restore. In such a case, if the
kernel is older than 3.16, the "[vdso]" mark won't be reported
in procfs output.

Still, to address recently reported CVEs and to be able to run CRIU
in unprivileged mode, we need to handle the vDSO without pagemap access.
Here is the deal -- when we find a VMA which "looks like" a vDSO,
we try to scan it for vDSO symbols, and if they match we restore
its status without PFN access.

The good news is that since commit 1c90308e7a77af a pfn read no
longer requires CAP_SYS_ADMIN, so kernel 4.3 won't need this hack.

Signed-off-by: Cyrill Gorcunov <gorcunov at openvz.org>
---
 include/parasite.h |  2 ++
 pie/parasite.c     |  9 +++++++++
 vdso.c             | 59 ++++++++++++++++++++++++++++++++++--------------------
 3 files changed, 48 insertions(+), 22 deletions(-)

diff --git a/include/parasite.h b/include/parasite.h
index f884bb5baeb4..4ec4277f9031 100644
--- a/include/parasite.h
+++ b/include/parasite.h
@@ -94,6 +94,8 @@ struct parasite_vdso_vma_entry {
 	unsigned long	proxy_vdso_addr;
 	unsigned long	proxy_vvar_addr;
 	int		is_marked;
+	bool		try_fill_symtable;
+	bool		is_vdso;
 };
 
 struct parasite_dump_pages_args {
diff --git a/pie/parasite.c b/pie/parasite.c
index a39c035a7f71..1d96ff5742e5 100644
--- a/pie/parasite.c
+++ b/pie/parasite.c
@@ -476,6 +476,15 @@ static int parasite_check_vdso_mark(struct parasite_vdso_vma_entry *args)
 		args->is_marked = 0;
 		args->proxy_vdso_addr = VDSO_BAD_ADDR;
 		args->proxy_vvar_addr = VVAR_BAD_ADDR;
+
+		if (args->try_fill_symtable) {
+			struct vdso_symtable t;
+
+			if (vdso_fill_symtable((void *)args->start, args->len, &t))
+				args->is_vdso = false;
+			else
+				args->is_vdso = true;
+		}
 	}
 
 	return 0;
diff --git a/vdso.c b/vdso.c
index a7cfc8027869..88a6a17b8439 100644
--- a/vdso.c
+++ b/vdso.c
@@ -43,21 +43,20 @@ int parasite_fixup_vdso(struct parasite_ctl *ctl, pid_t pid,
 	struct vma_area *proxy_vdso_marked = NULL;
 	struct vma_area *proxy_vvar_marked = NULL;
 	struct parasite_vdso_vma_entry *args;
-	struct vma_area *vma;
 	int fd, ret, exit_code = -1;
+	u64 pfn = VDSO_BAD_PFN;
+	struct vma_area *vma;
 	off_t off;
-	u64 pfn;
 
 	args = parasite_args(ctl, struct parasite_vdso_vma_entry);
 	fd = open_proc(pid, "pagemap");
 	if (fd < 0) {
 		if (errno == EPERM) {
-			pr_info("No VDSO fixup possible :(\n");
-			return 0;
-		}
-
-		return -1;
-	}
+			pr_info("Pagemap is unavailable, trying a slow way\n");
+		} else
+			return -1;
+	} else
+		BUG_ON(vdso_pfn == VDSO_BAD_PFN);
 
 	list_for_each_entry(vma, &vma_area_list->h, list) {
 		if (!vma_area_is(vma, VMA_AREA_REGULAR))
@@ -97,12 +96,18 @@ int parasite_fixup_vdso(struct parasite_ctl *ctl, pid_t pid,
 		 * I need to poke every potentially marked vma,
 		 * otherwise if task never called for vdso functions
 		 * page frame number won't be reported.
+		 *
+		 * Moreover, if page frame numbers are not accessible
+		 * we have to scan the vma zone for vDSO elf structure
+		 * which gonna be a slow way.
 		 */
 		args->start = vma->e->start;
 		args->len = vma_area_len(vma);
+		args->try_fill_symtable = (fd < 0) ? true : false;
+		args->is_vdso = false;
 
 		if (parasite_execute_daemon(PARASITE_CMD_CHECK_VDSO_MARK, ctl)) {
-			pr_err("vdso: Parasite failed to poke for mark\n");
+			pr_err("Parasite failed to poke for mark\n");
 			goto err;
 		}
 
@@ -122,17 +127,27 @@ int parasite_fixup_vdso(struct parasite_ctl *ctl, pid_t pid,
 			continue;
 		}
 
-		off = (vma->e->start / PAGE_SIZE) * sizeof(u64);
-		ret = pread(fd, &pfn, sizeof(pfn), off);
-		if (ret < 0 || ret != sizeof(pfn)) {
-			pr_perror("Can't read pme for pid %d", pid);
-			goto err;
-		}
+		/*
+		 * If we have an access to pagemap we can handle vDSO
+		 * status early. Otherwise, in worst scenario, where
+		 * the dumpee has been remapping vdso on its own and
+		 * the kernel version is < 3.16, the vdso won't be
+		 * detected via procfs status so we have to parse
+		 * symbols in parasite code.
+		 */
+		if (fd >= 0) {
+			off = (vma->e->start / PAGE_SIZE) * sizeof(u64);
+			ret = pread(fd, &pfn, sizeof(pfn), off);
+			if (ret < 0 || ret != sizeof(pfn)) {
+				pr_perror("Can't read pme for pid %d", pid);
+				goto err;
+			}
 
-		pfn = PME_PFRAME(pfn);
-		if (!pfn) {
-			pr_err("Unexpected page fram number 0 for pid %d\n", pid);
-			goto err;
+			pfn = PME_PFRAME(pfn);
+			if (!pfn) {
+				pr_err("Unexpected page fram number 0 for pid %d\n", pid);
+				goto err;
+			}
 		}
 
 		/*
@@ -142,15 +157,15 @@ int parasite_fixup_vdso(struct parasite_ctl *ctl, pid_t pid,
 		 * but only since that particular version of the
 		 * kernel!
 		 */
-		if (pfn == vdso_pfn) {
+		if ((pfn == vdso_pfn && pfn != VDSO_BAD_PFN) || args->is_vdso) {
 			if (!vma_area_is(vma, VMA_AREA_VDSO)) {
-				pr_debug("vdso: Restore vDSO status by pfn at %lx\n",
+				pr_debug("Restore vDSO status by pfn/symtable at %lx\n",
 					 (long)vma->e->start);
 				vma->e->status |= VMA_AREA_VDSO;
 			}
 		} else {
 			if (unlikely(vma_area_is(vma, VMA_AREA_VDSO))) {
-				pr_debug("vdso: Drop mishinted vDSO status at %lx\n",
+				pr_debug("Drop mishinted vDSO status at %lx\n",
 					 (long)vma->e->start);
 				vma->e->status &= ~VMA_AREA_VDSO;
 			}
-- 
2.4.3



More information about the CRIU mailing list