[CRIU] [PATCH 6/8] vdso: Don't fail if pagemap is not accessible
Cyrill Gorcunov
gorcunov at openvz.org
Mon Oct 5 12:54:35 PDT 2015
We use the page frame number to detect a vDSO which has been remapped
in-place from the runtime vDSO during restore. In such a case, if the
kernel is older than 3.16, the "[vdso]" mark won't be reported
in procfs output.
Still, to address recently reported CVEs and to be able to run CRIU
in unprivileged mode, we need to handle the vDSO without pagemap access.
Here is the deal -- when we find a VMA which "looks like" a vDSO,
we try to scan it for vDSO symbols, and if it matches we restore
its status without PFN access.
Here are some details on the in-kernel history of @pagemap access:
- @pagemap was introduced in commit 85863e475e59, where anyone
who can attach to a task via ptrace is allowed to read
data from @pagemap (Feb 4 2008, v2.6.25-rc1)
- in commit 006ebb40d3d65 ptrace attach rule has been changed
into ptrace read permission (May 19 2008, v2.6.27-rc1)
- in commit ab676b7d6fbf4 opening of @pagemap became guarded
with CAP_SYS_ADMIN because of a leak of physical addresses
into userspace (Mar 9 2015, v4.0-rc5)
- in commit 1c90308e7a77a opening of @pagemap became available
for regular users again (with ptrace read permission), but
physical addresses of pages are hidden from non-privileged
users (Sep 8 2015, v4.3-rc1)
Signed-off-by: Cyrill Gorcunov <gorcunov at openvz.org>
---
include/parasite.h | 2 ++
pie/parasite.c | 9 +++++++++
vdso.c | 59 ++++++++++++++++++++++++++++++++++--------------------
3 files changed, 48 insertions(+), 22 deletions(-)
diff --git a/include/parasite.h b/include/parasite.h
index f884bb5baeb4..4ec4277f9031 100644
--- a/include/parasite.h
+++ b/include/parasite.h
@@ -94,6 +94,8 @@ struct parasite_vdso_vma_entry {
unsigned long proxy_vdso_addr;
unsigned long proxy_vvar_addr;
int is_marked;
+ bool try_fill_symtable;
+ bool is_vdso;
};
struct parasite_dump_pages_args {
diff --git a/pie/parasite.c b/pie/parasite.c
index a39c035a7f71..1d96ff5742e5 100644
--- a/pie/parasite.c
+++ b/pie/parasite.c
@@ -476,6 +476,15 @@ static int parasite_check_vdso_mark(struct parasite_vdso_vma_entry *args)
args->is_marked = 0;
args->proxy_vdso_addr = VDSO_BAD_ADDR;
args->proxy_vvar_addr = VVAR_BAD_ADDR;
+
+ if (args->try_fill_symtable) {
+ struct vdso_symtable t;
+
+ if (vdso_fill_symtable((void *)args->start, args->len, &t))
+ args->is_vdso = false;
+ else
+ args->is_vdso = true;
+ }
}
return 0;
diff --git a/vdso.c b/vdso.c
index a7cfc8027869..88a6a17b8439 100644
--- a/vdso.c
+++ b/vdso.c
@@ -43,21 +43,20 @@ int parasite_fixup_vdso(struct parasite_ctl *ctl, pid_t pid,
struct vma_area *proxy_vdso_marked = NULL;
struct vma_area *proxy_vvar_marked = NULL;
struct parasite_vdso_vma_entry *args;
- struct vma_area *vma;
int fd, ret, exit_code = -1;
+ u64 pfn = VDSO_BAD_PFN;
+ struct vma_area *vma;
off_t off;
- u64 pfn;
args = parasite_args(ctl, struct parasite_vdso_vma_entry);
fd = open_proc(pid, "pagemap");
if (fd < 0) {
if (errno == EPERM) {
- pr_info("No VDSO fixup possible :(\n");
- return 0;
- }
-
- return -1;
- }
+ pr_info("Pagemap is unavailable, trying a slow way\n");
+ } else
+ return -1;
+ } else
+ BUG_ON(vdso_pfn == VDSO_BAD_PFN);
list_for_each_entry(vma, &vma_area_list->h, list) {
if (!vma_area_is(vma, VMA_AREA_REGULAR))
@@ -97,12 +96,18 @@ int parasite_fixup_vdso(struct parasite_ctl *ctl, pid_t pid,
* I need to poke every potentially marked vma,
* otherwise if task never called for vdso functions
* page frame number won't be reported.
+ *
+ * Moreover, if page frame numbers are not accessible
+ * we have to scan the vma zone for vDSO elf structure
+ * which gonna be a slow way.
*/
args->start = vma->e->start;
args->len = vma_area_len(vma);
+ args->try_fill_symtable = (fd < 0) ? true : false;
+ args->is_vdso = false;
if (parasite_execute_daemon(PARASITE_CMD_CHECK_VDSO_MARK, ctl)) {
- pr_err("vdso: Parasite failed to poke for mark\n");
+ pr_err("Parasite failed to poke for mark\n");
goto err;
}
@@ -122,17 +127,27 @@ int parasite_fixup_vdso(struct parasite_ctl *ctl, pid_t pid,
continue;
}
- off = (vma->e->start / PAGE_SIZE) * sizeof(u64);
- ret = pread(fd, &pfn, sizeof(pfn), off);
- if (ret < 0 || ret != sizeof(pfn)) {
- pr_perror("Can't read pme for pid %d", pid);
- goto err;
- }
+ /*
+ * If we have an access to pagemap we can handle vDSO
+ * status early. Otherwise, in worst scenario, where
+ * the dumpee has been remapping vdso on its own and
+ * the kernel version is < 3.16, the vdso won't be
+ * detected via procfs status so we have to parse
+ * symbols in parasite code.
+ */
+ if (fd >= 0) {
+ off = (vma->e->start / PAGE_SIZE) * sizeof(u64);
+ ret = pread(fd, &pfn, sizeof(pfn), off);
+ if (ret < 0 || ret != sizeof(pfn)) {
+ pr_perror("Can't read pme for pid %d", pid);
+ goto err;
+ }
- pfn = PME_PFRAME(pfn);
- if (!pfn) {
- pr_err("Unexpected page fram number 0 for pid %d\n", pid);
- goto err;
+ pfn = PME_PFRAME(pfn);
+ if (!pfn) {
+ pr_err("Unexpected page fram number 0 for pid %d\n", pid);
+ goto err;
+ }
}
/*
@@ -142,15 +157,15 @@ int parasite_fixup_vdso(struct parasite_ctl *ctl, pid_t pid,
* but only since that particular version of the
* kernel!
*/
- if (pfn == vdso_pfn) {
+ if ((pfn == vdso_pfn && pfn != VDSO_BAD_PFN) || args->is_vdso) {
if (!vma_area_is(vma, VMA_AREA_VDSO)) {
- pr_debug("vdso: Restore vDSO status by pfn at %lx\n",
+ pr_debug("Restore vDSO status by pfn/symtable at %lx\n",
(long)vma->e->start);
vma->e->status |= VMA_AREA_VDSO;
}
} else {
if (unlikely(vma_area_is(vma, VMA_AREA_VDSO))) {
- pr_debug("vdso: Drop mishinted vDSO status at %lx\n",
+ pr_debug("Drop mishinted vDSO status at %lx\n",
(long)vma->e->start);
vma->e->status &= ~VMA_AREA_VDSO;
}
--
2.4.3
More information about the CRIU
mailing list