[CRIU] [PATCH 07/10] vdso: Don't fail if pagemap is not accessible
Pavel Emelyanov
xemul at parallels.com
Mon Sep 28 12:15:14 PDT 2015
On 09/28/2015 10:01 PM, Cyrill Gorcunov wrote:
> We use page frame number to detect vDSO which has been remapped
> in-place from runtime vDSO during restore. In such case if the
> kernel is older than 3.16 the "[vdso]" mark won't be reported
> in procfs output.
>
> Still to address recently reported CVEs and be able to run CRIU
> in unprivileged mode we need to handle vDSO without pagemap access
> and here is the deal -- when we find VMA which "looks like" vDSO
> we try to scan it for vDSO symbols and if it matches we restore
> its status without PFN access.
>
> The good news is that since commit 1c90308e7a77af pfn read no
> longer requires CAP_SYS_ADMIN, so kernel 4.3 won't need this hack.
Can you do some archaeology here? Which kernel disabled opening
of pagemap for non-root entirely, and which one enabled it back with
non-zero PFNs?
> Signed-off-by: Cyrill Gorcunov <gorcunov at openvz.org>
> ---
> include/parasite.h | 2 ++
> pie/parasite.c | 9 +++++++++
> vdso.c | 59 ++++++++++++++++++++++++++++++++++--------------------
> 3 files changed, 48 insertions(+), 22 deletions(-)
>
> diff --git a/include/parasite.h b/include/parasite.h
> index f884bb5baeb4..4ec4277f9031 100644
> --- a/include/parasite.h
> +++ b/include/parasite.h
> @@ -94,6 +94,8 @@ struct parasite_vdso_vma_entry {
> unsigned long proxy_vdso_addr;
> unsigned long proxy_vvar_addr;
> int is_marked;
> + bool try_fill_symtable;
> + bool is_vdso;
> };
>
> struct parasite_dump_pages_args {
> diff --git a/pie/parasite.c b/pie/parasite.c
> index a39c035a7f71..1d96ff5742e5 100644
> --- a/pie/parasite.c
> +++ b/pie/parasite.c
> @@ -476,6 +476,15 @@ static int parasite_check_vdso_mark(struct parasite_vdso_vma_entry *args)
> args->is_marked = 0;
> args->proxy_vdso_addr = VDSO_BAD_ADDR;
> args->proxy_vvar_addr = VVAR_BAD_ADDR;
> +
> + if (args->try_fill_symtable) {
> + struct vdso_symtable t;
> +
> + if (vdso_fill_symtable((void *)args->start, args->len, &t))
> + args->is_vdso = false;
> + else
> + args->is_vdso = true;
> + }
> }
>
> return 0;
> diff --git a/vdso.c b/vdso.c
> index a7cfc8027869..88a6a17b8439 100644
> --- a/vdso.c
> +++ b/vdso.c
> @@ -43,21 +43,20 @@ int parasite_fixup_vdso(struct parasite_ctl *ctl, pid_t pid,
> struct vma_area *proxy_vdso_marked = NULL;
> struct vma_area *proxy_vvar_marked = NULL;
> struct parasite_vdso_vma_entry *args;
> - struct vma_area *vma;
> int fd, ret, exit_code = -1;
> + u64 pfn = VDSO_BAD_PFN;
> + struct vma_area *vma;
> off_t off;
> - u64 pfn;
>
> args = parasite_args(ctl, struct parasite_vdso_vma_entry);
> fd = open_proc(pid, "pagemap");
> if (fd < 0) {
> if (errno == EPERM) {
> - pr_info("No VDSO fixup possible :(\n");
> - return 0;
> - }
> -
> - return -1;
> - }
> + pr_info("Pagemap is unavailable, trying a slow way\n");
> + } else
> + return -1;
> + } else
> + BUG_ON(vdso_pfn == VDSO_BAD_PFN);
>
> list_for_each_entry(vma, &vma_area_list->h, list) {
> if (!vma_area_is(vma, VMA_AREA_REGULAR))
> @@ -97,12 +96,18 @@ int parasite_fixup_vdso(struct parasite_ctl *ctl, pid_t pid,
> * I need to poke every potentially marked vma,
> * otherwise if task never called for vdso functions
> * page frame number won't be reported.
> + *
> + * Moreover, if page frame numbers are not accessible
> + * we have to scan the vma zone for vDSO elf structure
> + * which gonna be a slow way.
> */
> args->start = vma->e->start;
> args->len = vma_area_len(vma);
> + args->try_fill_symtable = (fd < 0) ? true : false;
> + args->is_vdso = false;
>
> if (parasite_execute_daemon(PARASITE_CMD_CHECK_VDSO_MARK, ctl)) {
> - pr_err("vdso: Parasite failed to poke for mark\n");
> + pr_err("Parasite failed to poke for mark\n");
> goto err;
> }
>
> @@ -122,17 +127,27 @@ int parasite_fixup_vdso(struct parasite_ctl *ctl, pid_t pid,
> continue;
> }
>
> - off = (vma->e->start / PAGE_SIZE) * sizeof(u64);
> - ret = pread(fd, &pfn, sizeof(pfn), off);
> - if (ret < 0 || ret != sizeof(pfn)) {
> - pr_perror("Can't read pme for pid %d", pid);
> - goto err;
> - }
> + /*
> + * If we have an access to pagemap we can handle vDSO
> + * status early. Otherwise, in worst scenario, where
> + * the dumpee has been remapping vdso on its own and
> + * the kernel version is < 3.16, the vdso won't be
> + * detected via procfs status so we have to parse
> + * symbols in parasite code.
> + */
> + if (fd >= 0) {
> + off = (vma->e->start / PAGE_SIZE) * sizeof(u64);
> + ret = pread(fd, &pfn, sizeof(pfn), off);
> + if (ret < 0 || ret != sizeof(pfn)) {
> + pr_perror("Can't read pme for pid %d", pid);
> + goto err;
> + }
>
> - pfn = PME_PFRAME(pfn);
> - if (!pfn) {
> - pr_err("Unexpected page fram number 0 for pid %d\n", pid);
> - goto err;
> + pfn = PME_PFRAME(pfn);
> + if (!pfn) {
> + pr_err("Unexpected page fram number 0 for pid %d\n", pid);
> + goto err;
> + }
> }
>
> /*
> @@ -142,15 +157,15 @@ int parasite_fixup_vdso(struct parasite_ctl *ctl, pid_t pid,
> * but only since that particular version of the
> * kernel!
> */
> - if (pfn == vdso_pfn) {
> + if ((pfn == vdso_pfn && pfn != VDSO_BAD_PFN) || args->is_vdso) {
> if (!vma_area_is(vma, VMA_AREA_VDSO)) {
> - pr_debug("vdso: Restore vDSO status by pfn at %lx\n",
> + pr_debug("Restore vDSO status by pfn/symtable at %lx\n",
> (long)vma->e->start);
> vma->e->status |= VMA_AREA_VDSO;
> }
> } else {
> if (unlikely(vma_area_is(vma, VMA_AREA_VDSO))) {
> - pr_debug("vdso: Drop mishinted vDSO status at %lx\n",
> + pr_debug("Drop mishinted vDSO status at %lx\n",
> (long)vma->e->start);
> vma->e->status &= ~VMA_AREA_VDSO;
> }
>
More information about the CRIU
mailing list