[CRIU] [PATCH 07/10] vdso: Don't fail if pagemap is not accessible

Pavel Emelyanov xemul at parallels.com
Mon Sep 28 12:15:14 PDT 2015


On 09/28/2015 10:01 PM, Cyrill Gorcunov wrote:
> We use the page frame number to detect a vDSO which has been remapped
> in-place from the runtime vDSO during restore. In such a case, if the
> kernel is older than 3.16, the "[vdso]" mark won't be reported
> in procfs output.
> 
> Still, to address recently reported CVEs and to be able to run CRIU
> in unprivileged mode, we need to handle vDSO without pagemap access.
> Here is the deal -- when we find a VMA which "looks like" a vDSO,
> we try to scan it for vDSO symbols and, if they match, we restore
> its status without PFN access.
> 
> The good news is that since commit 1c90308e7a77af, reading the pfn no
> longer requires CAP_SYS_ADMIN, so kernel 4.3 won't need this hack.

Can you do some archaeology here? Which kernel disabled opening
of pagemap for non-root altogether, and which one re-enabled it with
non-zero PFNs?

> Signed-off-by: Cyrill Gorcunov <gorcunov at openvz.org>
> ---
>  include/parasite.h |  2 ++
>  pie/parasite.c     |  9 +++++++++
>  vdso.c             | 59 ++++++++++++++++++++++++++++++++++--------------------
>  3 files changed, 48 insertions(+), 22 deletions(-)
> 
> diff --git a/include/parasite.h b/include/parasite.h
> index f884bb5baeb4..4ec4277f9031 100644
> --- a/include/parasite.h
> +++ b/include/parasite.h
> @@ -94,6 +94,8 @@ struct parasite_vdso_vma_entry {
>  	unsigned long	proxy_vdso_addr;
>  	unsigned long	proxy_vvar_addr;
>  	int		is_marked;
> +	bool		try_fill_symtable;
> +	bool		is_vdso;
>  };
>  
>  struct parasite_dump_pages_args {
> diff --git a/pie/parasite.c b/pie/parasite.c
> index a39c035a7f71..1d96ff5742e5 100644
> --- a/pie/parasite.c
> +++ b/pie/parasite.c
> @@ -476,6 +476,15 @@ static int parasite_check_vdso_mark(struct parasite_vdso_vma_entry *args)
>  		args->is_marked = 0;
>  		args->proxy_vdso_addr = VDSO_BAD_ADDR;
>  		args->proxy_vvar_addr = VVAR_BAD_ADDR;
> +
> +		if (args->try_fill_symtable) {
> +			struct vdso_symtable t;
> +
> +			if (vdso_fill_symtable((void *)args->start, args->len, &t))
> +				args->is_vdso = false;
> +			else
> +				args->is_vdso = true;
> +		}
>  	}
>  
>  	return 0;
> diff --git a/vdso.c b/vdso.c
> index a7cfc8027869..88a6a17b8439 100644
> --- a/vdso.c
> +++ b/vdso.c
> @@ -43,21 +43,20 @@ int parasite_fixup_vdso(struct parasite_ctl *ctl, pid_t pid,
>  	struct vma_area *proxy_vdso_marked = NULL;
>  	struct vma_area *proxy_vvar_marked = NULL;
>  	struct parasite_vdso_vma_entry *args;
> -	struct vma_area *vma;
>  	int fd, ret, exit_code = -1;
> +	u64 pfn = VDSO_BAD_PFN;
> +	struct vma_area *vma;
>  	off_t off;
> -	u64 pfn;
>  
>  	args = parasite_args(ctl, struct parasite_vdso_vma_entry);
>  	fd = open_proc(pid, "pagemap");
>  	if (fd < 0) {
>  		if (errno == EPERM) {
> -			pr_info("No VDSO fixup possible :(\n");
> -			return 0;
> -		}
> -
> -		return -1;
> -	}
> +			pr_info("Pagemap is unavailable, trying a slow way\n");
> +		} else
> +			return -1;
> +	} else
> +		BUG_ON(vdso_pfn == VDSO_BAD_PFN);
>  
>  	list_for_each_entry(vma, &vma_area_list->h, list) {
>  		if (!vma_area_is(vma, VMA_AREA_REGULAR))
> @@ -97,12 +96,18 @@ int parasite_fixup_vdso(struct parasite_ctl *ctl, pid_t pid,
>  		 * I need to poke every potentially marked vma,
>  		 * otherwise if task never called for vdso functions
>  		 * page frame number won't be reported.
> +		 *
> +		 * Moreover, if page frame numbers are not accessible
> +		 * we have to scan the vma zone for vDSO elf structure
> +		 * which gonna be a slow way.
>  		 */
>  		args->start = vma->e->start;
>  		args->len = vma_area_len(vma);
> +		args->try_fill_symtable = (fd < 0) ? true : false;
> +		args->is_vdso = false;
>  
>  		if (parasite_execute_daemon(PARASITE_CMD_CHECK_VDSO_MARK, ctl)) {
> -			pr_err("vdso: Parasite failed to poke for mark\n");
> +			pr_err("Parasite failed to poke for mark\n");
>  			goto err;
>  		}
>  
> @@ -122,17 +127,27 @@ int parasite_fixup_vdso(struct parasite_ctl *ctl, pid_t pid,
>  			continue;
>  		}
>  
> -		off = (vma->e->start / PAGE_SIZE) * sizeof(u64);
> -		ret = pread(fd, &pfn, sizeof(pfn), off);
> -		if (ret < 0 || ret != sizeof(pfn)) {
> -			pr_perror("Can't read pme for pid %d", pid);
> -			goto err;
> -		}
> +		/*
> +		 * If we have an access to pagemap we can handle vDSO
> +		 * status early. Otherwise, in worst scenario, where
> +		 * the dumpee has been remapping vdso on its own and
> +		 * the kernel version is < 3.16, the vdso won't be
> +		 * detected via procfs status so we have to parse
> +		 * symbols in parasite code.
> +		 */
> +		if (fd >= 0) {
> +			off = (vma->e->start / PAGE_SIZE) * sizeof(u64);
> +			ret = pread(fd, &pfn, sizeof(pfn), off);
> +			if (ret < 0 || ret != sizeof(pfn)) {
> +				pr_perror("Can't read pme for pid %d", pid);
> +				goto err;
> +			}
>  
> -		pfn = PME_PFRAME(pfn);
> -		if (!pfn) {
> -			pr_err("Unexpected page fram number 0 for pid %d\n", pid);
> -			goto err;
> +			pfn = PME_PFRAME(pfn);
> +			if (!pfn) {
> +				pr_err("Unexpected page fram number 0 for pid %d\n", pid);
> +				goto err;
> +			}
>  		}
>  
>  		/*
> @@ -142,15 +157,15 @@ int parasite_fixup_vdso(struct parasite_ctl *ctl, pid_t pid,
>  		 * but only since that particular version of the
>  		 * kernel!
>  		 */
> -		if (pfn == vdso_pfn) {
> +		if ((pfn == vdso_pfn && pfn != VDSO_BAD_PFN) || args->is_vdso) {
>  			if (!vma_area_is(vma, VMA_AREA_VDSO)) {
> -				pr_debug("vdso: Restore vDSO status by pfn at %lx\n",
> +				pr_debug("Restore vDSO status by pfn/symtable at %lx\n",
>  					 (long)vma->e->start);
>  				vma->e->status |= VMA_AREA_VDSO;
>  			}
>  		} else {
>  			if (unlikely(vma_area_is(vma, VMA_AREA_VDSO))) {
> -				pr_debug("vdso: Drop mishinted vDSO status at %lx\n",
> +				pr_debug("Drop mishinted vDSO status at %lx\n",
>  					 (long)vma->e->start);
>  				vma->e->status &= ~VMA_AREA_VDSO;
>  			}
> 



More information about the CRIU mailing list