[CRIU] [PATCH] kdat: Handle pagemaps with zeroed pfns

Andrew Vagin avagin at virtuozzo.com
Thu Jan 14 09:16:09 PST 2016


On Wed, Dec 30, 2015 at 01:45:54PM +0300, Pavel Emelyanov wrote:
> Recent kernels allow a user to read the proc pagemap file, but zero the
> pfns in it. Support this mode for user dumps.
> 
> https://github.com/xemul/criu/issues/101
> 
> Signed-off-by: Pavel Emelyanov <xemul at virtuozzo.com>

Acked-by: Andrew Vagin <avagin at virtuozzo.com>
> 
> ---
> 
> diff --git a/include/kerndat.h b/include/kerndat.h
> index 23d9104..a02d15b 100644
> --- a/include/kerndat.h
> +++ b/include/kerndat.h
> @@ -16,6 +16,13 @@ extern int kerndat_get_dirty_track(void);
>  extern int kerndat_fdinfo_has_lock(void);
>  extern int kerndat_loginuid(bool only_dump);
>  
> +enum pagemap_func {
> +	PM_UNKNOWN,
> +	PM_DISABLED,	/* /proc/pid/pagemap doesn't open (user mode) */
> +	PM_FLAGS_ONLY,	/* pagemap zeroes pfn part (user mode) */
> +	PM_FULL,
> +};
> +
>  struct kerndat_s {
>  	dev_t shmem_dev;
>  	int tcp_max_rshare;
> @@ -27,6 +34,7 @@ struct kerndat_s {
>  	unsigned long task_size;
>  	bool ipv6;
>  	bool has_loginuid;
> +	enum pagemap_func pmap;
>  };
>  
>  extern struct kerndat_s kdat;
> diff --git a/kerndat.c b/kerndat.c
> index a7c72a2..696701d 100644
> --- a/kerndat.c
> +++ b/kerndat.c
> @@ -41,6 +41,42 @@ struct kerndat_s kdat = {
>  	.tcp_max_rshare = 87380,
>  };
>  
> +static int check_pagemap(void)
> +{
> +	int ret, fd;
> +	u64 pfn = 0;
> +
> +	fd = __open_proc(PROC_SELF, EPERM, O_RDONLY, "pagemap");
> +	if (fd < 0) {
> +		if (errno == EPERM) {
> +			pr_info("Pagemap disabled");
> +			kdat.pmap = PM_DISABLED;
> +			return 0;
> +		}
> +
> +		return -1;
> +	}
> +
> +	/* Get the PFN of some present page. Stack is here, so try it :) */
> +	ret = pread(fd, &pfn, sizeof(pfn), (((unsigned long)&ret) / page_size()) * sizeof(pfn));
> +	if (ret != sizeof(pfn)) {
> +		pr_perror("Can't read pagemap");
> +		return -1;
> +	}
> +
> +	close(fd);
> +
> +	if ((pfn & PME_PFRAME_MASK) == 0) {
> +		pr_info("Pagemap provides flags only\n");
> +		kdat.pmap = PM_FLAGS_ONLY;
> +	} else {
> +		pr_info("Pagemap is fully functional\n");
> +		kdat.pmap = PM_FULL;
> +	}
> +
> +	return 0;
> +}
> +
>  /*
>   * Anonymous shared mappings are backed by hidden tmpfs
>   * mount. Find out its dev to distinguish such mappings
> @@ -322,13 +358,15 @@ static int init_zero_page_pfn()
>  		return -1;
>  	}
>  
> +	if (kdat.pmap != PM_FULL) {
> +		pr_info("Zero page detection failed, optimization turns off.\n");
> +		return 0;
> +	}
> +
>  	ret = vaddr_to_pfn((unsigned long)addr, &kdat.zero_page_pfn);
>  	munmap(addr, PAGE_SIZE);
>  
> -	if (ret == 1) {
> -		pr_info("Zero page detection failed, optimization turns off.\n");
> -		ret = 0;
> -	} else if (kdat.zero_page_pfn == 0)
> +	if (kdat.zero_page_pfn == 0)
>  		ret = -1;
>  
>  	return ret;
> @@ -456,7 +494,9 @@ int kerndat_init(void)
>  {
>  	int ret;
>  
> -	ret = kerndat_get_shmemdev();
> +	ret = check_pagemap();
> +	if (!ret)
> +		ret = kerndat_get_shmemdev();
>  	if (!ret)
>  		ret = kerndat_get_dirty_track();
>  	if (!ret)
> @@ -487,7 +527,9 @@ int kerndat_init_rst(void)
>  	 * not available inside namespaces.
>  	 */
>  
> -	ret = tcp_read_sysctl_limits();
> +	ret = check_pagemap();
> +	if (!ret)
> +		ret = tcp_read_sysctl_limits();
>  	if (!ret)
>  		ret = get_last_cap();
>  	if (!ret)
> diff --git a/pagemap-cache.c b/pagemap-cache.c
> index 5420586..c2e467b 100644
> --- a/pagemap-cache.c
> +++ b/pagemap-cache.c
> @@ -46,13 +46,14 @@ int pmc_init(pmc_t *pmc, pid_t pid, const struct list_head *vma_head, size_t siz
>  	if (!pmc->map)
>  		goto err;
>  
> -	pmc->fd = __open_proc(pid, EPERM, O_RDONLY, "pagemap");
> -	if (pmc->fd < 0) {
> -		if (errno != EPERM)
> -			goto err;
> -
> +	if (kdat.pmap == PM_DISABLED) {
> +		pmc->fd = -1;
>  		pr_warn("No pagemap for %d available, "
>  				"switching to greedy mode\n", pid);
> +	} else {
> +		pmc->fd = open_proc(pid, "pagemap");
> +		if (pmc->fd < 0)
> +			goto err;
>  	}
>  
>  	pr_debug("created for pid %d (takes %zu bytes)\n", pid, pmc->map_len);
> diff --git a/util.c b/util.c
> index 31ef539..0633500 100644
> --- a/util.c
> +++ b/util.c
> @@ -763,9 +763,9 @@ int vaddr_to_pfn(unsigned long vaddr, u64 *pfn)
>  	int fd, ret = -1;
>  	off_t off;
>  
> -	fd = __open_proc(getpid(), EPERM, O_RDONLY, "pagemap");
> +	fd = open_proc(getpid(), "pagemap");
>  	if (fd < 0)
> -		return errno == EPERM ? 1 : -1;
> +		return -1;
>  
>  	off = (vaddr / page_size()) * sizeof(u64);
>  	if (lseek(fd, off, SEEK_SET) != off) {
> diff --git a/vdso.c b/vdso.c
> index a6a9b30..c547cf5 100644
> --- a/vdso.c
> +++ b/vdso.c
> @@ -43,20 +43,19 @@ int parasite_fixup_vdso(struct parasite_ctl *ctl, pid_t pid,
>  	struct vma_area *proxy_vdso_marked = NULL;
>  	struct vma_area *proxy_vvar_marked = NULL;
>  	struct parasite_vdso_vma_entry *args;
> -	int fd, ret, exit_code = -1;
> +	int fd = -1, ret, exit_code = -1;
>  	u64 pfn = VDSO_BAD_PFN;
>  	struct vma_area *vma;
>  	off_t off;
>  
>  	args = parasite_args(ctl, struct parasite_vdso_vma_entry);
> -	fd = __open_proc(pid, EPERM, O_RDONLY, "pagemap");
> -	if (fd < 0) {
> -		if (errno == EPERM) {
> -			pr_info("Pagemap is unavailable, trying a slow way\n");
> -		} else
> +	if (kdat.pmap == PM_FULL) {
> +		BUG_ON(vdso_pfn == VDSO_BAD_PFN);
> +		fd = open_proc(pid, "pagemap");
> +		if (fd < 0)
>  			return -1;
>  	} else
> -		BUG_ON(vdso_pfn == VDSO_BAD_PFN);
> +		pr_info("Pagemap is unavailable, trying a slow way\n");
>  
>  	list_for_each_entry(vma, &vma_area_list->h, list) {
>  		if (!vma_area_is(vma, VMA_AREA_REGULAR))
> @@ -311,8 +310,11 @@ int vdso_init(void)
>  {
>  	if (vdso_fill_self_symtable(&vdso_sym_rt))
>  		return -1;
> -	if (vaddr_to_pfn(vdso_sym_rt.vma_start, &vdso_pfn) != 0)
> +
> +	if (kdat.pmap != PM_FULL)
>  		pr_info("VDSO detection turned off\n");
> +	else if (vaddr_to_pfn(vdso_sym_rt.vma_start, &vdso_pfn))
> +		return -1;
>  
>  	return 0;
>  }
> 
> _______________________________________________
> CRIU mailing list
> CRIU at openvz.org
> https://lists.openvz.org/mailman/listinfo/criu


More information about the CRIU mailing list