[CRIU] [PATCH] kdat: Handle pagemaps with zeroed pfns
Andrew Vagin
avagin at virtuozzo.com
Thu Jan 14 09:16:09 PST 2016
On Wed, Dec 30, 2015 at 01:45:54PM +0300, Pavel Emelyanov wrote:
> Recent kernels allow a user to read the /proc pagemap file, but zero out
> the pfns in it. Support this mode for user-mode dumps.
>
> https://github.com/xemul/criu/issues/101
>
> Signed-off-by: Pavel Emelyanov <xemul at virtuozzo.com>
Acked-by: Andrew Vagin <avagin at virtuozzo.com>
>
> ---
>
> diff --git a/include/kerndat.h b/include/kerndat.h
> index 23d9104..a02d15b 100644
> --- a/include/kerndat.h
> +++ b/include/kerndat.h
> @@ -16,6 +16,13 @@ extern int kerndat_get_dirty_track(void);
> extern int kerndat_fdinfo_has_lock(void);
> extern int kerndat_loginuid(bool only_dump);
>
> +enum pagemap_func {
> + PM_UNKNOWN,
> + PM_DISABLED, /* /proc/pid/pagemap doesn't open (user mode) */
> + PM_FLAGS_ONLY, /* pagemap zeroes pfn part (user mode) */
> + PM_FULL,
> +};
> +
> struct kerndat_s {
> dev_t shmem_dev;
> int tcp_max_rshare;
> @@ -27,6 +34,7 @@ struct kerndat_s {
> unsigned long task_size;
> bool ipv6;
> bool has_loginuid;
> + enum pagemap_func pmap;
> };
>
> extern struct kerndat_s kdat;
> diff --git a/kerndat.c b/kerndat.c
> index a7c72a2..696701d 100644
> --- a/kerndat.c
> +++ b/kerndat.c
> @@ -41,6 +41,42 @@ struct kerndat_s kdat = {
> .tcp_max_rshare = 87380,
> };
>
> +static int check_pagemap(void)
> +{
> + int ret, fd;
> + u64 pfn = 0;
> +
> + fd = __open_proc(PROC_SELF, EPERM, O_RDONLY, "pagemap");
> + if (fd < 0) {
> + if (errno == EPERM) {
> + pr_info("Pagemap disabled");
> + kdat.pmap = PM_DISABLED;
> + return 0;
> + }
> +
> + return -1;
> + }
> +
> + /* Get the PFN of some present page. Stack is here, so try it :) */
> + ret = pread(fd, &pfn, sizeof(pfn), (((unsigned long)&ret) / page_size()) * sizeof(pfn));
> + if (ret != sizeof(pfn)) {
> + pr_perror("Can't read pagemap");
> + return -1;
> + }
> +
> + close(fd);
> +
> + if ((pfn & PME_PFRAME_MASK) == 0) {
> + pr_info("Pagemap provides flags only\n");
> + kdat.pmap = PM_FLAGS_ONLY;
> + } else {
> + pr_info("Pagemap is fully functional\n");
> + kdat.pmap = PM_FULL;
> + }
> +
> + return 0;
> +}
> +
> /*
> * Anonymous shared mappings are backed by hidden tmpfs
> * mount. Find out its dev to distinguish such mappings
> @@ -322,13 +358,15 @@ static int init_zero_page_pfn()
> return -1;
> }
>
> + if (kdat.pmap != PM_FULL) {
> + pr_info("Zero page detection failed, optimization turns off.\n");
> + return 0;
> + }
> +
> ret = vaddr_to_pfn((unsigned long)addr, &kdat.zero_page_pfn);
> munmap(addr, PAGE_SIZE);
>
> - if (ret == 1) {
> - pr_info("Zero page detection failed, optimization turns off.\n");
> - ret = 0;
> - } else if (kdat.zero_page_pfn == 0)
> + if (kdat.zero_page_pfn == 0)
> ret = -1;
>
> return ret;
> @@ -456,7 +494,9 @@ int kerndat_init(void)
> {
> int ret;
>
> - ret = kerndat_get_shmemdev();
> + ret = check_pagemap();
> + if (!ret)
> + ret = kerndat_get_shmemdev();
> if (!ret)
> ret = kerndat_get_dirty_track();
> if (!ret)
> @@ -487,7 +527,9 @@ int kerndat_init_rst(void)
> * not available inside namespaces.
> */
>
> - ret = tcp_read_sysctl_limits();
> + ret = check_pagemap();
> + if (!ret)
> + ret = tcp_read_sysctl_limits();
> if (!ret)
> ret = get_last_cap();
> if (!ret)
> diff --git a/pagemap-cache.c b/pagemap-cache.c
> index 5420586..c2e467b 100644
> --- a/pagemap-cache.c
> +++ b/pagemap-cache.c
> @@ -46,13 +46,14 @@ int pmc_init(pmc_t *pmc, pid_t pid, const struct list_head *vma_head, size_t siz
> if (!pmc->map)
> goto err;
>
> - pmc->fd = __open_proc(pid, EPERM, O_RDONLY, "pagemap");
> - if (pmc->fd < 0) {
> - if (errno != EPERM)
> - goto err;
> -
> + if (kdat.pmap == PM_DISABLED) {
> + pmc->fd = -1;
> pr_warn("No pagemap for %d available, "
> "switching to greedy mode\n", pid);
> + } else {
> + pmc->fd = open_proc(pid, "pagemap");
> + if (pmc->fd < 0)
> + goto err;
> }
>
> pr_debug("created for pid %d (takes %zu bytes)\n", pid, pmc->map_len);
> diff --git a/util.c b/util.c
> index 31ef539..0633500 100644
> --- a/util.c
> +++ b/util.c
> @@ -763,9 +763,9 @@ int vaddr_to_pfn(unsigned long vaddr, u64 *pfn)
> int fd, ret = -1;
> off_t off;
>
> - fd = __open_proc(getpid(), EPERM, O_RDONLY, "pagemap");
> + fd = open_proc(getpid(), "pagemap");
> if (fd < 0)
> - return errno == EPERM ? 1 : -1;
> + return -1;
>
> off = (vaddr / page_size()) * sizeof(u64);
> if (lseek(fd, off, SEEK_SET) != off) {
> diff --git a/vdso.c b/vdso.c
> index a6a9b30..c547cf5 100644
> --- a/vdso.c
> +++ b/vdso.c
> @@ -43,20 +43,19 @@ int parasite_fixup_vdso(struct parasite_ctl *ctl, pid_t pid,
> struct vma_area *proxy_vdso_marked = NULL;
> struct vma_area *proxy_vvar_marked = NULL;
> struct parasite_vdso_vma_entry *args;
> - int fd, ret, exit_code = -1;
> + int fd = -1, ret, exit_code = -1;
> u64 pfn = VDSO_BAD_PFN;
> struct vma_area *vma;
> off_t off;
>
> args = parasite_args(ctl, struct parasite_vdso_vma_entry);
> - fd = __open_proc(pid, EPERM, O_RDONLY, "pagemap");
> - if (fd < 0) {
> - if (errno == EPERM) {
> - pr_info("Pagemap is unavailable, trying a slow way\n");
> - } else
> + if (kdat.pmap == PM_FULL) {
> + BUG_ON(vdso_pfn == VDSO_BAD_PFN);
> + fd = open_proc(pid, "pagemap");
> + if (fd < 0)
> return -1;
> } else
> - BUG_ON(vdso_pfn == VDSO_BAD_PFN);
> + pr_info("Pagemap is unavailable, trying a slow way\n");
>
> list_for_each_entry(vma, &vma_area_list->h, list) {
> if (!vma_area_is(vma, VMA_AREA_REGULAR))
> @@ -311,8 +310,11 @@ int vdso_init(void)
> {
> if (vdso_fill_self_symtable(&vdso_sym_rt))
> return -1;
> - if (vaddr_to_pfn(vdso_sym_rt.vma_start, &vdso_pfn) != 0)
> +
> + if (kdat.pmap != PM_FULL)
> pr_info("VDSO detection turned off\n");
> + else if (vaddr_to_pfn(vdso_sym_rt.vma_start, &vdso_pfn))
> + return -1;
>
> return 0;
> }
>
> _______________________________________________
> CRIU mailing list
> CRIU at openvz.org
> https://lists.openvz.org/mailman/listinfo/criu
More information about the CRIU mailing list