[CRIU] [RFC 1/3] pagemap-cache: Introduce engine

Andrew Vagin avagin at parallels.com
Fri Feb 14 07:03:06 PST 2014


On Fri, Feb 14, 2014 at 12:41:11AM +0400, Cyrill Gorcunov wrote:
> Pavel reported that when there is a big number
> of small VMAs present in the dumpee we are
> reading /proc/pid/pagemap too frequently.
> 
> To speed up this procedure we introduce a pagemap cache.
> 
> The interface is:
>  - pmc_init/pmc_fini for cache initialization and freeing
>  - pmc_get_pme to retrieve specific PME from cache
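
For the record, a caller would presumably drive this interface roughly
like the sketch below (dump_vmas and its surrounding loop are
hypothetical, made up for illustration; only the pmc_* API, the vma_area
fields and PAGEMAP_PME_ERR come from the patch):

	static int dump_vmas(pid_t pid, struct list_head *vma_area_list)
	{
		struct vma_area *vma;
		pmc_t pmc;

		if (pmc_init(&pmc, pid, vma_area_list, PMC_SIZE))
			return -1;

		list_for_each_entry(vma, vma_area_list, list) {
			unsigned long addr;

			for (addr = vma->e->start; addr < vma->e->end; addr += PAGE_SIZE) {
				u64 pme = pmc_get_pme(&pmc, vma, addr);

				if (pme == PAGEMAP_PME_ERR) {
					pmc_fini(&pmc);
					return -1;
				}
				/* inspect pme bits here to decide whether to dump the page */
			}
		}

		pmc_fini(&pmc);
		return 0;
	}
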
> 
> Reported-by: Pavel Emelyanov <xemul at parallels.com>
> Signed-off-by: Cyrill Gorcunov <gorcunov at openvz.org>
> ---
>  Makefile.crtools        |   1 +
>  include/pagemap-cache.h |  40 +++++++++++++
>  pagemap-cache.c         | 150 ++++++++++++++++++++++++++++++++++++++++++++++++
>  3 files changed, 191 insertions(+)
>  create mode 100644 include/pagemap-cache.h
>  create mode 100644 pagemap-cache.c
> 
> diff --git a/Makefile.crtools b/Makefile.crtools
> index 374cd8570289..6effc3e0958b 100644
> --- a/Makefile.crtools
> +++ b/Makefile.crtools
> @@ -53,6 +53,7 @@ obj-y	+= file-lock.o
>  obj-y	+= page-pipe.o
>  obj-y	+= page-xfer.o
>  obj-y	+= page-read.o
> +obj-y	+= pagemap-cache.o
>  obj-y	+= kerndat.o
>  obj-y	+= stats.o
>  obj-y	+= string.o
> diff --git a/include/pagemap-cache.h b/include/pagemap-cache.h
> new file mode 100644
> index 000000000000..9fe03f17d97f
> --- /dev/null
> +++ b/include/pagemap-cache.h
> @@ -0,0 +1,40 @@
> +#ifndef __CR_PAGEMAP_H__
> +#define __CR_PAGEMAP_H__
> +
> +#include <sys/types.h>
> +#include "asm/types.h"
> +
> +#include "list.h"
> +
> +struct vma_area;
> +
> +/* To carry up to 2M of physical memory */
> +#define PMC_SHIFT		(21)
> +#define PMC_SIZE		(1ul << PMC_SHIFT)
> +#define PMC_MASK		(~(PMC_SIZE - 1))
> +#define PMC_SIZE_GAP		(PMC_SIZE / 4)
> +
> +#define PAGEMAP_LEN(pages)	((pages) * sizeof(u64))
> +#define	PAGEMAP_PFN(addr)	((addr) / PAGE_SIZE)
> +
> +#define PAGEMAP_PFN_OFF(addr)	(PAGEMAP_PFN(addr) * sizeof(u64))
> +
> +#define PAGEMAP_PME_ERR		((u64)-1)
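
Plugging in the numbers (assuming 4K pages): PMC_SIZE = 1 << 21 = 2M, so
a full cache window spans PAGEMAP_PFN(2M) = 512 pages and its backing
buffer takes PAGEMAP_LEN(512) = 512 * sizeof(u64) = 4096 bytes -- a whole
2M worth of pagemap entries is fetched with a single 4K pread().
PMC_SIZE_GAP = 2M / 4 = 512K is the slack allowed by the fill heuristic
in pagemap-cache.c.
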
> +
> +typedef struct {
> +	pid_t			pid;		/* which process this cache belongs to */
> +	unsigned long		start;		/* start of area */
> +	unsigned long		end;		/* end of area */
> +	struct list_head	*vma_head;	/* list head of VMAs we're serving */
> +	u64			*map;		/* local buffer */
> +	size_t			map_len;	/* length of a buffer */
> +	int			fd;		/* file to read PMs from */
> +} pmc_t;
> +
> +#define PMC_INIT (pmc_t){ }
> +
> +extern int pmc_init(pmc_t *pmc, pid_t pid, struct list_head *vma_head, size_t size);
> +extern u64 pmc_get_pme(pmc_t *pmc, struct vma_area *vma, unsigned long addr);
> +extern void pmc_fini(pmc_t *pmc);
> +
> +#endif /* __CR_PAGEMAP_H__ */
> diff --git a/pagemap-cache.c b/pagemap-cache.c
> new file mode 100644
> index 000000000000..ddeca9a12518
> --- /dev/null
> +++ b/pagemap-cache.c
> @@ -0,0 +1,150 @@
> +#include <unistd.h>
> +#include <fcntl.h>
> +
> +#include "pagemap-cache.h"
> +#include "compiler.h"
> +#include "xmalloc.h"
> +#include "util.h"
> +#include "log.h"
> +#include "vma.h"
> +
> +#undef	LOG_PREFIX
> +#define LOG_PREFIX "pagemap-cache: "
> +
> +static inline void pmc_reset(pmc_t *pmc)
> +{
> +	memzero(pmc, sizeof(*pmc));
> +	pmc->fd = -1;
> +}
> +
> +static inline void pmc_zap(pmc_t *pmc)
> +{
> +	pmc->start = pmc->end = 0;
> +}
> +
> +int pmc_init(pmc_t *pmc, pid_t pid, struct list_head *vma_head, size_t size)
> +{
> +	size_t map_size = max(size, PMC_SIZE);
> +	pmc_reset(pmc);
> +
> +	BUG_ON(!vma_head);
> +
> +	pmc->pid	= pid;
> +	pmc->fd		= open_proc(pid, "pagemap");
> +	pmc->map_len	= PAGEMAP_LEN(PAGEMAP_PFN(map_size));
> +	pmc->map	= xmalloc(pmc->map_len);
> +	pmc->vma_head	= vma_head;
> +
> +	if (!pmc->map || pmc->fd < 0) {
> +		pr_err("Failed to init pagemap for %d\n", pid);
> +		pmc_fini(pmc);
> +		return -1;
> +	}
> +
> +	pr_debug("created for pid %d (takes %zu bytes)\n", pid, pmc->map_len);
> +
> +	return 0;
> +}
> +
> +static u64 __pmc_get_pme(pmc_t *pmc, unsigned long addr)
> +{
> +	if (likely(pmc->start <= addr && pmc->end > addr))
> +		return pmc->map[PAGEMAP_PFN(addr - pmc->start)];
> +	return PAGEMAP_PME_ERR;
> +}
> +
> +static int pmc_fill_cache(pmc_t *pmc, struct vma_area *vma)
> +{
> +	unsigned long low = vma->e->start & PMC_MASK;
> +	unsigned long high = low + PMC_SIZE;
> +	size_t len = vma_area_len(vma);
> +	size_t size_map;
> +
> +	pmc->start = vma->e->start;
> +	pmc->end = vma->e->end;
> +
> +	pr_debug("filling VMA %lx-%lx (%luK) [l:%lx h:%lx]\n",
> +		 vma->e->start, vma->e->end, len >> 10, low, high);
> +
> +	/*
> +	 * If we meet a small VMA, let's try to fill the 2M cache
> +	 * window at least 75% full; otherwise fall back to a plain
> +	 * "one vma at a time" read. Note the VMAs in the cache must
> +	 * fit in a solid manner, iow -- either the whole VMA fits
> +	 * the cache window, or a plain read is used.
> +	 *
> +	 * The benefit (apart from reducing the number of read()
> +	 * calls) is to hold the kernel's THP lock for as short a
> +	 * time as possible.
> +	 */
> +	if (len < PMC_SIZE && (vma->e->start - low) < PMC_SIZE_GAP) {
> +		size_t size_cov = len;
> +		size_t nr_vmas = 1;
> +
> +		pr_debug("\t%16lx-%-16lx nr:%-5zu cov:%zu\n",
> +			 vma->e->start, vma->e->end, nr_vmas, size_cov);
> +
> +		list_for_each_entry_continue(vma, pmc->vma_head, list) {
> +			if (vma->e->start > high || vma->e->end > high)
> +				break;
> +
> +			BUG_ON(vma->e->start < low);
> +			size_cov += vma_area_len(vma);
> +			nr_vmas++;
> +
> +			pr_debug("\t%16lx-%-16lx nr:%-5zu cov:%zu\n",
> +				 vma->e->start, vma->e->end, nr_vmas, size_cov);
> +		}
> +
> +		if (size_cov > (PMC_SIZE - PMC_SIZE_GAP) && nr_vmas > 1) {
> +			pmc->start = low;
> +			pmc->end = high;
> +			pr_debug("\tcache  mode [l:%lx h:%lx]\n", pmc->start, pmc->end);
> +		} else
> +			pr_debug("\tsimple mode [l:%lx h:%lx]\n", pmc->start, pmc->end);
> +	}
> +
> +	size_map = PAGEMAP_LEN(PAGEMAP_PFN(pmc->end - pmc->start));
> +	BUG_ON(pmc->map_len < size_map);
> +
> +	if (pread(pmc->fd, pmc->map, size_map, PAGEMAP_PFN_OFF(pmc->start)) != size_map) {
> +		pmc_zap(pmc);
> +		pr_perror("Can't read %d's pagemap file", pmc->pid);
> +		return -1;
> +	}
> +
> +	return 0;
> +}
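
To make the heuristic above concrete: with the constants from the header,
cache mode is chosen only when the first VMA starts within 512K of the
2M-aligned window base and the VMAs gathered into the window cover more
than PMC_SIZE - PMC_SIZE_GAP = 1.5M of it, i.e. at least 75%. E.g.
sixteen adjacent 128K VMAs starting at a 2M boundary cover the window
completely (16 * 128K = 2M) and get served by a single pread(), while a
lone 128K VMA falls through to the plain per-VMA read.
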
> +
> +u64 pmc_get_pme(pmc_t *pmc, struct vma_area *vma, unsigned long addr)

Why do we need to call pmc_get_pme for each page? The cache contains PFNs
for all pages of one VMA, or nothing at all, doesn't it?

> +{
> +	u64 pme;
> +
> +	BUG_ON(addr < vma->e->start || addr > vma->e->end);
> +
> +	/*
> +	 * Best case -- cache hit.
> +	 */
> +	pme = __pmc_get_pme(pmc, addr);
> +	if (likely(pme != PAGEMAP_PME_ERR))
> +		return pme;
> +
> +	/*
> +	 * Cache miss, refill the cache.
> +	 */
> +	if (pmc_fill_cache(pmc, vma)) {
> +		pr_err("Failed to fill cache for %d (%lx)\n", pmc->pid, addr);
> +		return PAGEMAP_PME_ERR;
> +	}
> +
> +	/*
> +	 * It must be a cache hit.
> +	 */
> +	return __pmc_get_pme(pmc, addr);
> +}
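
For reference, each returned entry follows the kernel's pagemap format
(bit 63 -- page present, bit 62 -- swapped, bits 0-54 -- PFN when the
page is present). A consumer might filter pages roughly as in this
sketch (the PME_* names are made up for illustration, not part of the
patch):

	#define PME_PRESENT	(1ULL << 63)
	#define PME_SWAP	(1ULL << 62)

	u64 pme = pmc_get_pme(&pmc, vma, addr);
	if (pme != PAGEMAP_PME_ERR && (pme & (PME_PRESENT | PME_SWAP))) {
		/* the page has physical or swap backing -- dump it */
	}
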
> +
> +void pmc_fini(pmc_t *pmc)
> +{
> +	close_safe(&pmc->fd);
> +	xfree(pmc->map);
> +	pmc_reset(pmc);
> +}
> -- 
> 1.8.3.1
> 

