[CRIU] [PATCH 1/2] pagemap: Introduce pagemap cache
Andrew Vagin
avagin at parallels.com
Tue Feb 11 13:15:02 PST 2014
On Tue, Feb 11, 2014 at 11:08:43PM +0400, Cyrill Gorcunov wrote:
>
> Pavel reported that in case if there a big number
> of small vmas present in the dumpee we're reading
> /proc/pid/pagemap too frequently.
>
> To speed up this procedure we introduce a pagemap cache.
> The idea behind is simple
>
> - cache can carry PMEs which cover up to 2M of physical memory
> at least (or if longest VMA area is bigger than 2M, then we
> take its size for cache)
> - when it is asked to fetch a PME from the cache and a cache miss
>   occurs, we walk over all nearby VMAs and pre-read their
>   PMEs
>
> The interface is:
> - pmc_init/pmc_fini for cache initialization and freeing
> - pmc_get_pfn_index to retrieve specific PME from cache
>
> Reported-by: Pavel Emelyanov <xemul at parallels.com>
> Signed-off-by: Cyrill Gorcunov <gorcunov at openvz.org>
> ---
> Makefile.crtools | 1 +
> include/pagemap.h | 33 ++++++++++++++
> pagemap.c | 126 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
> 3 files changed, 160 insertions(+)
> create mode 100644 include/pagemap.h
> create mode 100644 pagemap.c
>
> diff --git a/Makefile.crtools b/Makefile.crtools
> index 374cd8570289..4aefd7bec278 100644
> --- a/Makefile.crtools
> +++ b/Makefile.crtools
> @@ -53,6 +53,7 @@ obj-y += file-lock.o
> obj-y += page-pipe.o
> obj-y += page-xfer.o
> obj-y += page-read.o
> +obj-y += pagemap.o
> obj-y += kerndat.o
> obj-y += stats.o
> obj-y += string.o
> diff --git a/include/pagemap.h b/include/pagemap.h
> new file mode 100644
> index 000000000000..d5a581918951
> --- /dev/null
> +++ b/include/pagemap.h
> @@ -0,0 +1,33 @@
> +#ifndef __CR_PAGEMAP_H__
> +#define __CR_PAGEMAP_H__
> +
> +#include <sys/types.h>
> +#include "asm/types.h"
> +
> +struct vma_area;
> +
> +typedef struct {
> + u64 pme;
> +} pagemap_entry_t;
> +
> +/* To carry up to 2M of physical memory */
> +#define PAGEMAP_MAX_AREA (2ul << 20)
> +#define PAGEMAP_PFN(addr) ((addr) / PAGE_SIZE)
> +#define PAGEMAP_MAX_PAGES PAGEMAP_PFN(PAGEMAP_MAX_AREA)
> +#define PAGEMAP_SIZE(pages) ((pages) * sizeof(pagemap_entry_t))
> +#define PAGEMAP_PFN_OFF(addr) (PAGEMAP_PFN(addr) * sizeof(pagemap_entry_t))
> +
> +typedef struct {
> + pid_t pid; /* which process it belongs */
> + unsigned long start; /* start of area */
> + unsigned long end; /* end of area */
> + pagemap_entry_t *map; /* local buffer */
> + size_t nr_pages; /* map allocated for */
> + int fd; /* file to read PMs from */
> +} pmc_t;
> +
> +extern int pmc_init(pmc_t *pmc, pid_t pid, unsigned long max_pages);
> +extern int pmc_get_pfn_index(pmc_t *pmc, struct vma_area *vma, unsigned long addr, size_t *index);
> +extern void pmc_fini(pmc_t *pmc);
> +
> +#endif /* __CR_PAGEMAP_H__ */
> diff --git a/pagemap.c b/pagemap.c
> new file mode 100644
> index 000000000000..9f5b646f7ff0
> --- /dev/null
> +++ b/pagemap.c
> @@ -0,0 +1,126 @@
> +#include <unistd.h>
> +#include <fcntl.h>
> +
> +#include "compiler.h"
> +#include "xmalloc.h"
> +#include "pagemap.h"
> +#include "util.h"
> +#include "log.h"
> +#include "vma.h"
> +
> +#undef LOG_PREFIX
> +#define LOG_PREFIX "pagemap: "
> +
> +static inline void pmc_reset(pmc_t *pmc)
> +{
> + memzero(pmc, sizeof(*pmc));
> + pmc->fd = -1;
> +}
> +
> +int pmc_init(pmc_t *pmc, pid_t pid, unsigned long max_pages)
> +{
> + pmc_reset(pmc);
> +
> + pmc->nr_pages = max(max_pages, PAGEMAP_MAX_PAGES);
> + pmc->fd = open_proc(pid, "pagemap");
> + pmc->map = xmalloc(PAGEMAP_SIZE(pmc->nr_pages));
> + pmc->pid = pid;
> +
> + if (!pmc->map || pmc->fd < 0) {
> + pr_err("Failed to init pagemap for %d\n", pid);
> + pmc_fini(pmc);
> + return -1;
> + }
> +
> + pr_debug("init pid %d pages %zu covers %lu bytes\n",
> + pid, pmc->nr_pages, pmc->nr_pages * PAGE_SIZE);
> +
> + return 0;
> +}
> +
> +/* Fill the cache and return offset @off to the start of the VMA */
> +int pmc_get_pfn_index(pmc_t *pmc, struct vma_area *vma, unsigned long addr, size_t *index)
> +{
> + struct vma_area *v, *prev, *head;
> +
> + unsigned long size_cov = 0;
> + unsigned long size_gap = 0;
> + size_t nr_vmas = 0;
> + size_t len;
> +
> + pr_debug("Cache lookup %p (pmc %p - %p)\n",
> + (void *)addr, (void *)pmc->start, (void *)pmc->end);
> +
> + /*
> + * Cache hit -- the PMs has been read already.
> + */
> + if (likely(pmc->start <= addr && pmc->end > addr)) {
> + *index = PAGEMAP_PFN(addr - pmc->start);
> + return 0;
> + }
> +
> + /*
> + * Cache miss -- fill the cache.
> + */
> + pmc->start = vma->e->start;
> + pmc->end = pmc->start + pmc->nr_pages * PAGE_SIZE;
> + head = container_of(vma->list.prev, struct vma_area, list);
> + prev = NULL;
> +
> + pr_debug("Trying to fill cache (new pmc %p - %p)\n",
> + (void *)pmc->start, (void *)pmc->end);
> +
> + list_for_each_entry(v, &head->list, list) {
> + pr_debug("\t%p - %p <- %p - %p\n",
> + (void *)pmc->start, (void *)pmc->end,
> + (void *)v->e->start, (void *)v->e->end);
> + if (v->e->start > pmc->end ||
> + v->e->start < pmc->start ||
> + v->e->end > pmc->end)
> + break;
> +
> + size_cov += vma_area_len(v);
> + if (!prev) {
> + prev = v;
> + nr_vmas++;
> + continue;
> + }
> +
> + size_gap += (v->e->start - prev->e->end);
> +
> + /*
> + * Gap is too big, can't continue.
> + */
> + if (size_gap > size_cov) {
I don't like this condition.
Let's imagine that we have:
* a big vma (50Mb)
* two small vmas (20Mb each) with a hole (10Mb) between them
|___50Mb___| |_20Mb_| 10Mb |_20Mb_|
According to this condition the hole will be in the cache.
I think using the cache will be slower in this case.
Maybe the size of the hole should be limited...
> + size_gap -= (v->e->start - prev->e->end);
> + size_cov -= vma_area_len(v);
> + pmc->end = prev->e->end;
> + break;
> + }
> +
> + prev = v;
> + nr_vmas++;
> + }
> +
> + pr_debug("nr_vmas %zu size_cov %lu size_gap %lu (%p %p)\n",
> + nr_vmas, size_cov, size_gap, (void *)pmc->start, (void *)pmc->end);
> + BUG_ON(!nr_vmas || !size_cov);
> +
> + pr_debug("Fill cache (new %p - %p)\n", (void *)pmc->start, (void *)pmc->end);
> + len = PAGEMAP_PFN_OFF(pmc->end - pmc->start);
> + if (pread(pmc->fd, pmc->map, len, PAGEMAP_PFN_OFF(pmc->start)) != len) {
> + pmc->start = pmc->end = 0;
> + pr_perror("Can't read %d's pagemap file", pmc->pid);
> + return -1;
> + }
> +
> + pr_debug("Cache filled (%p - %p)\n", (void *)pmc->start, (void *)pmc->end);
> + return pmc_get_pfn_index(pmc, vma, addr, index);
> +}
> +
> +void pmc_fini(pmc_t *pmc)
> +{
> + close_safe(&pmc->fd);
> + xfree(pmc->map);
> + pmc_reset(pmc);
> +}
More information about the CRIU
mailing list