[PATCH] pagemap-cache: Introduce engine, v2

Cyrill Gorcunov gorcunov at openvz.org
Tue Feb 11 09:45:28 PST 2014


Pavel reported that when a big number of small
VMAs is present in the dumpee, we end up reading
/proc/pid/pagemap too frequently.

To speed up this procedure we introduce a pagemap cache.

The interface is:
 - pmc_init/pmc_fini for cache initialization and freeing
 - pmc_get_map to retrieve the array of PMEs for a given VMA area
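
A minimal caller sketch (hypothetical: the pid, vma_head and
max_vma_size variables here are illustrative, only the pmc_*
interface itself comes from this patch):

	pmc_t pmc = PMC_INIT;
	struct vma_area *vma;

	if (pmc_init(&pmc, pid, vma_head, max_vma_size))
		return -1;

	list_for_each_entry(vma, vma_head, list) {
		u64 *map = pmc_get_map(&pmc, vma);
		if (!map)
			break;		/* cache refill failed */
		/* map[i] is the PME of the i'th page in this VMA */
	}

	pmc_fini(&pmc);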

v2:
 - Move internal constants to pagemap-cache.c
 - Make PAGEMAP_LEN accept a virtual address/size
 - Don't adjust the low bound in caching mode, saving a couple of code bytes
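
For reference, the buffer arithmetic (a standalone sketch
assuming 4K pages; PMC_SIZE, PAGEMAP_PFN and PAGEMAP_LEN mirror
the macros from this patch):

	#include <stdio.h>
	#include <stdint.h>

	#define PAGE_SIZE		4096ul	/* assumed x86 page size */
	#define PMC_SHIFT		21
	#define PMC_SIZE		(1ul << PMC_SHIFT)	/* 2M cache window */
	#define PAGEMAP_PFN(addr)	((addr) / PAGE_SIZE)
	#define PAGEMAP_LEN(size)	(PAGEMAP_PFN(size) * sizeof(uint64_t))

	int main(void)
	{
		/* 2M / 4K = 512 PFNs, 512 * 8 bytes = 4096 bytes, iow
		 * the PME buffer for a full window is exactly one page */
		printf("window %lu bytes -> PME buffer %zu bytes\n",
		       PMC_SIZE, PAGEMAP_LEN(PMC_SIZE));
		return 0;
	}

So a full 2M window costs a single page of cache memory, and
PMC_SIZE_GAP (512K, a quarter of the window) is the threshold
the filling code uses to decide whether caching is worth it.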

Reported-by: Pavel Emelyanov <xemul at parallels.com>
Signed-off-by: Cyrill Gorcunov <gorcunov at openvz.org>
---
 Makefile.crtools        |   1 +
 include/pagemap-cache.h |  30 ++++++++++
 pagemap-cache.c         | 150 ++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 181 insertions(+)
 create mode 100644 include/pagemap-cache.h
 create mode 100644 pagemap-cache.c
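
A worked example of the window heuristic below (hypothetical
addresses; PMC_MASK and PMC_SIZE_GAP are the patch's constants):

	#include <stdio.h>

	#define PMC_SIZE	(1ul << 21)		/* 2M window */
	#define PMC_MASK	(~(PMC_SIZE - 1))
	#define PMC_SIZE_GAP	(PMC_SIZE / 4)		/* 512K */

	int main(void)
	{
		unsigned long start = 0x7f0000030000ul;	/* hypothetical VMA start */
		unsigned long low = start & PMC_MASK;	/* 0x7f0000000000 */

		/* start sits 192K into the window, under the 512K gap,
		 * so at least 75% of the window lies ahead -> cache it */
		printf("gap %luK -> %s\n", (start - low) >> 10,
		       (start - low) < PMC_SIZE_GAP ? "cache" : "plain read");
		return 0;
	}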

diff --git a/Makefile.crtools b/Makefile.crtools
index 374cd8570289..6effc3e0958b 100644
--- a/Makefile.crtools
+++ b/Makefile.crtools
@@ -53,6 +53,7 @@ obj-y	+= file-lock.o
 obj-y	+= page-pipe.o
 obj-y	+= page-xfer.o
 obj-y	+= page-read.o
+obj-y	+= pagemap-cache.o
 obj-y	+= kerndat.o
 obj-y	+= stats.o
 obj-y	+= string.o
diff --git a/include/pagemap-cache.h b/include/pagemap-cache.h
new file mode 100644
index 000000000000..f4d9c4e43110
--- /dev/null
+++ b/include/pagemap-cache.h
@@ -0,0 +1,30 @@
+#ifndef __CR_PAGEMAP_CACHE_H__
+#define __CR_PAGEMAP_CACHE_H__
+
+#include <sys/types.h>
+#include "asm/types.h"
+
+#include "list.h"
+
+struct vma_area;
+
+#define	PAGEMAP_PFN(addr)	((addr) / PAGE_SIZE)
+#define PAGEMAP_PFN_OFF(addr)	(PAGEMAP_PFN(addr) * sizeof(u64))
+
+typedef struct {
+	pid_t			pid;		/* which process it belongs to */
+	unsigned long		start;		/* start of area */
+	unsigned long		end;		/* end of area */
+	struct list_head	*vma_head;	/* list head of VMAs we're serving */
+	u64			*map;		/* local buffer */
+	size_t			map_len;	/* length of a buffer */
+	int			fd;		/* file to read PMs from */
+} pmc_t;
+
+#define PMC_INIT (pmc_t){ }
+
+extern int pmc_init(pmc_t *pmc, pid_t pid, struct list_head *vma_head, size_t size);
+extern u64 *pmc_get_map(pmc_t *pmc, struct vma_area *vma);
+extern void pmc_fini(pmc_t *pmc);
+
+#endif /* __CR_PAGEMAP_CACHE_H__ */
diff --git a/pagemap-cache.c b/pagemap-cache.c
new file mode 100644
index 000000000000..da0c0f2c05ec
--- /dev/null
+++ b/pagemap-cache.c
@@ -0,0 +1,150 @@
+#include <unistd.h>
+#include <fcntl.h>
+
+#include "pagemap-cache.h"
+#include "compiler.h"
+#include "xmalloc.h"
+#include "util.h"
+#include "log.h"
+#include "vma.h"
+
+#undef	LOG_PREFIX
+#define LOG_PREFIX "pagemap-cache: "
+
+/* To carry PMEs for up to 2M of the task's virtual memory */
+#define PMC_SHIFT		(21)
+#define PMC_SIZE		(1ul << PMC_SHIFT)
+#define PMC_MASK		(~(PMC_SIZE - 1))
+#define PMC_SIZE_GAP		(PMC_SIZE / 4)
+
+#define PAGEMAP_LEN(addr)	(PAGEMAP_PFN(addr) * sizeof(u64))
+
+static inline void pmc_reset(pmc_t *pmc)
+{
+	memzero(pmc, sizeof(*pmc));
+	pmc->fd = -1;
+}
+
+static inline void pmc_zap(pmc_t *pmc)
+{
+	pmc->start = pmc->end = 0;
+}
+
+int pmc_init(pmc_t *pmc, pid_t pid, struct list_head *vma_head, size_t size)
+{
+	size_t map_size = max(size, PMC_SIZE);
+	pmc_reset(pmc);
+
+	BUG_ON(!vma_head);
+
+	pmc->pid	= pid;
+	pmc->fd		= open_proc(pid, "pagemap");
+	pmc->map_len	= PAGEMAP_LEN(map_size);
+	pmc->map	= xmalloc(pmc->map_len);
+	pmc->vma_head	= vma_head;
+
+	if (!pmc->map || pmc->fd < 0) {
+		pr_err("Failed to init pagemap for %d\n", pid);
+		pmc_fini(pmc);
+		return -1;
+	}
+
+	pr_debug("created for pid %d (takes %zu bytes)\n", pid, pmc->map_len);
+
+	return 0;
+}
+
+static inline u64 *__pmc_get_map(pmc_t *pmc, unsigned long addr)
+{
+	return &pmc->map[PAGEMAP_PFN(addr - pmc->start)];
+}
+
+static int pmc_fill_cache(pmc_t *pmc, struct vma_area *vma)
+{
+	unsigned long low = vma->e->start & PMC_MASK;
+	unsigned long high = low + PMC_SIZE;
+	size_t len = vma_area_len(vma);
+	size_t size_map;
+
+	pmc->start = vma->e->start;
+	pmc->end = vma->e->end;
+
+	pr_debug("filling VMA %lx-%lx (%luK) [l:%lx h:%lx]\n",
+		 vma->e->start, vma->e->end, len >> 10, low, high);
+
+	/*
+	 * If we meet a small VMA, try to fill the 2M cache
+	 * window at least 75% full; otherwise fall back to a
+	 * plain "one VMA at a time" read. Note the VMAs must
+	 * fit the cache window completely: either the whole
+	 * VMA fits the window, or a plain read is used.
+	 *
+	 * The benefit (apart from reducing read() calls) is to
+	 * hold the in-kernel THP lock as briefly as possible.
+	 */
+	if (len < PMC_SIZE && (vma->e->start - low) < PMC_SIZE_GAP) {
+		size_t size_cov = len;
+		size_t nr_vmas = 1;
+
+		pr_debug("\t%16lx-%-16lx nr:%-5zu cov:%zu\n",
+			 vma->e->start, vma->e->end, nr_vmas, size_cov);
+
+		list_for_each_entry_continue(vma, pmc->vma_head, list) {
+			if (vma->e->start > high || vma->e->end > high)
+				break;
+
+			BUG_ON(vma->e->start < low);
+			size_cov += vma_area_len(vma);
+			nr_vmas++;
+
+			pr_debug("\t%16lx-%-16lx nr:%-5zu cov:%zu\n",
+				 vma->e->start, vma->e->end, nr_vmas, size_cov);
+		}
+
+		if (nr_vmas > 1) {
+			/*
+			 * Note we don't touch the low bound: it is
+			 * already set to the first VMA's start, and
+			 * not updating it saves a few code bytes.
+			 */
+			pmc->end = high;
+			pr_debug("\tcache  mode [l:%lx h:%lx]\n", pmc->start, pmc->end);
+		} else
+			pr_debug("\tsimple mode [l:%lx h:%lx]\n", pmc->start, pmc->end);
+	}
+
+	size_map = PAGEMAP_LEN(pmc->end - pmc->start);
+	BUG_ON(pmc->map_len < size_map);
+
+	if (pread(pmc->fd, pmc->map, size_map, PAGEMAP_PFN_OFF(pmc->start)) != size_map) {
+		pmc_zap(pmc);
+		pr_perror("Can't read %d's pagemap file", pmc->pid);
+		return -1;
+	}
+
+	return 0;
+}
+
+u64 *pmc_get_map(pmc_t *pmc, struct vma_area *vma)
+{
+	/* Hit */
+	if (likely(pmc->start <= vma->e->start && pmc->end >= vma->e->end))
+		return __pmc_get_map(pmc, vma->e->start);
+
+	/* Miss, refill the cache */
+	if (pmc_fill_cache(pmc, vma)) {
+		pr_err("Failed to fill cache for %d (%lx-%lx)\n",
+		       pmc->pid, vma->e->start, vma->e->end);
+		return NULL;
+	}
+
+	/* Hit for sure */
+	return __pmc_get_map(pmc, vma->e->start);
+}
+
+void pmc_fini(pmc_t *pmc)
+{
+	close_safe(&pmc->fd);
+	xfree(pmc->map);
+	pmc_reset(pmc);
+}
-- 
1.8.3.1

