[CRIU] [PATCH RFC 0/2] page-read: use in-memory PagemapEntry's

Mike Rapoport rppt at linux.vnet.ibm.com
Mon Jun 13 02:29:51 PDT 2016


On Thu, Jun 09, 2016 at 04:59:46PM +0300, Pavel Emelyanov wrote:
> On 06/07/2016 06:22 PM, Mike Rapoport wrote:
> > On Tue, Jun 7, 2016 at 6:16 PM, Pavel Emelyanov <xemul at virtuozzo.com> wrote:
> >> On 06/07/2016 03:35 PM, Mike Rapoport wrote:
> >>> On Tue, Jun 07, 2016 at 02:55:30PM +0300, Pavel Emelyanov wrote:
> >>>> pagemap.img-s use bfd engine to cache reads from images. Your
> >>>> patch creates double buffering for them ;)
> >>>
> >>> Well, yes. But no :)
> >>> The bfd caches raw protobuf data, while the in-memory pagemap caches
> >>> unpacked PagemapEntries. There's indeed some duplication, but you anyway
> >>> have to go through 'bread -> unpack' to use pagemap.
> >>
> >> Agree :)
> >>
> >>>> Any ideas how to handle it?
> >>>
> >>> We read the entire pagemap and then close the image? ;-)
> >>
> >> Oh, do we do this with this set?
> > 
> > Except the close part :)
> 
> Would you send an incremental patch doing this, please?

Found off-by-one in pme index calculations...
Resending the entire patch.

 
From 7a7c93ff096355feacd7a486769c4f7d2d36e430 Mon Sep 17 00:00:00 2001
From: Mike Rapoport <rppt at linux.vnet.ibm.com>
Date: Thu, 2 Jun 2016 15:25:41 +0300
Subject: [CRIU][PATCH v2 2/2] criu: page-read: read pagemaps at open time and
 keep them in memory

Signed-off-by: Mike Rapoport <rppt at linux.vnet.ibm.com>
---
 criu/include/page-read.h |  4 +++
 criu/page-read.c         | 85 ++++++++++++++++++++++++++++++++++++++++++++----
 2 files changed, 83 insertions(+), 6 deletions(-)

diff --git a/criu/include/page-read.h b/criu/include/page-read.h
index 4b6b5b7..4c6d21e 100644
--- a/criu/include/page-read.h
+++ b/criu/include/page-read.h
@@ -68,6 +68,10 @@ struct page_read {
 	struct iovec bunch;		/* record consequent neighbour
 					   iovecs to punch together */
 	unsigned id; /* for logging */
+
+	PagemapEntry **pmes;
+	int nr_pmes;
+	int curr_pme;
 };
 
 #define PR_SHMEM	0x1
diff --git a/criu/page-read.c b/criu/page-read.c
index 933f047..f06b11f 100644
--- a/criu/page-read.c
+++ b/criu/page-read.c
@@ -59,12 +59,12 @@ void iovec2pagemap(struct iovec *iov, PagemapEntry *pe)
 
 static int get_pagemap(struct page_read *pr, struct iovec *iov)
 {
-	int ret;
 	PagemapEntry *pe;
 
-	ret = pb_read_one_eof(pr->pmi, &pe, PB_PAGEMAP);
-	if (ret <= 0)
-		return ret;
+	if (pr->curr_pme >= pr->nr_pmes)
+		return 0;
+
+	pe = pr->pmes[pr->curr_pme];
 
 	pagemap2iovec(pe, iov);
 
@@ -81,7 +81,7 @@ static int get_pagemap(struct page_read *pr, struct iovec *iov)
 
 static void put_pagemap(struct page_read *pr)
 {
-	pagemap_entry__free_unpacked(pr->pe, NULL);
+	pr->curr_pme++;	/* entries stay cached in pr->pmes, just step to the next */
 }
 
 static void skip_pagemap_pages(struct page_read *pr, unsigned long len)
@@ -216,6 +216,16 @@ static int read_pagemap_page(struct page_read *pr, unsigned long vaddr, int nr,
 	return 1;
 }
 
+/* Release all cached pagemap entries and the array that holds them */
+static void free_pagemaps(struct page_read *pr)
+{
+	while (pr->pmes && pr->nr_pmes > 0)
+		pagemap_entry__free_unpacked(pr->pmes[--pr->nr_pmes], NULL);
+
+	xfree(pr->pmes);
+	pr->pmes = NULL;	/* close_page_read() tests it before freeing */
+}
+
 static void close_page_read(struct page_read *pr)
 {
 	int ret;
@@ -233,9 +243,13 @@ static void close_page_read(struct page_read *pr)
 		xfree(pr->parent);
 	}
 
-	close_image(pr->pmi);
+	if (pr->pmi)
+		close_image(pr->pmi);
 	if (pr->pi)
 		close_image(pr->pi);
+
+	if (pr->pmes)
+		free_pagemaps(pr);
 }
 
 static int try_open_parent(int dfd, int pid, struct page_read *pr, int pr_flags)
@@ -272,6 +286,60 @@ err_cl:
 	return -1;
 }
 
+/*
+ * The pagemap entry size is at least 8 bytes for small mappings with
+ * low address and may get to 18 bytes or even more for large mappings
+ * with high address and in_parent flag set. 16 seems to be nice round
+ * number to minimize {over,under}-allocations
+ */
+#define PAGEMAP_ENTRY_SIZE_ESTIMATE 16
+
+/* Cache every entry of the pagemap image in pr->pmes, then close the image */
+static int init_pagemaps(struct page_read *pr)
+{
+	off_t fsize;
+	int nr_pmes, nr_realloc;
+	PagemapEntry **tmp;
+
+	fsize = img_raw_size(pr->pmi);
+	if (fsize < 0)
+		return -1;
+
+	nr_pmes = fsize / PAGEMAP_ENTRY_SIZE_ESTIMATE + 1;
+	nr_realloc = nr_pmes / 2 + 1;	/* never zero, the array must grow */
+	pr->pmes = xzalloc(nr_pmes * sizeof(*pr->pmes));
+	if (!pr->pmes)
+		return -1;
+
+	pr->nr_pmes = pr->curr_pme = 0;
+	while (1) {
+		int ret = pb_read_one_eof(pr->pmi, &pr->pmes[pr->nr_pmes],
+					  PB_PAGEMAP);
+		if (ret < 0)
+			goto free_pagemaps;
+		if (ret == 0)
+			break;
+
+		pr->nr_pmes++;
+		if (pr->nr_pmes >= nr_pmes) {
+			nr_pmes += nr_realloc;
+			tmp = xrealloc(pr->pmes, nr_pmes * sizeof(*tmp));
+			if (!tmp)	/* pr->pmes stays valid for cleanup */
+				goto free_pagemaps;
+			pr->pmes = tmp;
+		}
+	}
+
+	close_image(pr->pmi);	/* all entries are in memory now */
+	pr->pmi = NULL;
+	return 0;
+
+free_pagemaps:
+	free_pagemaps(pr);
+	pr->pmes = NULL;	/* close_page_read() must not free it again */
+	return -1;
+}
+
 int open_page_read_at(int dfd, int pid, struct page_read *pr, int pr_flags)
 {
 	int flags, i_typ, i_typ_o;
@@ -323,6 +391,11 @@ int open_page_read_at(int dfd, int pid, struct page_read *pr, int pr_flags)
 		return -1;
 	}
 
+	if (init_pagemaps(pr)) {
+		close_page_read(pr);
+		return -1;
+	}
+
 	pr->get_pagemap = get_pagemap;
 	pr->put_pagemap = put_pagemap;
 	pr->read_pages = read_pagemap_page;
-- 
1.9.1



More information about the CRIU mailing list