[CRIU] [PATCH v4 3/7] shmem: implement PME derived pages state tracking

Eugene Batalov eabatalov89 at gmail.com
Thu Aug 11 07:53:53 PDT 2016


From: Fyodor Bocharov <bocharovfedor at gmail.com>

Anon shmem pages state tracking allows us not to dump unused
anon shmem pages at all.

To track anon shmem pages state we create a bitmap.
Each 2 bits in this bitmap correspond to particular page.
Each 2 bits store one page state:
PST_DONT_DUMP, PST_DUMP, PST_ZERO, PST_DIRTY.
This number of states is enough to decide what to do with the page
on dump.

With anon shmem there is a peculiarity. To decide what state a page has
we need to examine its PME bits in all the processes that share it.
So a page state derived from PME bits in one process may be overridden
by a page state derived from PME bits in another process.
See the implementation of these overrides in the patch.

Signed-off-by: Fyodor Bocharov <fbocharov at yandex.ru>
Signed-off-by: Eugene Batalov <eabatalov89 at gmail.com>
---
 criu/shmem.c | 85 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 80 insertions(+), 5 deletions(-)

diff --git a/criu/shmem.c b/criu/shmem.c
index f8a3ba3..ad6d7d6 100644
--- a/criu/shmem.c
+++ b/criu/shmem.c
@@ -16,6 +16,7 @@
 #include "mem.h"
 #include "config.h"
 #include "syscall-codes.h"
+#include "asm/bitops.h"
 
 #include "protobuf.h"
 #include "images/pagemap.pb-c.h"
@@ -81,6 +82,7 @@ struct shmem_info {
 		struct { /* For dump */
 			unsigned long	start;
 			unsigned long	end;
+			unsigned long	*pstate_map;
 		};
 	};
 };
@@ -120,6 +122,73 @@ static struct shmem_info *shmem_find(unsigned long shmid)
 	return NULL;
 }
 
+#define PST_DONT_DUMP 0 /* value of a freshly zeroed map entry: no state recorded for the page */
+#define PST_DUMP 1 /* page passed should_dump_page(), is not soft-dirty and not a zero page */
+#define PST_ZERO 2 /* page passed should_dump_page(), is not soft-dirty, page_is_zero() held */
+#define PST_DIRTY 3 /* page passed should_dump_page() with PME_SOFT_DIRTY set */
+
+#define PST_BITS 2 /* number of pstate_map bits used per page */
+#define PST_BIT0_IX(pfn) ((pfn) * PST_BITS) /* bit index of the low state bit of page pfn */
+#define PST_BIT1_IX(pfn) (PST_BIT0_IX(pfn) + 1) /* bit index of the high state bit of page pfn */
+/* Return the 2-bit state (a PST_* value, 0..3) stored in pstate_map for page pfn. */
+static unsigned int get_pstate(unsigned long *pstate_map, unsigned long pfn)
+{
+	unsigned int bit0 = test_bit(PST_BIT0_IX(pfn), pstate_map) ? 1 : 0;
+	unsigned int bit1 = test_bit(PST_BIT1_IX(pfn), pstate_map) ? 1 : 0;
+	return (bit1 << 1) | bit0; /* bit1 is the high bit of the state */
+}
+/* OR pstate into the 2-bit state of page pfn in pstate_map; bits already set are never cleared. */
+static void set_pstate(unsigned long *pstate_map, unsigned long pfn,
+		unsigned int pstate)
+{ /* NOTE(review): PST_DUMP followed by PST_ZERO reads back as PST_DIRTY - confirm this is the intended override */
+	if (pstate & 1)
+		set_bit(PST_BIT0_IX(pfn), pstate_map);
+	if (pstate & 2)
+		set_bit(PST_BIT1_IX(pfn), pstate_map);
+}
+/* Grow si->pstate_map to cover new_size bytes, clear the added part, set si->size; returns 0, or -1 if xrealloc() fails. */
+static int expand_shmem(struct shmem_info *si, unsigned long new_size)
+{
+	unsigned long nr_pages, nr_map_items, map_size,
+				nr_new_map_items, new_map_size;
+
+	nr_pages = DIV_ROUND_UP(si->size, PAGE_SIZE); /* geometry of the map for the current si->size */
+	nr_map_items = BITS_TO_LONGS(nr_pages * PST_BITS);
+	map_size = nr_map_items * sizeof(*si->pstate_map);
+
+	nr_pages = DIV_ROUND_UP(new_size, PAGE_SIZE); /* geometry of the map needed for new_size */
+	nr_new_map_items = BITS_TO_LONGS(nr_pages * PST_BITS);
+	new_map_size = nr_new_map_items * sizeof(*si->pstate_map);
+
+	BUG_ON(new_map_size < map_size); /* this helper only grows the map */
+
+	si->pstate_map = xrealloc(si->pstate_map, new_map_size); /* NOTE(review): old pointer is overwritten even on failure - check xrealloc() semantics */
+	if (!si->pstate_map)
+		return -1;
+	memzero(si->pstate_map + nr_map_items, new_map_size - map_size); /* offset counts longs, length counts bytes: only the new tail is cleared */
+
+	si->size = new_size;
+	return 0;
+}
+/* Record in si->pstate_map a state for each page of vma, derived from the entries of map (indexed from 0). */
+static void update_shmem_pmaps(struct shmem_info *si, u64 *map, VmaEntry *vma)
+{
+	unsigned long shmem_pfn, vma_pfn, vma_pgcnt;
+
+	vma_pgcnt = DIV_ROUND_UP(si->size - vma->pgoff, PAGE_SIZE); /* NOTE(review): bound comes from si->size, not the vma length - confirm map[] has this many entries */
+	for (vma_pfn = 0; vma_pfn < vma_pgcnt; ++vma_pfn) {
+		if (!should_dump_page(vma, map[vma_pfn]))
+			continue; /* rejected pages keep whatever state is already recorded */
+
+		shmem_pfn = vma_pfn + DIV_ROUND_UP(vma->pgoff, PAGE_SIZE); /* shift by vma->pgoff (in pages) to index the shared map */
+		if (map[vma_pfn] & PME_SOFT_DIRTY)
+			set_pstate(si->pstate_map, shmem_pfn, PST_DIRTY);
+		else if (page_is_zero(map[vma_pfn]))
+			set_pstate(si->pstate_map, shmem_pfn, PST_ZERO);
+		else
+			set_pstate(si->pstate_map, shmem_pfn, PST_DUMP);
+	}
+}
 
 int collect_sysv_shmem(unsigned long shmid, unsigned long size)
 {
@@ -489,26 +558,32 @@ int add_shmem_area(pid_t pid, VmaEntry *vma, u64 *map)
 {
 	struct shmem_info *si;
 	unsigned long size = vma->pgoff + (vma->end - vma->start);
-	(void)map;
 
 	si = shmem_find(vma->shmid);
 	if (si) {
-		if (si->size < size)
-			si->size = size;
+		if (si->size < size) {
+			if (expand_shmem(si, size))
+				return -1;
+		}
+		update_shmem_pmaps(si, map, vma);
+
 		return 0;
 	}
 
-	si = xmalloc(sizeof(*si));
+	si = xzalloc(sizeof(*si));
 	if (!si)
 		return -1;
 
-	si->size = size;
 	si->pid = pid;
 	si->start = vma->start;
 	si->end = vma->end;
 	si->shmid = vma->shmid;
 	shmem_hash_add(si);
 
+	if (expand_shmem(si, size))
+		return -1;
+	update_shmem_pmaps(si, map, vma);
+
 	return 0;
 }
 
-- 
1.9.1



More information about the CRIU mailing list