[CRIU] [PATCH v4 3/7] shmem: implement PME derived pages state tracking

Andrei Vagin avagin at virtuozzo.com
Fri Sep 16 04:16:30 PDT 2016


On Thu, Aug 11, 2016 at 05:53:53PM +0300, Eugene Batalov wrote:
> From: Fyodor Bocharov <bocharovfedor at gmail.com>
> 
> Anon shmem pages state tracking allows us not to dump unused
> anon shmem pages at all.
> 
> To track anon shmem page states we create a bitmap.
> Each 2 bits in this bitmap correspond to particular page.
> Each 2 bits store one page state:
> PST_DONT_DUMP, PST_DUMP, PST_ZERO, PST_DIRTY.
> This number of states is enough to decide what to do with the page
> on dump.
> 
> With anon shmem there is a peculiarity. To decide what state a page has
> we need to examine its PME bits in all the processes that share it.
> So a page state derived from PME bits in one process may be overridden
> by a page state derived from PME bits in another process.
> See the implementation of these overrides in the patch.

What if a process was forked and died between two pre-dumps? It can
change something in shared memory, and you will skip these changes,
won't you?

> 
> Signed-off-by: Fyodor Bocharov <fbocharov at yandex.ru>
> Signed-off-by: Eugene Batalov <eabatalov89 at gmail.com>
> ---
>  criu/shmem.c | 85 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++----
>  1 file changed, 80 insertions(+), 5 deletions(-)
> 
> diff --git a/criu/shmem.c b/criu/shmem.c
> index f8a3ba3..ad6d7d6 100644
> --- a/criu/shmem.c
> +++ b/criu/shmem.c
> @@ -16,6 +16,7 @@
>  #include "mem.h"
>  #include "config.h"
>  #include "syscall-codes.h"
> +#include "asm/bitops.h"
>  
>  #include "protobuf.h"
>  #include "images/pagemap.pb-c.h"
> @@ -81,6 +82,7 @@ struct shmem_info {
>  		struct { /* For dump */
>  			unsigned long	start;
>  			unsigned long	end;
> +			unsigned long	*pstate_map;
>  		};
>  	};
>  };
> @@ -120,6 +122,73 @@ static struct shmem_info *shmem_find(unsigned long shmid)
>  	return NULL;
>  }
>  
> +#define PST_DONT_DUMP 0
> +#define PST_DUMP 1
> +#define PST_ZERO 2
> +#define PST_DIRTY 3
> +
> +#define PST_BITS 2
> +#define PST_BIT0_IX(pfn) ((pfn) * PST_BITS)
> +#define PST_BIT1_IX(pfn) (PST_BIT0_IX(pfn) + 1)
> +
> +static unsigned int get_pstate(unsigned long *pstate_map, unsigned long pfn)
> +{
> +	unsigned int bit0 = test_bit(PST_BIT0_IX(pfn), pstate_map) ? 1 : 0;
> +	unsigned int bit1 = test_bit(PST_BIT1_IX(pfn), pstate_map) ? 1 : 0;
> +	return (bit1 << 1) | bit0;
> +}
> +
> +static void set_pstate(unsigned long *pstate_map, unsigned long pfn,
> +		unsigned int pstate)
> +{
> +	if (pstate & 1)
> +		set_bit(PST_BIT0_IX(pfn), pstate_map);
> +	if (pstate & 2)
> +		set_bit(PST_BIT1_IX(pfn), pstate_map);
> +}
> +
> +static int expand_shmem(struct shmem_info *si, unsigned long new_size)
> +{
> +	unsigned long nr_pages, nr_map_items, map_size,
> +				nr_new_map_items, new_map_size;
> +
> +	nr_pages = DIV_ROUND_UP(si->size, PAGE_SIZE);
> +	nr_map_items = BITS_TO_LONGS(nr_pages * PST_BITS);
> +	map_size = nr_map_items * sizeof(*si->pstate_map);
> +
> +	nr_pages = DIV_ROUND_UP(new_size, PAGE_SIZE);
> +	nr_new_map_items = BITS_TO_LONGS(nr_pages * PST_BITS);
> +	new_map_size = nr_new_map_items * sizeof(*si->pstate_map);
> +
> +	BUG_ON(new_map_size < map_size);
> +
> +	si->pstate_map = xrealloc(si->pstate_map, new_map_size);
> +	if (!si->pstate_map)
> +		return -1;
> +	memzero(si->pstate_map + nr_map_items, new_map_size - map_size);
> +
> +	si->size = new_size;
> +	return 0;
> +}
> +
> +static void update_shmem_pmaps(struct shmem_info *si, u64 *map, VmaEntry *vma)
> +{
> +	unsigned long shmem_pfn, vma_pfn, vma_pgcnt;
> +
> +	vma_pgcnt = DIV_ROUND_UP(si->size - vma->pgoff, PAGE_SIZE);
> +	for (vma_pfn = 0; vma_pfn < vma_pgcnt; ++vma_pfn) {
> +		if (!should_dump_page(vma, map[vma_pfn]))
> +			continue;
> +
> +		shmem_pfn = vma_pfn + DIV_ROUND_UP(vma->pgoff, PAGE_SIZE);
> +		if (map[vma_pfn] & PME_SOFT_DIRTY)
> +			set_pstate(si->pstate_map, shmem_pfn, PST_DIRTY);
> +		else if (page_is_zero(map[vma_pfn]))
> +			set_pstate(si->pstate_map, shmem_pfn, PST_ZERO);
> +		else
> +			set_pstate(si->pstate_map, shmem_pfn, PST_DUMP);
> +	}
> +}
>  
>  int collect_sysv_shmem(unsigned long shmid, unsigned long size)
>  {
> @@ -489,26 +558,32 @@ int add_shmem_area(pid_t pid, VmaEntry *vma, u64 *map)
>  {
>  	struct shmem_info *si;
>  	unsigned long size = vma->pgoff + (vma->end - vma->start);
> -	(void)map;
>  
>  	si = shmem_find(vma->shmid);
>  	if (si) {
> -		if (si->size < size)
> -			si->size = size;
> +		if (si->size < size) {
> +			if (expand_shmem(si, size))
> +				return -1;
> +		}
> +		update_shmem_pmaps(si, map, vma);
> +
>  		return 0;
>  	}
>  
> -	si = xmalloc(sizeof(*si));
> +	si = xzalloc(sizeof(*si));
>  	if (!si)
>  		return -1;
>  
> -	si->size = size;
>  	si->pid = pid;
>  	si->start = vma->start;
>  	si->end = vma->end;
>  	si->shmid = vma->shmid;
>  	shmem_hash_add(si);
>  
> +	if (expand_shmem(si, size))
> +		return -1;
> +	update_shmem_pmaps(si, map, vma);
> +
>  	return 0;
>  }
>  
> -- 
> 1.9.1
> 
> _______________________________________________
> CRIU mailing list
> CRIU at openvz.org
> https://lists.openvz.org/mailman/listinfo/criu


More information about the CRIU mailing list