[CRIU] [PATCH v4 3/7] shmem: implement PME derived pages state tracking
Andrei Vagin
avagin at virtuozzo.com
Fri Sep 16 04:16:30 PDT 2016
On Thu, Aug 11, 2016 at 05:53:53PM +0300, Eugene Batalov wrote:
> From: Fyodor Bocharov <bocharovfedor at gmail.com>
>
> Anon shmem pages state tracking allows us not to dump unused
> anon shmem pages at all.
>
> To track anon shmem pages state we create a bitmap.
> Each 2 bits in this bitmap correspond to particular page.
> Each 2 bits store one page state:
> PST_DONT_DUMP, PST_DUMP, PST_ZERO, PST_DIRTY.
> This number of states is enough to decide what to do with the page
> on dump.
>
> With anon shmem there is a peculiarity. To decide what state page has
> we need to examine its PME bits in all the processes that share it.
> So page state derived from PME bits in one process may be overridden
> by page state derived from PME bits from another process.
> See implementation of these overrides in the patch.
What if a process was forked and died between two pre-dumps? It can
change something in shared memory and you will skip these changes,
won't you?
>
> Signed-off-by: Fyodor Bocharov <fbocharov at yandex.ru>
> Signed-off-by: Eugene Batalov <eabatalov89 at gmail.com>
> ---
> criu/shmem.c | 85 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++----
> 1 file changed, 80 insertions(+), 5 deletions(-)
>
> diff --git a/criu/shmem.c b/criu/shmem.c
> index f8a3ba3..ad6d7d6 100644
> --- a/criu/shmem.c
> +++ b/criu/shmem.c
> @@ -16,6 +16,7 @@
> #include "mem.h"
> #include "config.h"
> #include "syscall-codes.h"
> +#include "asm/bitops.h"
>
> #include "protobuf.h"
> #include "images/pagemap.pb-c.h"
> @@ -81,6 +82,7 @@ struct shmem_info {
> struct { /* For dump */
> unsigned long start;
> unsigned long end;
> + unsigned long *pstate_map;
> };
> };
> };
> @@ -120,6 +122,73 @@ static struct shmem_info *shmem_find(unsigned long shmid)
> return NULL;
> }
>
> +#define PST_DONT_DUMP 0
> +#define PST_DUMP 1
> +#define PST_ZERO 2
> +#define PST_DIRTY 3
> +
> +#define PST_BITS 2
> +#define PST_BIT0_IX(pfn) ((pfn) * PST_BITS)
> +#define PST_BIT1_IX(pfn) (PST_BIT0_IX(pfn) + 1)
> +
> +static unsigned int get_pstate(unsigned long *pstate_map, unsigned long pfn)
> +{
> + unsigned int bit0 = test_bit(PST_BIT0_IX(pfn), pstate_map) ? 1 : 0;
> + unsigned int bit1 = test_bit(PST_BIT1_IX(pfn), pstate_map) ? 1 : 0;
> + return (bit1 << 1) | bit0;
> +}
> +
> +static void set_pstate(unsigned long *pstate_map, unsigned long pfn,
> + unsigned int pstate)
> +{
> + if (pstate & 1)
> + set_bit(PST_BIT0_IX(pfn), pstate_map);
> + if (pstate & 2)
> + set_bit(PST_BIT1_IX(pfn), pstate_map);
> +}
> +
> +static int expand_shmem(struct shmem_info *si, unsigned long new_size)
> +{
> + unsigned long nr_pages, nr_map_items, map_size,
> + nr_new_map_items, new_map_size;
> +
> + nr_pages = DIV_ROUND_UP(si->size, PAGE_SIZE);
> + nr_map_items = BITS_TO_LONGS(nr_pages * PST_BITS);
> + map_size = nr_map_items * sizeof(*si->pstate_map);
> +
> + nr_pages = DIV_ROUND_UP(new_size, PAGE_SIZE);
> + nr_new_map_items = BITS_TO_LONGS(nr_pages * PST_BITS);
> + new_map_size = nr_new_map_items * sizeof(*si->pstate_map);
> +
> + BUG_ON(new_map_size < map_size);
> +
> + si->pstate_map = xrealloc(si->pstate_map, new_map_size);
> + if (!si->pstate_map)
> + return -1;
> + memzero(si->pstate_map + nr_map_items, new_map_size - map_size);
> +
> + si->size = new_size;
> + return 0;
> +}
> +
> +static void update_shmem_pmaps(struct shmem_info *si, u64 *map, VmaEntry *vma)
> +{
> + unsigned long shmem_pfn, vma_pfn, vma_pgcnt;
> +
> + vma_pgcnt = DIV_ROUND_UP(si->size - vma->pgoff, PAGE_SIZE);
> + for (vma_pfn = 0; vma_pfn < vma_pgcnt; ++vma_pfn) {
> + if (!should_dump_page(vma, map[vma_pfn]))
> + continue;
> +
> + shmem_pfn = vma_pfn + DIV_ROUND_UP(vma->pgoff, PAGE_SIZE);
> + if (map[vma_pfn] & PME_SOFT_DIRTY)
> + set_pstate(si->pstate_map, shmem_pfn, PST_DIRTY);
> + else if (page_is_zero(map[vma_pfn]))
> + set_pstate(si->pstate_map, shmem_pfn, PST_ZERO);
> + else
> + set_pstate(si->pstate_map, shmem_pfn, PST_DUMP);
> + }
> +}
>
> int collect_sysv_shmem(unsigned long shmid, unsigned long size)
> {
> @@ -489,26 +558,32 @@ int add_shmem_area(pid_t pid, VmaEntry *vma, u64 *map)
> {
> struct shmem_info *si;
> unsigned long size = vma->pgoff + (vma->end - vma->start);
> - (void)map;
>
> si = shmem_find(vma->shmid);
> if (si) {
> - if (si->size < size)
> - si->size = size;
> + if (si->size < size) {
> + if (expand_shmem(si, size))
> + return -1;
> + }
> + update_shmem_pmaps(si, map, vma);
> +
> return 0;
> }
>
> - si = xmalloc(sizeof(*si));
> + si = xzalloc(sizeof(*si));
> if (!si)
> return -1;
>
> - si->size = size;
> si->pid = pid;
> si->start = vma->start;
> si->end = vma->end;
> si->shmid = vma->shmid;
> shmem_hash_add(si);
>
> + if (expand_shmem(si, size))
> + return -1;
> + update_shmem_pmaps(si, map, vma);
> +
> return 0;
> }
>
> --
> 1.9.1
>
> _______________________________________________
> CRIU mailing list
> CRIU at openvz.org
> https://lists.openvz.org/mailman/listinfo/criu
More information about the CRIU
mailing list