[CRIU] [PATCH 13/15] cr-restore: remove unshared pages from inherited private mappings
Pavel Emelyanov
xemul at parallels.com
Mon Nov 12 06:44:54 EST 2012
On 11/02/2012 05:32 PM, Andrey Vagin wrote:
> A parent process can change a few pages after forking a child and
> all this pages should not be avaliable from the child.
>
> Each vma has a bitmap of existent pages. Parent's and child's bitmaps
> can be compared and all pages which are not present in a child bitmap
> are dropped.
>
> Signed-off-by: Andrey Vagin <avagin at openvz.org>
> ---
> cr-restore.c | 42 +++++++++++++++++++++++++++++++++++++++++-
> include/crtools.h | 2 ++
> 2 files changed, 43 insertions(+), 1 deletion(-)
>
> diff --git a/cr-restore.c b/cr-restore.c
> index 9749634..eb759cb 100644
> --- a/cr-restore.c
> +++ b/cr-restore.c
> @@ -187,6 +187,7 @@ static int map_private_vma(pid_t pid, struct vma_area *vma, void *tgt_addr,
> {
> int ret;
> void *addr;
> + unsigned long nr_pages;
> struct vma_area *p = *pvma;
>
> if (vma_entry_is(&vma->vma, VMA_FILE_PRIVATE)) {
> @@ -200,6 +201,11 @@ static int map_private_vma(pid_t pid, struct vma_area *vma, void *tgt_addr,
> vma->vma.shmid = 0;
> }
>
> + nr_pages = vma_entry_len(&vma->vma) / PAGE_SIZE;
> + vma->page_bitmap = xzalloc(BITS_TO_LONGS(nr_pages) * sizeof(long));
> + if (vma->page_bitmap == NULL)
> + return -1;
> +
> list_for_each_entry_continue(p, pvma_list, list) {
> if (p->vma.start > vma->vma.start)
> break;
> @@ -231,6 +237,8 @@ static int map_private_vma(pid_t pid, struct vma_area *vma, void *tgt_addr,
> }
> vma_premmaped_start(&(vma->vma)) = (unsigned long) addr;
> } else {
> + vma->ppage_bitmap = p->page_bitmap;
> +
> addr = mremap((void *)vma_premmaped_start(&vma->vma),
> vma_area_len(vma), vma_area_len(vma),
> MREMAP_FIXED | MREMAP_MAYMOVE, tgt_addr);
> @@ -263,7 +271,7 @@ static int restore_priv_vma_content(pid_t pid)
> * Read page contents.
> */
> while (1) {
> - u64 va;
> + u64 va, page_offset;
> char buf[PAGE_SIZE];
> void *p;
>
> @@ -283,6 +291,12 @@ static int restore_priv_vma_content(pid_t pid)
> vma = list_entry(vma->list.next, struct vma_area, list);
> }
>
> + page_offset = (va - vma->vma.start) / PAGE_SIZE;
> + if (vma->page_bitmap)
> + set_bit(page_offset, vma->page_bitmap);
Can page_bitmap be NULL?
> + if (vma->ppage_bitmap)
> + clear_bit(page_offset, vma->ppage_bitmap);
> +
> ret = read(fd, buf, PAGE_SIZE);
> if (ret != PAGE_SIZE) {
> pr_err("Can'r read mapping page %d\n", ret);
> @@ -298,6 +312,32 @@ static int restore_priv_vma_content(pid_t pid)
> }
> close(fd);
>
> + /* Remove pages, which were not shared with a child */
> + list_for_each_entry(vma, &rst_vma_list, list) {
> + unsigned long size, i = 0;
> + void *addr = (void *) vma_premmaped_start(&vma->vma);
> +
> + if (vma->ppage_bitmap == NULL)
> + continue;
> +
> + size = vma_entry_len(&vma->vma) / PAGE_SIZE;
> + while (1) {
> + /* Find all pages, which avaliable only for a parent */
> + i = find_next_bit(vma->ppage_bitmap, size, i);
This code means "find all pages available for a parent", w/o only.
> +
> + if ( i >= size)
> + break;
> +
> + ret = madvise(addr + PAGE_SIZE * i,
> + PAGE_SIZE, MADV_DONTNEED);
> + if (ret < 0) {
> + pr_perror("madvise failed\n");
> + return -1;
> + }
> + i++;
> + }
> + }
> +
> return 0;
> }
>
> diff --git a/include/crtools.h b/include/crtools.h
> index 37e50ca..ec76ec1 100644
> --- a/include/crtools.h
> +++ b/include/crtools.h
> @@ -215,6 +215,8 @@ struct vma_area {
> struct list_head list;
> VmaEntry vma;
> int vm_file_fd;
> + unsigned long *page_bitmap; /* existent pages */
Not used anywhere for read, this is set-only field.
> + unsigned long *ppage_bitmap; /* parent's existent pages */
> };
>
> #define vma_area_is(vma_area, s) vma_entry_is(&((vma_area)->vma), s)
>
More information about the CRIU
mailing list