[CRIU] Re: [PATCH cr] [RFC] cr-restore: remove unshared pages from inherited private mappings

Pavel Emelyanov xemul at parallels.com
Thu Oct 18 09:27:06 EDT 2012


On 10/18/2012 03:39 PM, Andrey Vagin wrote:
> A parent process can change a few pages after forking a child, and
> all these pages should not be available from the child.
> 
> Each vma has a bitmap of existent pages. Parent's and child's bitmaps
> can be compared and all pages which are not present in a child bitmap
> are dropped.
> 
> Signed-off-by: Andrey Vagin <avagin at openvz.org>
> ---
>  cr-restore.c      |   43 +++++++++++++++++++++++++++++++++++++++++--
>  include/crtools.h |    2 ++
>  2 files changed, 43 insertions(+), 2 deletions(-)
> 
> diff --git a/cr-restore.c b/cr-restore.c
> index 4f17956..327e93f 100644
> --- a/cr-restore.c
> +++ b/cr-restore.c
> @@ -178,6 +178,7 @@ static int map_private_vma(pid_t pid, struct vma_area *vma,
>  			struct vma_area **pvma, struct list_head *pvma_list)
>  {
>  	int ret;
> +	unsigned long nr_pages;
>  	struct vma_area *p = *pvma;
>  
>  	if (!vma_priv(&vma->vma))
> @@ -194,6 +195,11 @@ static int map_private_vma(pid_t pid, struct vma_area *vma,
>  		vma->vma.shmid = 0;
>  	}
>  
> +	nr_pages = vma_entry_len(&vma->vma) / PAGE_SIZE;
> +	vma->page_bitmap = xzalloc(BITS_TO_LONGS(nr_pages) * sizeof(long));
> +	if (vma->page_bitmap == NULL)
> +		return -1;
> +
>  	list_for_each_entry_continue(p, pvma_list, list) {
>  		if (p->vma.start > vma->vma.start)
>  			 break;
> @@ -203,6 +209,7 @@ static int map_private_vma(pid_t pid, struct vma_area *vma,
>  			pr_info("COW 0x%016lx-0x%016lx 0x%016lx vma\n",
>  				vma->vma.start, vma->vma.end, vma->vma.pgoff);
>  			vma->vma.shmid = p->vma.shmid;
> +			break;
>  		}
>  
>  	}
> @@ -224,7 +231,8 @@ static int map_private_vma(pid_t pid, struct vma_area *vma,
>  			return -1;
>  		}
>  		vma->vma.shmid = (unsigned long) addr;
> -	}
> +	} else
> +		vma->ppage_bitmap = p->page_bitmap;
>  
>  	if (vma_entry_is(&vma->vma, VMA_FILE_PRIVATE))
>  		close(vma->vma.fd);
> @@ -247,7 +255,7 @@ static int restore_anon_vma_content(pid_t pid)
>  	 * Read page contents.
>  	 */
>  	while (1) {
> -		u64 va;
> +		u64 va, page_offset;
>  		char buf[PAGE_SIZE];
>  		void *p;
>  
> @@ -267,6 +275,12 @@ static int restore_anon_vma_content(pid_t pid)
>  			vma = list_entry(vma->list.next, struct vma_area, list);
>  		}
>  
> +		page_offset = (va - vma->vma.start) / PAGE_SIZE;
> +		if (vma->page_bitmap)
> +			set_bit(page_offset, vma->page_bitmap);
> +		if (vma->ppage_bitmap)
> +			clear_bit(page_offset, vma->ppage_bitmap);
> +
>  		ret = read(fd, buf, PAGE_SIZE);
>  		if (ret != PAGE_SIZE) {
>  			pr_err("Can'r read mapping page %d\n", ret);
> @@ -281,6 +295,31 @@ static int restore_anon_vma_content(pid_t pid)
>  	}
>  	close(fd);
>  
> +	/* Remove pages, which were not shared with a child */
> +	list_for_each_entry(vma, &vma_list, list) {
> +		unsigned long size, i = 0;
> +
> +		if (vma->ppage_bitmap == NULL)
> +			continue;
> +
> +		size = vma_entry_len(&vma->vma) / PAGE_SIZE;
> +		while (1) {
> +			/* Find all pages, which avaliable only for a parent */
> +			i = find_next_bit(vma->ppage_bitmap, size, i);
> +
> +			if ( i >= size)
> +				break;
> +
> +			madvise((void *)(vma->vma.shmid + PAGE_SIZE * i),
> +						PAGE_SIZE, MADV_DONTNEED);
> +			if (ret < 0) {

Does this check ever work? The return value of madvise() is never assigned to ret.

> +				pr_perror("madvise failed\n");
> +				return -1;
> +			}
> +			i++;
> +		}
> +	}
> +
>  	return 0;
>  }
>  
> diff --git a/include/crtools.h b/include/crtools.h
> index 146524d..c3e9e5b 100644
> --- a/include/crtools.h
> +++ b/include/crtools.h
> @@ -213,6 +213,8 @@ struct vma_area {
>  	struct list_head	list;
>  	VmaEntry		vma;
>  	int			vm_file_fd;
> +	unsigned long		*page_bitmap;  /* existent pages */
> +	unsigned long		*ppage_bitmap; /* parent's existent pages */
>  };
>  
>  #define vma_area_is(vma_area, s)	vma_entry_is(&((vma_area)->vma), s)
> 




More information about the CRIU mailing list