[CRIU] [PATCH 2/5] mm: map grow-down VMA-s with guard pages

Pavel Emelyanov xemul at parallels.com
Wed Aug 28 12:10:28 EDT 2013


On 08/26/2013 05:15 PM, Andrey Vagin wrote:
> In /proc/pid/maps grow-down VMA-s are shown without guard pages, but
> sometime these "guard" pages can contain usefull data. For example if
> a real guard page has been remmaped by another VMA. Let's call such
> pages as fake guard pages.
> 
> So when a grow-down VMA is mmaped on restore, it should be mapped with
> one more guard page to restore content of the fake guard page.
> 
> https://bugzilla.openvz.org/show_bug.cgi?id=2715
> Signed-off-by: Andrey Vagin <avagin at openvz.org>
> ---
>  cr-restore.c | 60 +++++++++++++++++++++++++++++++++++++++++++++++++++++-------
>  1 file changed, 53 insertions(+), 7 deletions(-)
> 
> diff --git a/cr-restore.c b/cr-restore.c
> index 14bf300..8fea9ee 100644
> --- a/cr-restore.c
> +++ b/cr-restore.c
> @@ -207,7 +207,7 @@ static int map_private_vma(pid_t pid, struct vma_area *vma, void *tgt_addr,
>  {
>  	int ret;
>  	void *addr, *paddr = NULL;
> -	unsigned long nr_pages;
> +	unsigned long nr_pages, size;
>  	struct vma_area *p = *pvma;
>  
>  	if (vma_entry_is(&vma->vma, VMA_FILE_PRIVATE)) {
> @@ -242,6 +242,17 @@ static int map_private_vma(pid_t pid, struct vma_area *vma, void *tgt_addr,
>  
>  	*pvma = p;
>  
> +	/*
> +	 * A grow-down VMA has a guard page, which protect a VMA below it.
> +	 * So one more page is mapped here to restore content of the first page
> +	 */
> +	if (vma->vma.flags & MAP_GROWSDOWN) {
> +		vma->vma.start -= PAGE_SIZE;
> +		if (paddr)
> +			paddr -= PAGE_SIZE;
> +	}
> +
> +	size = vma_entry_len(&vma->vma);
>  	if (paddr == NULL) {
>  		/*
>  		 * The respective memory area was NOT found in the parent.
> @@ -250,7 +261,7 @@ static int map_private_vma(pid_t pid, struct vma_area *vma, void *tgt_addr,
>  		pr_info("Map 0x%016"PRIx64"-0x%016"PRIx64" 0x%016"PRIx64" vma\n",
>  			vma->vma.start, vma->vma.end, vma->vma.pgoff);
>  
> -		addr = mmap(tgt_addr, vma_entry_len(&vma->vma),
> +		addr = mmap(tgt_addr, size,
>  				vma->vma.prot | PROT_WRITE,
>  				vma->vma.flags | MAP_FIXED,
>  				vma->vma.fd, vma->vma.pgoff);
> @@ -266,7 +277,7 @@ static int map_private_vma(pid_t pid, struct vma_area *vma, void *tgt_addr,
>  		 */
>  		vma->ppage_bitmap = p->page_bitmap;
>  
> -		addr = mremap(paddr, vma_area_len(vma), vma_area_len(vma),
> +		addr = mremap(paddr, size, size,
>  				MREMAP_FIXED | MREMAP_MAYMOVE, tgt_addr);
>  		if (addr != tgt_addr) {
>  			pr_perror("Unable to remap a private vma");
> @@ -279,10 +290,15 @@ static int map_private_vma(pid_t pid, struct vma_area *vma, void *tgt_addr,
>  	pr_debug("\tpremap 0x%016"PRIx64"-0x%016"PRIx64" -> %016lx\n",
>  		vma->vma.start, vma->vma.end, (unsigned long)addr);
>  
> +	if (vma->vma.flags & MAP_GROWSDOWN) { /* Skip gurad page */
> +		vma->vma.start += PAGE_SIZE;
> +		vma_premmaped_start(&vma->vma) += PAGE_SIZE;
> +	}
> +
>  	if (vma_entry_is(&vma->vma, VMA_FILE_PRIVATE))
>  		close(vma->vma.fd);
>  
> -	return 0;
> +	return size;
>  }
>  
>  static int restore_priv_vma_content(pid_t pid)
> @@ -475,8 +491,11 @@ static int prepare_mappings(int pid)
>  			break;
>  		}
>  
> -		if (vma_priv(&vma->vma))
> +		if (vma_priv(&vma->vma)) {
>  			rst_vmas.priv_size += vma_area_len(vma);
> +			if (vma->vma.flags & MAP_GROWSDOWN)
> +				rst_vmas.priv_size += PAGE_SIZE;
> +		}
>  	}
>  	close(fd);
>  
> @@ -512,10 +531,10 @@ static int prepare_mappings(int pid)
>  		if (ret < 0)
>  			break;
>  
> -		addr += vma_area_len(vma);
> +		addr += ret;
>  	}
>  
> -	if (ret == 0)
> +	if (ret >= 0)
>  		ret = restore_priv_vma_content(pid);
>  
>  out:
> @@ -536,6 +555,31 @@ out:
>  	return ret;
>  }
>  
> +/*
> + * A gard page must be unmapped after restoring content and
> + * forking children to restore COW memory.
> + */
> +int unmap_guard_pages()
> +{
> +	struct vma_area *vma;
> +
> +	list_for_each_entry(vma, &rst_vmas.h, list) {

Let's unmap them "in place", instead of one more vmas list scan.

> +		if (!vma_priv(&vma->vma))
> +			continue;
> +
> +		if (vma->vma.flags & MAP_GROWSDOWN) {
> +			void *addr = (void *) vma_premmaped_start(&vma->vma);
> +
> +			if (munmap(addr - PAGE_SIZE, PAGE_SIZE)) {
> +				pr_perror("Can't unmap guard page\n");
> +				return -1;
> +			}
> +		}
> +	}
> +
> +	return 0;
> +}
> +
>  static int open_vmas(int pid)
>  {
>  	struct vma_area *vma;
> @@ -1184,6 +1228,8 @@ static int restore_task_with_children(void *_arg)
>  	if (create_children_and_session())
>  		exit(1);
>  
> +	if (unmap_guard_pages())
> +		exit(1);
>  	/*
>  	 * Unlike sessions, process groups (a.k.a. pgids) can be joined
>  	 * by any task, provided the task with pid == pgid (group leader)
> 




More information about the CRIU mailing list