[CRIU] [PATCH 07/15] restore: map private vma-s before forking children (v2)

Pavel Emelyanov xemul at parallels.com
Mon Nov 12 06:37:06 EST 2012


On 11/02/2012 05:31 PM, Andrey Vagin wrote:
> In this case private vma-s will be inherited by children,
> which allows copy-on-write regions to be restored.
> 
> This code compares the child and parent vma lists. If it finds
> two vma-s with the same start and end addresses, it decides
> that the child inherits this vma from the parent.
> 
> This code calculates the total size of all private vma-s, then allocates
> a memory region for all vma-s and maps them one by one. If a vma is
> inherited, it will be remapped to the allocated place.
> 
> As a result all vma-s will be placed in a contiguous memory region
> and sorted by start address. This layout will be used to remap
> vma-s to their correct addresses.
> 
> v2: fix according to Pavel's comments ( clean up and simplify )
> 
> Signed-off-by: Andrey Vagin <avagin at openvz.org>
> ---
>  cr-restore.c | 99 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
>  1 file changed, 98 insertions(+), 1 deletion(-)
> 
> diff --git a/cr-restore.c b/cr-restore.c
> index 1c9da15..0597e6b 100644
> --- a/cr-restore.c
> +++ b/cr-restore.c
> @@ -67,6 +67,8 @@ static int prepare_restorer_blob(void);
>  
>  static LIST_HEAD(rst_vma_list);
>  static int rst_nr_vmas;
> +static void *premmapped_addr;
> +static unsigned long premmapped_len;
>  
>  static int shmem_remap(void *old_addr, void *new_addr, unsigned long size)
>  {
> @@ -179,11 +181,68 @@ err:
>  	return ret;
>  }
>  
> +/* Map a private vma, if it is not mapped by a parrent yet */
> +static int map_private_vma(struct vma_area *vma, void *tgt_addr,
> +			struct vma_area **pvma, struct list_head *pvma_list)
> +{
> +	void *addr;
> +	struct vma_area *p = *pvma;
> +
> +	list_for_each_entry_continue(p, pvma_list, list) {

This only works when the vma-s in the image are sorted. We don't have any
check for this in the image; please add one.

> +		if (p->vma.start > vma->vma.start)
> +			 break;
> +
> +		if (p->vma.end == vma->vma.end &&
> +		    p->vma.start == vma->vma.start) {
> +			pr_info("COW 0x%016lx-0x%016lx 0x%016lx vma\n",
> +				vma->vma.start, vma->vma.end, vma->vma.pgoff);
> +			vma_premmaped_start(&(vma->vma)) = vma_premmaped_start(&p->vma);

Using vma->vma's premmaped_start to detect whether or not we've found
a good parent vma obscures the logic. Please use a separate local variable for this.

> +			break;
> +		}
> +
> +	}
> +
> +	*pvma = p;
> +
> +	if (!vma_premmaped_start(&vma->vma)) {
> +		pr_info("Map 0x%016lx-0x%016lx 0x%016lx vma\n",
> +			vma->vma.start, vma->vma.end, vma->vma.pgoff);
> +
> +		addr = mmap(tgt_addr, vma_entry_len(&vma->vma),
> +				vma->vma.prot | PROT_WRITE,
> +				vma->vma.flags | MAP_FIXED,
> +				vma->vma.fd, vma->vma.pgoff);
> +
> +		if (addr == MAP_FAILED) {
> +			pr_perror("Unable to map ANON_VMA");
> +			return -1;
> +		}
> +		vma_premmaped_start(&(vma->vma)) = (unsigned long) addr;
> +	} else {
> +		addr = mremap((void *)vma_premmaped_start(&vma->vma),
> +				vma_area_len(vma), vma_area_len(vma),
> +				MREMAP_FIXED | MREMAP_MAYMOVE, tgt_addr);
> +		if (addr != tgt_addr) {
> +			pr_perror("Unable to remap a private vma");
> +			return -1;
> +		}
> +
> +		vma_premmaped_start(&(vma->vma)) = (unsigned long) addr;
> +	}
> +
> +	return 0;
> +}
> +
>  static int read_vmas(int pid)
>  {
>  	int fd, ret = 0;
>  	LIST_HEAD(old);
> -	struct vma_area *vma;
> +	struct vma_area *pvma, *vma;
> +	unsigned long priv_size = 0;
> +	void *addr;
> +
> +	void *old_premmapped_addr = NULL;
> +	unsigned long old_premmapped_len;
>  
>  	list_replace_init(&rst_vma_list, &old);
>  
> @@ -216,6 +275,36 @@ static int read_vmas(int pid)
>  
>  		vma->vma = *e;
>  		vma_entry__free_unpacked(e, NULL);
> +
> +		if (!vma_priv(&vma->vma))
> +			continue;
> +
> +		priv_size += vma_area_len(vma);
> +	}
> +
> +	/* Reserve a place for mapping private vma-s one by one */
> +	addr = mmap(NULL, priv_size, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, 0, 0);
> +	if (addr == MAP_FAILED) {
> +		pr_perror("Unable to reserve memory");
> +		return -1;
> +	}
> +
> +	old_premmapped_addr = premmapped_addr;
> +	old_premmapped_len = premmapped_len;
> +	premmapped_addr = addr;
> +	premmapped_len = priv_size;
> +
> +	pvma = list_entry(&old, struct vma_area, list);
> +
> +	list_for_each_entry(vma, &rst_vma_list, list) {
> +		if (!vma_priv(&vma->vma))
> +			continue;
> +
> +		ret = map_private_vma(vma, addr, &pvma, &old);
> +		if (ret < 0)
> +			break;
> +
> +		addr += vma_area_len(vma);
>  	}
>  
>  	close(fd);
> @@ -227,6 +316,14 @@ out:
>  		xfree(vma);
>  	}
>  
> +	if (old_premmapped_addr &&
> +	    munmap(old_premmapped_addr, old_premmapped_len)) {
> +		pr_perror("Unable to unmap %p(%lx)",
> +				old_premmapped_addr, old_premmapped_len);
> +		return -1;
> +	}
> +
> +
>  	return ret;
>  }
>  
> 




More information about the CRIU mailing list