[CRIU] [PATCH 3/5] deduplication: look up for old pages in previous snapshot

Pavel Emelyanov xemul at parallels.com
Tue Oct 15 00:20:30 PDT 2013


On 10/13/2013 04:22 PM, Tikhomirov Pavel wrote:
> old snapshot from "parent" symlink, and pids from pagemap-PID.img files
> 
> Signed-off-by: Tikhomirov Pavel <snorcht at gmail.com>
> ---
>  cr-dedup.c          |  132 ++++++++++++++++++++++++++++++++++++++++++++++++++-
>  include/page-read.h |    1 +
>  page-read.c         |   22 +++++++--
>  3 files changed, 151 insertions(+), 4 deletions(-)
> 
> diff --git a/cr-dedup.c b/cr-dedup.c
> index 4f959dd..25a3129 100644
> --- a/cr-dedup.c
> +++ b/cr-dedup.c
> @@ -1,9 +1,139 @@
>  #include <unistd.h>
>  
>  #include "crtools.h"
> +#include "page-read.h"
> +#include "restorer.h"
>  #include "cr-dedup.h"
>  
> -int cr_dedup(void)
> +int _cr_dedup(int predump_fd, int pid);

This name is bad. Name this function according to what it does. E.g. cr_dedup_one_pagemap().

> +
> +int cr_dedup(void) 
> +{
> +	int close_ret, ret = 0;
> +	int pid;
> +	int predump_fd = -1;
> +	DIR * dirp;
> +	struct dirent *ent;
> +	
> +	dirp = opendir("parent");

s/"parent"/CR_PARENT_LINK/

> +	if (dirp == NULL) {
> +		pr_err("Can't enter previous snapshot folder, error=%d\n", errno);
> +		ret = -1;
> +		goto err;;
> +	}
> +
> +	predump_fd = dirfd(dirp);
> +	if (predump_fd == -1) {
> +		ret = -1;
> +		goto err;
> +	}
> +
> +	while (1) {
> +		errno = 0;
> +		ent = readdir(dirp);
> +		if (ent == NULL) {
> +			if (errno)
> +				pr_err("Failed readdir, error=%d\n", errno);
> +			break;
> +		}
> +
> +		ret = sscanf(ent->d_name, "pagemap-%d.img", &pid);
> +		if (ret == 1) {
> +			pr_info("pid=%d\n", pid);
> +			ret = _cr_dedup(predump_fd, pid);
> +			if (ret < 0)
> +				break;
> +		}
> +	}
> +
> +err:
> +	if (predump_fd >= 0) {
> +		close_ret = close(predump_fd);
> +		if (close_ret == -1)
> +			return close_ret;
> +	}
> +
> +	if (dirp) {
> +		close_ret = closedir(dirp);
> +		if (close_ret == -1)
> +			return close_ret;
> +	}
> +
> +	if (ret < 0)
> +		return ret;
> +
> +	return 0;
> +}
> +
> +int _cr_dedup(int predump_fd, int pid)

It's not predump_fd, it's parent_dir.

>  {
> +	int ret;
> +	struct page_read pr, pr2;
> +	unsigned long off;
> +	struct iovec iov, iov2;	
> +	
> +	ret = open_page_read(pid, &pr);
> +	if (ret) {
> +		ret = -1;
> +		goto exit;
> +	}
> +
> +	ret = open_page_rw_dedup(predump_fd, pid, &pr2);

Why do you open the parent's pagemap? The open_page_read() does it for you.
Utilize _this_.

Moreover, the page_read->get_pagemap() thing does walk the pagemap ranges
looking into parent. If you can re-use (parts) of this code, that would be
great.

> +	if (ret) {
> +		ret = -1;
> +		goto exit;
> +	}
> +
> +	ret = pr.get_pagemap(&pr, &iov);
> +	if (ret <= 0)
> +		goto exit;
> +
> +	ret = pr2.get_pagemap(&pr2, &iov2);
> +	if (ret <= 0)
> +		goto exit;
> +
> +	off = 0;
> +	while (1) {
> +		unsigned long i_off;
> +		long i_len;
> +

>From here to ...

> +		i_off = (unsigned long)max(iov.iov_base, iov2.iov_base) - (unsigned long)iov2.iov_base;
> +		i_len = (long)(min(iov.iov_base + iov.iov_len, iov2.iov_base + iov2.iov_len)
> +			- max(iov.iov_base, iov2.iov_base));
> +
> +		if (iov.iov_base + iov.iov_len == iov2.iov_base + iov2.iov_len) {
> +			ret = pr.get_pagemap(&pr, &iov);
> +			if (ret <= 0)
> +				break;
> +
> +			if (!pr2.pe->in_parent)
> +				off += pr2.pe->nr_pages;
> +			ret = pr2.get_pagemap(&pr2, &iov2);
> +			if (ret <= 0)
> +				break;
> +			continue;
> +		}
> +
> +		if (iov.iov_base + iov.iov_len < iov2.iov_base + iov2.iov_len) {
> +			ret = pr.get_pagemap(&pr, &iov);
> +			if (ret <= 0)
> +				break;
> +		} else {
> +			if (!pr2.pe->in_parent)
> +				off += pr2.pe->nr_pages;
> +			ret = pr2.get_pagemap(&pr2, &iov2);
> +			if (ret <= 0)
> +				break;
> +		}

... here. Some comments describing an algorithm is required.

> +	}
> +exit:
> +	pr_info("Dedup test END\n");

"test" ?

> +
> +	pr2.close(&pr2);
> +	pr.close(&pr);
> +
> +	if (ret < 0)
> +		return ret;
> +
>  	return 0;
>  }
> diff --git a/include/page-read.h b/include/page-read.h
> index b19ea69..47c1562 100644
> --- a/include/page-read.h
> +++ b/include/page-read.h
> @@ -66,4 +66,5 @@ struct page_read {
>  };
>  
>  int open_page_read(int pid, struct page_read *);
> +int open_page_rw_dedup(int dfd, int pid, struct page_read *);
>  #endif
> diff --git a/page-read.c b/page-read.c
> index 78b1303..52c9b12 100644
> --- a/page-read.c
> +++ b/page-read.c
> @@ -181,13 +181,16 @@ err_cl:
>  	return -1;
>  }
>  
> -static int open_page_read_at(int dfd, int pid, struct page_read *pr)
> +static int open_page_rw_at(int dfd, int pid, struct page_read *pr, int rw)
>  {
>  	pr->pe = NULL;
>  
>  	pr->fd = open_image_at(dfd, CR_FD_PAGEMAP, O_RSTR, (long)pid);
>  	if (pr->fd < 0) {
> -		pr->fd_pg = open_image_at(dfd, CR_FD_PAGES_OLD, O_RSTR, pid);
> +		if (rw)
> +			pr->fd_pg = open_image_at(dfd, CR_FD_PAGES_OLD, O_RDWR, pid);
> +		else
> +			pr->fd_pg = open_image_at(dfd, CR_FD_PAGES_OLD, O_RSTR, pid);
>  		if (pr->fd_pg < 0)
>  			return -1;
>  
> @@ -203,7 +206,10 @@ static int open_page_read_at(int dfd, int pid, struct page_read *pr)
>  			return -1;
>  		}
>  
> -		pr->fd_pg = open_pages_image_at(dfd, O_RSTR, pr->fd);
> +		if (rw)

Better make the int rw argument be int flags and push it directly into open_pages_image_at.

> +			pr->fd_pg = open_pages_image_at(dfd, O_RDWR, pr->fd);
> +		else
> +			pr->fd_pg = open_pages_image_at(dfd, O_RSTR, pr->fd);
>  		if (pr->fd_pg < 0) {
>  			close_page_read(pr);
>  			return -1;
> @@ -223,7 +229,17 @@ static int open_page_read_at(int dfd, int pid, struct page_read *pr)
>  	return 0;
>  }
>  
> +static int open_page_read_at(int dfd, int pid, struct page_read *pr)
> +{
> +	return open_page_rw_at(dfd, pid, pr, 0);
> +}
> +
>  int open_page_read(int pid, struct page_read *pr)
>  {
>  	return open_page_read_at(get_service_fd(IMG_FD_OFF), pid, pr);
>  }
> +
> +int open_page_rw_dedup(int dfd, int pid, struct page_read *pr)
> +{
> +	return open_page_rw_at(dfd, pid, pr, 1);
> +}
> 




More information about the CRIU mailing list