[CRIU] [PATCH] dedup: try add dedup on unmount vma, to auto-dedup and dedup

Tikhomirov Pavel snorcht at gmail.com
Wed Mar 26 07:54:24 PDT 2014


Hi, this is an idea for how we can do deduplication on unmapped VMAs.
Please take a look and tell me if I'm going in the wrong direction.

Thanks. Pavel.

The main idea is: while going through the parent pagemaps in dedup/dump, we
can punch holes in the parent pagemap entries that we pass by.

- This currently fails because check_pagehole_in_parent and dedup_one_iovec
do not seek to the end of the checked/deduped area. So if this area is not at
the end of a parent pagemap entry, and the end of that parent entry intersects
with a pagemap that is not in_parent, it will dedup the whole parent pagemap
entry, although only its last part needs to be deduped. (cvaddr needs to be at
the end of the checked area to do it right.)

cur:   |--hole--|--data--|
parent:    |--da.ta--|
Signed-off-by: Tikhomirov Pavel <snorcht at gmail.com>
---
 cr-dedup.c          | 11 +++++++++++
 include/page-read.h |  1 +
 page-read.c         | 24 ++++++++++++++++++++++++
 page-xfer.c         |  2 +-
 4 files changed, 37 insertions(+), 1 deletion(-)

diff --git a/cr-dedup.c b/cr-dedup.c
index 7758674..e96ee40 100644
--- a/cr-dedup.c
+++ b/cr-dedup.c
@@ -87,6 +87,17 @@ static int cr_dedup_one_pagemap(int pid)
 			ret = dedup_one_iovec(prp, &iov);
 			if (ret)
 				goto exit;
+		} else {
+			/*
+			 * If it is in parent go check it, check will use
+			 * seek_pagemap_page, it does dedup of passed unmaped areas
+			 */
+			ret = check_pagehole_in_parent(prp, &iov);
+			if (ret) {
+				pr_err("Hole %p/%zu not found in parent\n",
+							 iov.iov_base, iov.iov_len);
+				goto exit;
+			}
 		}
 
 		pr.put_pagemap(&pr);
diff --git a/include/page-read.h b/include/page-read.h
index 7a063e9..3b8e496 100644
--- a/include/page-read.h
+++ b/include/page-read.h
@@ -75,4 +75,5 @@ extern int seek_pagemap_page(struct page_read *pr, unsigned long vaddr, bool war
 
 extern int dedup_one_iovec(struct page_read *pr, struct iovec *iov);
 extern int punch_hole(struct page_read *pr, unsigned long off, unsigned long len, bool cleanup);
+extern int check_pagehole_in_parent(struct page_read *p, struct iovec *iov);
 #endif /* __CR_PAGE_READ_H__ */
diff --git a/page-read.c b/page-read.c
index 8937f33..802015b 100644
--- a/page-read.c
+++ b/page-read.c
@@ -101,6 +101,7 @@ int seek_pagemap_page(struct page_read *pr, unsigned long vaddr, bool warn)
 
 	while (1) {
 		unsigned long iov_end;
+		struct iovec tiov;
 
 		if (vaddr < pr->cvaddr) {
 			if (warn)
@@ -111,6 +112,19 @@ int seek_pagemap_page(struct page_read *pr, unsigned long vaddr, bool warn)
 		iov_end = (unsigned long)iov.iov_base + iov.iov_len;
 
 		if (iov_end <= vaddr) {
+			/*
+			 * If pass through pagemap entry in parent, it was unmaped
+			 * so we can dedup it
+			 * pr->cvaddr is used to identify begining of unmaped block
+			 * (end of previously checked region)
+			 * but now it is wrong - need to redo!!!
+			 */
+			if (opts.auto_dedup) {
+				tiov.iov_base = (void*)pr->cvaddr;
+				tiov.iov_len = iov_end - pr->cvaddr;
+				dedup_one_iovec(pr, &tiov);
+			}
+
 			skip_pagemap_pages(pr, iov_end - pr->cvaddr);
 			put_pagemap(pr);
 new_pagemap:
@@ -120,6 +134,16 @@ new_pagemap:
 
 			continue;
 		}
+		
+		/*
+		 * If pass through part of pagemap entry in parent, it was unmaped
+		 * same for pr->cvaddr
+		 */
+		if (opts.auto_dedup) {
+			tiov.iov_base = (void*)pr->cvaddr;
+			tiov.iov_len = vaddr - pr->cvaddr;
+			dedup_one_iovec(pr, &tiov);
+		}
 
 		skip_pagemap_pages(pr, vaddr - pr->cvaddr);
 		return 1;
diff --git a/page-xfer.c b/page-xfer.c
index f26be35..d0601f2 100644
--- a/page-xfer.c
+++ b/page-xfer.c
@@ -487,7 +487,7 @@ static int write_pages_loc(struct page_xfer *xfer,
 	return 0;
 }
 
-static int check_pagehole_in_parent(struct page_read *p, struct iovec *iov)
+int check_pagehole_in_parent(struct page_read *p, struct iovec *iov)
 {
 	int ret;
 	unsigned long off, end;
-- 
1.8.3.2



More information about the CRIU mailing list