[CRIU] [PATCH 2/4] ghost: Add chunked ghost files

Pavel Emelyanov xemul at virtuozzo.com
Thu Jun 15 19:04:30 MSK 2017


If the ghost file is too big, it might make sense to try seeking
for holes in it, thus reducing the image size.

We've seen this once for tmpfs files in issue #230.

Signed-off-by: Pavel Emelyanov <xemul at virtuozzo.com>
---
 criu/files-reg.c             | 177 ++++++++++++++++++++++++++++++++++++++++++-
 criu/include/protobuf-desc.h |   1 +
 images/ghost-file.proto      |   7 ++
 3 files changed, 183 insertions(+), 2 deletions(-)

diff --git a/criu/files-reg.c b/criu/files-reg.c
index 5b72aa7..e7b073c 100644
--- a/criu/files-reg.c
+++ b/criu/files-reg.c
@@ -9,9 +9,15 @@
 #include <sys/vfs.h>
 #include <sys/prctl.h>
 #include <ctype.h>
+#include <sys/sendfile.h>
 #include <sched.h>
 #include <sys/capability.h>
 
+#ifndef SEEK_DATA
+#define SEEK_DATA	3
+#define SEEK_HOLE	4
+#endif
+
 /* Stolen from kernel/fs/nfs/unlink.c */
 #define SILLYNAME_PREF ".nfs"
 #define SILLYNAME_SUFF_LEN (((unsigned)sizeof(u64) << 1) + ((unsigned)sizeof(unsigned int) << 1))
@@ -142,6 +148,152 @@ static int trim_last_parent(char *path)
 	return 0;
 }
 
+#define BUFSIZE	(4096)
+
+/*
+ * Copy len bytes found at offset off of fd into the raw image fd img,
+ * bouncing them through a buffer if opts.remote. Returns 0, or -1 on error.
+ */
+static int copy_chunk_from_file(int fd, int img, off_t off, size_t len)
+{
+	char *buf = NULL;
+	int ret;
+
+	if (opts.remote) {
+		buf = xmalloc(BUFSIZE);
+		if (!buf)
+			return -1;
+	}
+	while (len > 0) {
+		if (opts.remote) {
+			ret = pread(fd, buf, min_t(size_t, BUFSIZE, len), off);
+			if (ret <= 0) {
+				pr_perror("Can't read from ghost file");
+				break;
+			}
+			if (write(img, buf, ret) != ret) {
+				pr_perror("Can't write to image");
+				break;
+			}
+			off += ret;
+		} else {
+			ret = sendfile(img, fd, &off, len); /* advances off for us */
+			if (ret <= 0) {
+				pr_perror("Can't send ghost to image");
+				break;
+			}
+		}
+		len -= ret;
+	}
+	xfree(buf); /* failures break out with len != 0, so buf never leaks */
+	return len ? -1 : 0;
+}
+
+/*
+ * Walk fd's data extents (found via SEEK_DATA/SEEK_HOLE) until file_size is
+ * reached; each one goes to img as a GhostChunkEntry plus its bytes. 0 or -1.
+ */
+static int copy_file_to_chunks(int fd, struct cr_img *img, size_t file_size)
+{
+	GhostChunkEntry chunk = GHOST_CHUNK_ENTRY__INIT;
+	off_t start, end = 0;
+
+	while (end < file_size) {
+		start = lseek(fd, end, SEEK_DATA);
+		if (start >= 0) {
+			end = lseek(fd, start, SEEK_HOLE);
+			if (end < 0) {
+				pr_perror("Can't seek file hole");
+				return -1;
+			}
+		} else if (errno == ENXIO) {
+			/* No more data past this point */
+			break;
+		} else if (end != 0) {
+			pr_perror("Can't seek file data");
+			return -1;
+		} else {
+			/* Very first seek failed: no SEEK_HOLE/DATA by FS, dump it all */
+			start = 0;
+			end = file_size;
+		}
+
+		chunk.off = start;
+		chunk.len = end - start;
+
+		if (pb_write_one(img, &chunk, PB_GHOST_CHUNK) ||
+		    copy_chunk_from_file(fd, img_raw_fd(img), chunk.off, chunk.len))
+			return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * Copy len bytes from the current position of the raw image fd img into fd
+ * at offset off (via a buffer if opts.remote). Returns 0, or -1 on error.
+ */
+static int copy_chunk_to_file(int img, int fd, off_t off, size_t len)
+{
+	char *buf = NULL;
+	int ret;
+
+	if (opts.remote) {
+		buf = xmalloc(BUFSIZE);
+		if (!buf)
+			return -1;
+	}
+	while (len > 0) {
+		if (opts.remote) {
+			ret = read(img, buf, min_t(size_t, BUFSIZE, len));
+			if (ret <= 0) {
+				pr_perror("Can't read from image");
+				break;
+			}
+			if (pwrite(fd, buf, ret, off) != ret) {
+				pr_perror("Can't write to file");
+				break;
+			}
+		} else {
+			if (lseek(fd, off, SEEK_SET) < 0) {
+				pr_perror("Can't seek file");
+				break;
+			}
+			ret = sendfile(fd, img, NULL, len);
+			if (ret <= 0) { /* 0 == image ended early; retrying would spin */
+				pr_perror("Can't send data");
+				break;
+			}
+		}
+		off += ret;
+		len -= ret;
+	}
+	xfree(buf); /* failures break out with len != 0, so buf never leaks */
+	return len ? -1 : 0;
+}
+
+/*
+ * Set fd's size to file_size, then replay each GhostChunkEntry read from img
+ * into it. Returns pb_read_one_eof()'s final value (<= 0), or -1 on bad copy.
+ */
+static int copy_file_from_chunks(struct cr_img *img, int fd, size_t file_size)
+{
+	GhostChunkEntry *ce;
+	int ret;
+
+	if (ftruncate(fd, file_size) < 0) {
+		pr_perror("Can't make file size");
+		return -1;
+	}
+	while ((ret = pb_read_one_eof(img, &ce, PB_GHOST_CHUNK)) > 0) {
+		ret = copy_chunk_to_file(img_raw_fd(img), fd, ce->off, ce->len);
+		ghost_chunk_entry__free_unpacked(ce, NULL); /* also when the copy failed */
+		if (ret)
+			return -1;
+	}
+	return ret;
+}
+
 static int mkreg_ghost(char *path, GhostFileEntry *gfe, struct cr_img *img)
 {
 	int gfd, ret;
@@ -150,7 +302,15 @@ static int mkreg_ghost(char *path, GhostFileEntry *gfe, struct cr_img *img)
 	if (gfd < 0)
 		return -1;
 
-	ret = copy_file(img_raw_fd(img), gfd, 0);
+	if (gfe->chunks) {
+		if (!gfe->has_size) {
+			pr_err("Corrupted ghost image -> no size\n");
+			return -1;
+		}
+
+		ret = copy_file_from_chunks(img, gfd, gfe->size);
+	} else
+		ret = copy_file(img_raw_fd(img), gfd, 0);
 	if (ret < 0)
 		unlink(path);
 	close(gfd);
@@ -589,6 +749,9 @@ static struct collect_image_info remap_cinfo = {
 	.collect = collect_one_remap,
 };
 
+/* Tiny files don't need to generate chunks in ghost image. */
+#define GHOST_CHUNKS_THRESH	(3 * 4096)
+
 static int dump_ghost_file(int _fd, u32 id, const struct stat *st, dev_t phys_dev)
 {
 	struct cr_img *img;
@@ -621,6 +784,12 @@ static int dump_ghost_file(int _fd, u32 id, const struct stat *st, dev_t phys_de
 		gfe.rdev = st->st_rdev;
 	}
 
+	if (S_ISREG(st->st_mode) && (st->st_size >= GHOST_CHUNKS_THRESH)) {
+		gfe.has_chunks = gfe.chunks = true;
+		gfe.has_size = true;
+		gfe.size = st->st_size;
+	}
+
 	if (pb_write_one(img, &gfe, PB_GHOST_FILE))
 		return -1;
 
@@ -638,7 +807,11 @@ static int dump_ghost_file(int _fd, u32 id, const struct stat *st, dev_t phys_de
 			pr_perror("Can't open ghost original file");
 			return -1;
 		}
-		ret = copy_file(fd, img_raw_fd(img), st->st_size);
+
+		if (gfe.chunks)
+			ret = copy_file_to_chunks(fd, img, st->st_size);
+		else
+			ret = copy_file(fd, img_raw_fd(img), st->st_size);
 		close(fd);
 		if (ret)
 			return -1;
diff --git a/criu/include/protobuf-desc.h b/criu/include/protobuf-desc.h
index 1fd095f..66460cf 100644
--- a/criu/include/protobuf-desc.h
+++ b/criu/include/protobuf-desc.h
@@ -63,6 +63,7 @@ enum {
 	PB_LOCAL_IMAGE,         /* Header for reading/writing images from/to proxy or cache. */
 	PB_LOCAL_IMAGE_REPLY,	/* Header for reading/writing images reply. */
 	PB_SNAPSHOT_ID,         /* Contains a single id. Used for reading/writing ids from proxy or cache. */
+	PB_GHOST_CHUNK,
 
 	/* PB_AUTOGEN_STOP */
 
diff --git a/images/ghost-file.proto b/images/ghost-file.proto
index 4b3415b..eda4664 100644
--- a/images/ghost-file.proto
+++ b/images/ghost-file.proto
@@ -13,4 +13,11 @@ message ghost_file_entry {
 	optional uint32		rdev		= 6 [(criu).dev = true, (criu).odev = true];
 	optional timeval	atim		= 7;
 	optional timeval	mtim		= 8;
+	optional bool		chunks		= 9;
+	optional uint64		size		= 10;
+}
+
+message ghost_chunk_entry {	// precedes one run of ghost file data in the image
+	required uint64		len		= 1;	// number of data bytes that follow this entry
+	required uint64		off		= 2;	// offset of those bytes within the ghost file
 }
-- 
2.1.4



More information about the CRIU mailing list