[CRIU] [RFC] files: Add file path handling engine

Cyrill Gorcunov gorcunov at openvz.org
Mon Jun 25 11:56:20 EDT 2012


Hi guys, tell me what you think of the attached patch? I think
such an approach will allow us to not duplicate file paths if a
program opens the same file several times.

Does the overall code look better or worse?

	Cyrill
-------------- next part --------------
>From ccfa48d39074354a89d39d12fbb8e23e809a673b Mon Sep 17 00:00:00 2001
From: Cyrill Gorcunov <gorcunov at openvz.org>
Date: Mon, 25 Jun 2012 19:38:27 +0400
Subject: [PATCH] files: Add file path handling engine

The basic idea is to handle file paths independently of
file type specifics.

This means we have a new image file which consists of
the following entries

 fpath_entry
 -----------
 u32	id;
 u32	id_remap;
 qstr_t	path;

where qstr_t is a dynamically sized structure

typedef struct {
	u32	len;
	u8	str[0];
} qstr_t;

At checkpoint time the dumper figures out which file path an
opened file refers to and dumps this path into an fpath_entry
on disk, providing the fpath_entry ID back to the caller.

This fpath_entry ID should be remembered by the caller
and used during the restore procedure to fetch the real
symbolic path back.

In case the file path no longer exists, say it was deleted,
id_remap will have a non-zero value and the remap engine
will provide a suitable ghost file name back.

Thus, for example, for regular files the overall scheme
looks like this

checkpoint:  id_fpath = fpath_dump_one(), wire id_fpath
restore:     remap = lookup_fpath(id_fpath)
     where
     remap->fpe.path.str provides real path
     remap->gf->rst.path provides ghost file path (if remap->gf != NULL)

On restore, collect_fpath_remaps should be called to gather all
information needed for file path remapping and ghost file
opening.

Signed-off-by: Cyrill Gorcunov <gorcunov at openvz.org>
---
 Makefile              |    1 +
 cr-restore.c          |    5 +-
 cr-show.c             |   53 +++----
 crtools.c             |    2 +-
 files-reg.c           |  314 ++------------------------------------
 fpath-remap.c         |  408 +++++++++++++++++++++++++++++++++++++++++++++++++
 include/crtools.h     |    4 +-
 include/fpath-remap.h |   49 ++++++
 include/image.h       |   36 +++--
 include/util.h        |    1 -
 util.c                |   29 ----
 11 files changed, 524 insertions(+), 378 deletions(-)
 create mode 100644 fpath-remap.c
 create mode 100644 include/fpath-remap.h

diff --git a/Makefile b/Makefile
index 3853028..3ed4315 100644
--- a/Makefile
+++ b/Makefile
@@ -45,6 +45,7 @@ OBJS		+= sk-unix.o
 OBJS		+= sk-queue.o
 OBJS		+= files.o
 OBJS		+= files-reg.o
+OBJS		+= fpath-remap.o
 OBJS		+= pipes.o
 OBJS		+= file-ids.o
 OBJS		+= namespaces.o
diff --git a/cr-restore.c b/cr-restore.c
index 79dadac..8302dc0 100644
--- a/cr-restore.c
+++ b/cr-restore.c
@@ -35,6 +35,7 @@
 #include "lock.h"
 #include "files.h"
 #include "files-reg.h"
+#include "fpath-remap.h"
 #include "pipes.h"
 #include "sk-inet.h"
 #include "eventfd.h"
@@ -295,6 +296,9 @@ static int prepare_shared(void)
 	if (prepare_shared_fdinfo())
 		return -1;
 
+	if (collect_fpath_remaps())
+		return -1;
+
 	if (collect_reg_files())
 		return -1;
 
@@ -1004,7 +1008,6 @@ out:
 	/*
 	 * Maybe rework ghosts to be auto-unlinkable?
 	 */
-
 	clear_ghost_files();
 	tcp_unlock_connections();
 
diff --git a/cr-show.c b/cr-show.c
index 49f9588..9ae744b 100644
--- a/cr-show.c
+++ b/cr-show.c
@@ -115,19 +115,9 @@ void show_reg_files(int fd_reg_files, struct cr_options *o)
 		if (ret <= 0)
 			goto out;
 
-		pr_msg("id: 0x%8x flags: 0x%4x pos: 0x%lx ", rfe.id, rfe.flags, rfe.pos);
+		pr_msg("id: 0x%8x id_fpath: 0x%8x flags: 0x%4x pos: 0x%lx ",
+		       rfe.id, rfe.id_fpath, rfe.flags, rfe.pos);
 		show_fown_cont(&rfe.fown);
-
-		if (rfe.len) {
-			int ret = read(fd_reg_files, local_buf, rfe.len);
-			if (ret != rfe.len) {
-				pr_perror("Can't read %d bytes", rfe.len);
-				goto out;
-			}
-			local_buf[rfe.len] = 0;
-			pr_msg(" --> %s", local_buf);
-		}
-
 		pr_msg("\n");
 	}
 
@@ -135,33 +125,34 @@ out:
 	pr_img_tail(CR_FD_REG_FILES);
 }
 
-static inline char *remap_id_type(u32 id)
+void show_fpath_remap(int fd, struct cr_options *o)
 {
-	if (id & REMAP_GHOST)
-		return "ghost";
-	else
-		return "real";
-}
+	struct fpath_entry fpe;
 
-void show_remap_files(int fd, struct cr_options *o)
-{
-	struct remap_file_path_entry rfe;
-
-	pr_img_head(CR_FD_REMAP_FPATH);
+	pr_img_head(CR_FD_FPATH_REMAP);
 
 	while (1) {
-		int ret;
-
-		ret = read_img_eof(fd, &rfe);
+		int ret = read_img_eof(fd, &fpe);
 		if (ret <= 0)
-			break;
+			goto out;
 
-		pr_msg("%#x -> %#x (%s)\n", rfe.orig_id,
-				(rfe.remap_id & ~REMAP_GHOST),
-				remap_id_type(rfe.remap_id));
+		pr_msg("id: 0x%-8x id_remap: 0x%-8x len: %4u",
+			fpe.id, fpe.id_remap, fpe.path.len);
+
+		if (fpe.path.len) {
+			ret = read(fd, local_buf, fpe.path.len);
+			if (ret != fpe.path.len) {
+				pr_perror("Can't read %d bytes", fpe.path.len);
+				goto out;
+			}
+			local_buf[fpe.path.len] = 0;
+			pr_msg(" --> %s", local_buf);
+		}
+		pr_msg("\n");
 	}
 
-	pr_img_tail(CR_FD_REMAP_FPATH);
+out:
+	pr_img_tail(CR_FD_FPATH_REMAP);
 }
 
 void show_ghost_file(int fd, struct cr_options *o)
diff --git a/crtools.c b/crtools.c
index d4d2305..bbe662e 100644
--- a/crtools.c
+++ b/crtools.c
@@ -73,7 +73,7 @@ struct cr_fd_desc_tmpl fdset_template[CR_FD_MAX] = {
 	FD_ENTRY(IPCNS_MSG,	"ipcns-msg-%d",	 show_ipc_msg),
 	FD_ENTRY(IPCNS_SEM,	"ipcns-sem-%d",	 show_ipc_sem),
 	FD_ENTRY(FS,		"fs-%d",	 show_fs),
-	FD_ENTRY(REMAP_FPATH,	"remap-fpath",	 show_remap_files),
+	FD_ENTRY(FPATH_REMAP,	"fpath-remap",	 show_fpath_remap),
 	FD_ENTRY(GHOST_FILE,	"ghost-file-%x", show_ghost_file),
 	FD_ENTRY(TCP_STREAM,	"tcp-stream-%x", show_tcp_stream),
 	FD_ENTRY(MOUNTPOINTS,	"mountpoints-%d", show_mountpoints),
diff --git a/files-reg.c b/files-reg.c
index 24e3be2..287c6ac 100644
--- a/files-reg.c
+++ b/files-reg.c
@@ -13,311 +13,31 @@
 #include "util.h"
 
 #include "files-reg.h"
+#include "fpath-remap.h"
 
 struct reg_file_info {
 	struct file_desc	d;
-
 	struct reg_file_entry	rfe;
-	char			*remap_path;
-	char			*path;
-};
 
-struct ghost_file {
-	struct list_head	list;
-	u32			id;
+	char			*remap_path;
 	char			*path;
 };
 
-/*
- * Ghost files are those not visible from the FS. Dumping them is
- * nasty and the only way we have -- just carry its contents with
- * us. Any brave soul to implement link unlinked file back?
- */
-struct ghost_file_dumpee {
-	struct list_head	list;
-	u32			id;
-	u32			dev;
-	u32			ino;
-};
-
-static u32 ghost_file_ids = 1;
-static LIST_HEAD(ghost_files);
-
-/*
- * This constant is selected without any calculations. Just do not
- * want to pick up too big files with us in the image.
- */
-#define MAX_GHOST_FILE_SIZE	(1 * 1024 * 1024)
-
-void clear_ghost_files(void)
-{
-	struct ghost_file *gf;
-
-	pr_info("Unlinking ghosts\n");
-	list_for_each_entry(gf, &ghost_files, list) {
-		pr_info("\t`- %s\n", gf->path);
-		unlink(gf->path);
-	}
-}
-
-static int open_remap_ghost(struct reg_file_info *rfi,
-		struct remap_file_path_entry *rfe)
-{
-	struct ghost_file *gf;
-	struct ghost_file_entry gfe;
-	int gfd, ifd;
-
-	list_for_each_entry(gf, &ghost_files, list)
-		if (gf->id == rfe->remap_id)
-			goto gf_found;
-
-	/*
-	 * Ghost not found. We will create one in the same dir
-	 * as the very first client of it thus resolving any
-	 * issues with cross-device links.
-	 */
-
-	pr_info("Opening ghost file %#x for %s\n", rfe->remap_id, rfi->path);
-
-	gf = xmalloc(sizeof(*gf));
-	if (!gf)
-		return -1;
-	gf->path = xmalloc(PATH_MAX);
-	if (!gf->path)
-		goto err;
-
-	ifd = open_image_ro(CR_FD_GHOST_FILE, rfe->remap_id);
-	if (ifd < 0)
-		goto err;
-
-	if (read_img(ifd, &gfe) < 0)
-		goto err;
-
-	snprintf(gf->path, PATH_MAX, "%s.cr.%x.ghost", rfi->path, rfe->remap_id);
-	gfd = open(gf->path, O_WRONLY | O_CREAT | O_EXCL, gfe.mode);
-	if (gfd < 0) {
-		pr_perror("Can't open ghost file");
-		goto err;
-	}
-
-	if (fchown(gfd, gfe.uid, gfe.gid) < 0) {
-		pr_perror("Can't reset user/group on ghost %#x\n", rfe->remap_id);
-		goto err;
-	}
-
-	if (copy_file(ifd, gfd, 0) < 0)
-		goto err;
-
-	close(ifd);
-	close(gfd);
-
-	gf->id = rfe->remap_id;
-	list_add_tail(&gf->list, &ghost_files);
-gf_found:
-	rfi->remap_path = gf->path;
-	return 0;
-
-err:
-	xfree(gf->path);
-	xfree(gf);
-	return -1;
-}
-
-static int collect_remaps(void)
-{
-	int fd, ret = 0;
-
-	fd = open_image_ro(CR_FD_REMAP_FPATH);
-	if (fd < 0)
-		return -1;
-
-	while (1) {
-		struct remap_file_path_entry rfe;
-		struct file_desc *fdesc;
-		struct reg_file_info *rfi;
-
-		ret = read_img_eof(fd, &rfe);
-		if (ret <= 0)
-			break;
-
-		ret = -1;
-
-		if (!(rfe.remap_id & REMAP_GHOST)) {
-			pr_err("Non ghost remap not supported @%#x\n",
-					rfe.orig_id);
-			break;
-		}
-
-		fdesc = find_file_desc_raw(FDINFO_REG, rfe.orig_id);
-		if (fdesc == NULL) {
-			pr_err("Remap for non existing file %#x\n",
-					rfe.orig_id);
-			break;
-		}
-
-		rfe.remap_id &= ~REMAP_GHOST;
-		rfi = container_of(fdesc, struct reg_file_info, d);
-		pr_info("Configuring remap %#x -> %#x\n", rfi->rfe.id, rfe.remap_id);
-		ret = open_remap_ghost(rfi, &rfe);
-		if (ret < 0)
-			break;
-	}
-
-	close(fd);
-	return ret;
-}
-
-static int dump_ghost_file(int _fd, u32 id, const struct stat *st)
-{
-	int img, fd;
-	struct ghost_file_entry gfe;
-	char lpath[32];
-
-	pr_info("Dumping ghost file contents (id %#x)\n", id);
-
-	img = open_image(CR_FD_GHOST_FILE, O_DUMP, id);
-	if (img < 0)
-		return -1;
-
-	/*
-	 * Reopen file locally since it may have no read
-	 * permissions when drained
-	 */
-	snprintf(lpath, sizeof(lpath), "/proc/self/fd/%d", _fd);
-	fd = open(lpath, O_RDONLY);
-	if (fd < 0) {
-		pr_perror("Can't open ghost original file");
-		return -1;
-	}
-
-	gfe.uid = st->st_uid;
-	gfe.gid = st->st_gid;
-	gfe.mode = st->st_mode;
-
-	if (write_img(img, &gfe))
-		return -1;
-
-	if (copy_file(fd, img, st->st_size))
-		return -1;
-
-	close(fd);
-	close(img);
-	return 0;
-}
-
-static int dump_ghost_remap(char *path, const struct stat *st, int lfd, u32 id)
-{
-	struct ghost_file_dumpee *gf;
-	struct remap_file_path_entry rpe;
-
-	pr_info("Dumping ghost file for fd %d id %#x\n", lfd, id);
-
-	if (st->st_size > MAX_GHOST_FILE_SIZE) {
-		pr_err("Can't dump ghost file %s of %lu size\n",
-				path, st->st_size);
-		return -1;
-	}
-
-	list_for_each_entry(gf, &ghost_files, list)
-		if ((gf->dev == st->st_dev) && (gf->ino == st->st_ino))
-			goto dump_entry;
-
-	gf = xmalloc(sizeof(*gf));
-	if (gf == NULL)
-		return -1;
-
-	gf->dev = st->st_dev;
-	gf->ino = st->st_ino;
-	gf->id = ghost_file_ids++;
-	list_add_tail(&gf->list, &ghost_files);
-
-	if (dump_ghost_file(lfd, gf->id, st))
-		return -1;
-
-dump_entry:
-	rpe.orig_id = id;
-	rpe.remap_id = gf->id | REMAP_GHOST;
-
-	return write_img(fdset_fd(glob_fdset, CR_FD_REMAP_FPATH), &rpe);
-}
-
-static int check_path_remap(char *path, const struct stat *ost, int lfd, u32 id)
-{
-	int ret;
-	struct stat pst;
-
-	if (ost->st_nlink == 0)
-		/*
-		 * Unpleasant, but easy case. File is completely invisible
-		 * from the FS. Just dump its contents and that's it. But
-		 * be careful whether anybody still has any of its hardlinks
-		 * also open.
-		 */
-		return dump_ghost_remap(path, ost, lfd, id);
-
-	ret = stat(path, &pst);
-	if (ret < 0) {
-		/*
-		 * FIXME linked file, but path is not accessible (unless any
-		 * other error occurred). We can create a temporary link to it
-		 * uning linkat with AT_EMPTY_PATH flag and remap it to this
-		 * name.
-		 */
-		pr_perror("Can't stat path");
-		return -1;
-	}
-
-	if ((pst.st_ino != ost->st_ino) || (pst.st_dev != ost->st_dev)) {
-		/*
-		 * FIXME linked file, but the name we see it by is reused
-		 * by somebody else.
-		 */
-		pr_err("Unaccessible path opened %u:%u, need %u:%u\n",
-				(int)pst.st_dev, (int)pst.st_ino,
-				(int)ost->st_dev, (int)ost->st_ino);
-		return -1;
-	}
-
-	/*
-	 * File is linked and visible by the name it is opened by
-	 * this task. Go ahead and dump it.
-	 */
-	return 0;
-}
-
-
 int dump_one_reg_file(int lfd, u32 id, const struct fd_parms *p)
 {
-	char fd_str[128];
-	char path[PATH_MAX];
-	int len, rfd;
 	struct reg_file_entry rfe;
+	int img;
 
-	snprintf(fd_str, sizeof(fd_str), "/proc/self/fd/%d", lfd);
-	len = readlink(fd_str, path, sizeof(path) - 1);
-	if (len < 0) {
-		pr_perror("Can't readlink %s", fd_str);
-		return len;
-	}
-
-	path[len] = '\0';
-	pr_info("Dumping path for %d fd via self %d [%s]\n",
-			p->fd, lfd, path);
-
-	if (check_path_remap(path, &p->stat, lfd, id))
+	if (fpath_dump_one(lfd, p, &rfe.id_fpath))
 		return -1;
 
-	rfe.len = len;
 	rfe.flags = p->flags;
 	rfe.pos = p->pos;
 	rfe.id = id;
 	rfe.fown = p->fown;
 
-	rfd = fdset_fd(glob_fdset, CR_FD_REG_FILES);
-
-	if (write_img(rfd, &rfe))
-		return -1;
-	if (write_img_buf(rfd, path, len))
+	img = fdset_fd(glob_fdset, CR_FD_REG_FILES);
+	if (write_img(img, &rfe))
 		return -1;
 
 	return 0;
@@ -394,7 +114,7 @@ int collect_reg_files(void)
 		return -1;
 
 	while (1) {
-		int len;
+		struct fpath_remap *remap;
 
 		rfi = xmalloc(sizeof(*rfi));
 		ret = -1;
@@ -406,18 +126,16 @@ int collect_reg_files(void)
 		if (ret <= 0)
 			break;
 
-		len = rfi->rfe.len;
-		rfi->path = xmalloc(len + 1);
-		ret = -1;
-		if (rfi->path == NULL)
-			break;
-
-		ret = read_img_buf(fd, rfi->path, len);
-		if (ret < 0)
+		remap = lookup_fpath(rfi->rfe.id_fpath);
+		if (!remap) {
+			ret = -1;
+			pr_err("Can't find path for id %x\n", rfi->rfe.id);
 			break;
+		}
+		rfi->path = (char *)remap->fpe.path.str;
 
-		rfi->remap_path = NULL;
-		rfi->path[len] = '\0';
+		if (remap->gf)
+			rfi->remap_path = remap->gf->rst.path;
 
 		pr_info("Collected [%s] ID %#x\n", rfi->path, rfi->rfe.id);
 		file_desc_add(&rfi->d, rfi->rfe.id, &reg_desc_ops);
@@ -430,5 +148,5 @@ int collect_reg_files(void)
 
 	close(fd);
 
-	return collect_remaps();
+	return ret;
 }
diff --git a/fpath-remap.c b/fpath-remap.c
new file mode 100644
index 0000000..b2e64b3
--- /dev/null
+++ b/fpath-remap.c
@@ -0,0 +1,408 @@
+#include <stdlib.h>
+#include <unistd.h>
+#include <errno.h>
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/sendfile.h>
+
+#include "crtools.h"
+
+#include "files.h"
+#include "image.h"
+#include "list.h"
+#include "util.h"
+
+#include "fpath-remap.h"
+
+/* Used for checkpoint only */
+static LIST_HEAD(ghost_files);
+
+/*
+ * This constant is selected without any calculations. Just do not
+ * want to pick up too big files with us in the image.
+ */
+#define GHOST_FILE_MAX_SIZE	MEGA(1)
+
+static u32 fpath_ids = 1;
+static u32 ghost_file_ids = 1;
+
+static struct list_head fpath_remap_hash[256];
+static struct list_head ghost_file_hash[128];
+
+static struct ghost_file *lookup_ghost(u32 id)
+{
+	u32 hash = id % ARRAY_SIZE(ghost_file_hash);
+	struct ghost_file *gf;
+
+	list_for_each_entry(gf, &ghost_file_hash[hash], list) {
+		if (gf->id == id)
+			return gf;
+	}
+
+	return NULL;
+}
+
+static void hash_ghost(struct ghost_file *gf)
+{
+	u32 hash = gf->id % ARRAY_SIZE(ghost_file_hash);
+	list_add(&gf->list, &ghost_file_hash[hash]);
+}
+
+struct fpath_remap *lookup_fpath(u32 id)
+{
+	u32 hash = id % ARRAY_SIZE(fpath_remap_hash);
+	struct fpath_remap *remap;
+
+	list_for_each_entry(remap, &fpath_remap_hash[hash], list) {
+		if (remap->fpe.id == id)
+			return remap;
+	}
+
+	return NULL;
+}
+
+static void hash_fpath_remap(struct fpath_remap *remap)
+{
+	u32 hash = remap->fpe.id % ARRAY_SIZE(fpath_remap_hash);
+	list_add(&remap->list, &fpath_remap_hash[hash]);
+}
+
+static int __copy_file(int dst, int src, size_t bytes)
+{
+	ssize_t written = 0;
+	size_t chunk = bytes ? bytes : 4096;
+
+	while (1) {
+		ssize_t ret;
+
+		ret = sendfile(dst, src, NULL, chunk);
+		if (ret < 0) {
+			pr_perror("Can't send data to a file");
+			return -1;
+		}
+
+		if (ret == 0) {
+			if (bytes && (written != bytes)) {
+				pr_err("File size mismatch %lu/%lu\n",
+				       written, bytes);
+				return -1;
+			}
+			break;
+		}
+
+		written += ret;
+	}
+
+	return 0;
+}
+
+static struct ghost_file *open_ghost(const char *path, u32 id)
+{
+	struct ghost_file_entry e;
+	struct ghost_file *gf;
+	int img, gfd;
+
+	gf = lookup_ghost(id);
+	if (gf)
+		goto found;
+
+	/*
+	 * Ghost not found. We will create one in the same dir
+	 * as the very first client of it thus resolving any
+	 * issues with cross-device links.
+	 */
+	pr_info("Opening ghost file %#x for %s\n", id, path);
+
+	gf = xzalloc(sizeof(*gf));
+	if (!gf)
+		return NULL;
+
+	gf->rst.path = xmalloc(PATH_MAX);
+	if (!gf->rst.path)
+		goto err;
+
+	img = open_image_ro(CR_FD_GHOST_FILE, id);
+	if (img < 0)
+		goto err;
+
+	if (read_img(img, &e) < 0)
+		goto err;
+
+	if (e.id != id) {
+		pr_err("Ghost files image corrupted for id %x (got %x)\n",
+			id, e.id);
+		goto err;
+	}
+
+	snprintf(gf->rst.path, PATH_MAX, "%s.cr.%x.ghost", path, id);
+	gfd = open(gf->rst.path, O_WRONLY | O_CREAT | O_EXCL, e.mode);
+	if (gfd < 0) {
+		pr_perror("Can't open ghost file %s", gf->rst.path);
+		goto err;
+	}
+
+	if (fchown(gfd, e.uid, e.gid) < 0) {
+		pr_perror("Can't reset user/group on ghost %#x\n", id);
+		goto err;
+	}
+
+	if (e.size) {
+		if (__copy_file(gfd, img, e.size) < 0)
+			goto err;
+	}
+
+	close(img);
+	close(gfd);
+
+	gf->id = id;
+	hash_ghost(gf);
+
+found:
+	return gf;
+
+err:
+	xfree(gf->rst.path);
+	xfree(gf);
+	return NULL;
+}
+
+static int fpath_check_remap(char *path, const struct stat *ost)
+{
+	struct stat pst;
+	int ret;
+
+	BUILD_BUG_ON(PATH_REMAP_FAIL != -1);
+
+	if (ost->st_nlink == 0)
+		return PATH_REMAP_GHOST;
+
+	ret = stat(path, &pst);
+	if (ret < 0) {
+		/*
+		 * FIXME linked file, but path is not accessible (unless any
+		 * other error occurred). We can create a temporary link to it
+		 * uning linkat with AT_EMPTY_PATH flag and remap it to this
+		 * name.
+		 */
+		pr_perror("Can't stat path");
+		return PATH_REMAP_FAIL;
+	}
+
+	if ((pst.st_ino != ost->st_ino) || (pst.st_dev != ost->st_dev)) {
+		/*
+		 * FIXME linked file, but the name we see it by is reused
+		 * by somebody else.
+		 */
+		pr_err("Unaccessible path opened %u:%u, need %u:%u\n",
+		       (int)pst.st_dev, (int)pst.st_ino,
+		       (int)ost->st_dev, (int)ost->st_ino);
+		return PATH_REMAP_FAIL;
+	}
+
+	/*
+	 * File is linked and visible by the name it is opened by
+	 * this task. Go ahead and dump it.
+	 */
+	return PATH_REMAP_NEW;
+}
+
+static int fpath_dump_ghost_file(u32 id, char *path, char *self, const struct stat *st)
+{
+	struct ghost_file_entry e;
+	int fd, img;
+
+	pr_info("Dumping ghost file %s (id %#x)\n", path, id);
+
+	if (st->st_size > GHOST_FILE_MAX_SIZE) {
+		pr_err("Ghost files size is too big %liM (max %liM)\n",
+		       (long)MBYTES(st->st_size),
+		       (long)MBYTES(GHOST_FILE_MAX_SIZE));
+			return -1;
+	}
+
+	img = open_image(CR_FD_GHOST_FILE, O_DUMP, id);
+	if (img < 0)
+		return -1;
+
+	/*
+	 * Reopen file locally since it may have no read
+	 * permissions when drained
+	 */
+	fd = open(self, O_RDONLY);
+	if (fd < 0) {
+		pr_perror("Can't open ghost original file %s", path);
+		return -1;
+	}
+
+	e.id	= id;
+	e.uid	= st->st_uid;
+	e.gid	= st->st_gid;
+	e.mode	= st->st_mode;
+	e.size	= st->st_size;
+
+	if (write_img(img, &e) < 0) {
+		return -1;
+	}
+
+	if (st->st_size) {
+		if (__copy_file(img, fd, st->st_size))
+			return -1;
+	}
+
+	close(fd);
+	close(img);
+	return 0;
+}
+
+static int fpath_dump_ghost(char *path, char *self, const struct stat *st, u32 *ghost_id)
+{
+	struct ghost_file *gf;
+
+	pr_info("Lookup for ghost file %s\n", path);
+
+	list_for_each_entry(gf, &ghost_files, list) {
+		if ((gf->cpt.dev == st->st_dev) && (gf->cpt.ino == st->st_ino)) {
+			pr_info("Found id %#x\n", gf->id);
+			goto found;
+		}
+	}
+
+	gf = xzalloc(sizeof(*gf));
+	if (!gf)
+		return -1;
+
+	gf->id		= ghost_file_ids++;
+	gf->cpt.dev	= st->st_dev;
+	gf->cpt.ino	= st->st_ino;
+
+	list_add_tail(&gf->list, &ghost_files);
+
+	if (fpath_dump_ghost_file(gf->id, path, self, st))
+		return -1;
+
+found:
+	*ghost_id = gf->id;
+	return 0;
+}
+
+int fpath_dump_one(int lfd, const struct fd_parms *p, u32 *fpath_id)
+{
+	struct fpath_entry e = { };
+	char path[PATH_MAX];
+	int len, img, ret;
+	u32 id_ghost = 0;
+	char self[64];
+
+	snprintf(self, sizeof(self), "/proc/self/fd/%d", lfd);
+	len = readlink(self, path, sizeof(path) - 1);
+	if (len < 0) {
+		pr_perror("Can't readlink on %s", self);
+		return -1;
+	}
+	path[len] = '\0';
+
+	pr_info("Dumping path for %d fd via self %d [%s]\n", p->fd, lfd, path);
+
+	ret = fpath_check_remap(path, &p->stat);
+	switch (ret) {
+	case PATH_REMAP_NEW:
+		break;
+	case PATH_REMAP_GHOST:
+		/*
+		 * Unpleasant, but easy case. File is completely invisible
+		 * from the FS. Just dump its contents and that's it. But
+		 * be careful whether anybody still has any of its hardlinks
+		 * also open.
+		 */
+		ret = fpath_dump_ghost(path, self, &p->stat, &id_ghost);
+		if (ret < 0)
+			return -1;
+		break;
+	default:
+		return -1;
+	}
+
+	img = fdset_fd(glob_fdset, CR_FD_FPATH_REMAP);
+
+	e.id		= fpath_ids++;
+	e.id_remap	= id_ghost;
+	e.path.len	= len;
+
+	if (write_img(img, &e) < 0)
+		return -1;
+	if (write_img_buf(img, path, len) < 0)
+		return -1;
+
+	*fpath_id = e.id;
+	return 0;
+}
+
+void clear_ghost_files(void)
+{
+	struct ghost_file *gf;
+	int i;
+
+	pr_info("Unlinking ghosts\n");
+	for (i = 0; i < ARRAY_SIZE(ghost_file_hash); i++) {
+		list_for_each_entry(gf, &ghost_file_hash[i], list) {
+			pr_info("\t`- %s\n", gf->rst.path);
+			unlink(gf->rst.path);
+		}
+	}
+}
+
+int collect_fpath_remaps(void)
+{
+	struct fpath_remap *remap = NULL;
+	struct fpath_entry tmp;
+	int img, ret = -1, i;
+
+	for (i = 0; i < ARRAY_SIZE(ghost_file_hash); i++)
+		INIT_LIST_HEAD(&ghost_file_hash[i]);
+
+	for (i = 0; i < ARRAY_SIZE(fpath_remap_hash); i++)
+		INIT_LIST_HEAD(&fpath_remap_hash[i]);
+
+	img = open_image_ro(CR_FD_FPATH_REMAP);
+	if (img < 0)
+		return -1;
+
+	while (1) {
+		ret = read_img_eof(img, &tmp);
+		if (ret < 0)
+			goto err;
+		else if (ret == 0) {
+			remap = NULL;
+			break;
+		}
+
+		remap = xmalloc(tmp.path.len + 1 + sizeof(*remap));
+		if (!remap) {
+			ret = -1;
+			goto err;
+		}
+
+		INIT_LIST_HEAD(&remap->list);
+		remap->fpe	= tmp;
+		remap->gf	= NULL;
+
+		ret = read_img_buf(img, remap->fpe.path.str, tmp.path.len);
+		if (ret < 0)
+			goto err;
+		remap->fpe.path.str[tmp.path.len] = 0;
+
+		if (remap->fpe.id_remap) {
+			remap->gf = open_ghost((char *)remap->fpe.path.str, remap->fpe.id_remap);
+			if (!remap->gf)
+				goto err;
+		}
+
+		hash_fpath_remap(remap);
+	}
+
+err:
+	xfree(remap);
+	close(img);
+	return ret;
+}
diff --git a/include/crtools.h b/include/crtools.h
index 4a7875d..7c435e2 100644
--- a/include/crtools.h
+++ b/include/crtools.h
@@ -54,7 +54,7 @@ enum {
 	CR_FD_UNIXSK,
 	CR_FD_PIPES,
 	CR_FD_PIPES_DATA,
-	CR_FD_REMAP_FPATH,
+	CR_FD_FPATH_REMAP,
 	CR_FD_EVENTFD,
 	CR_FD_EVENTPOLL,
 	CR_FD_EVENTPOLL_TFD,
@@ -109,7 +109,7 @@ void show_sigacts(int fd_sigacts, struct cr_options *o);
 void show_itimers(int fd, struct cr_options *o);
 void show_creds(int fd, struct cr_options *o);
 void show_fs(int fd, struct cr_options *o);
-void show_remap_files(int fd, struct cr_options *o);
+void show_fpath_remap(int fd, struct cr_options *o);
 void show_ghost_file(int fd, struct cr_options *o);
 void show_fown_cont(fown_t *fown);
 void show_eventfds(int fd, struct cr_options *o);
diff --git a/include/fpath-remap.h b/include/fpath-remap.h
new file mode 100644
index 0000000..b4fab9c
--- /dev/null
+++ b/include/fpath-remap.h
@@ -0,0 +1,49 @@
+#ifndef FPATH_REMAP_H__
+#define FPATH_REMAP_H__
+
+#include "types.h"
+#include "compiler.h"
+#include "list.h"
+
+#include "image.h"
+
+/*
+ * Numbers are important here, make sure
+ * the caller is prepared for numbering
+ * change.
+ */
+enum {
+	PATH_REMAP_FAIL = -1,
+	PATH_REMAP_FOUND,
+	PATH_REMAP_NEW,
+	PATH_REMAP_GHOST,
+};
+
+struct ghost_file {
+	struct list_head	list;
+	u32			id;
+
+	union {
+		struct {
+			u32	dev;
+			u32	ino;
+		} cpt;			/* used on checkpoint */
+
+		struct {
+			char	*path;
+		} rst;			/* used on restore */
+	};
+};
+
+struct fpath_remap {
+	struct list_head	list;
+	struct ghost_file	*gf;
+	struct fpath_entry	fpe;	/* should be last member */
+};
+
+extern int collect_fpath_remaps(void);
+extern int fpath_dump_one(int lfd, const struct fd_parms *p, u32 *fpath_id);
+extern struct fpath_remap *lookup_fpath(u32 id);
+extern void clear_ghost_files(void);
+
+#endif /* FPATH_REMAP_H__ */
diff --git a/include/image.h b/include/image.h
index afd1561..07916e7 100644
--- a/include/image.h
+++ b/include/image.h
@@ -31,7 +31,7 @@
 #define REG_FILES_MAGIC		0x50363636 /* Belgorod */
 #define FS_MAGIC		0x51403912 /* Voronezh */
 #define MM_MAGIC		0x57492820 /* Pskov */
-#define REMAP_FPATH_MAGIC	0x59133954 /* Vologda */
+#define FPATH_REMAP_MAGIC	0x59133954 /* Vologda */
 #define GHOST_FILE_MAGIC	0x52583605 /* Oryol */
 #define TCP_STREAM_MAGIC	0x51465506 /* Orenburg */
 #define EVENTFD_MAGIC		0x44523722 /* Anapa */
@@ -66,30 +66,36 @@ typedef struct {
 	u32	pid;
 } __packed fown_t;
 
+typedef struct {
+	u32	len;
+	u8	str[0];
+} qstr_t;
+
+/*
+ * File path may be remapped to something else
+ * (say a deleted path, that named ghost files)
+ */
+struct fpath_entry {
+	u32	id;
+	u32	id_remap;	/* usually 0, or id of a ghost file */
+	qstr_t	path;
+}  __packed fpath_t;
+
 struct reg_file_entry {
 	u32	id;
-	u16	flags;
-	u16	len;
+	u32	id_fpath;
 	u64	pos;
 	fown_t	fown;
-	u8	name[0];
-} __packed;
-
-struct remap_file_path_entry {
-	u32	orig_id;
-	u32	remap_id;
+	u16	flags;
 } __packed;
 
-/*
- * Top bit set in the tgt id means we've remapped
- * to a ghost file.
- */
-#define REMAP_GHOST	(1 << 31)
-
 struct ghost_file_entry {
+	u32	id;
 	u32	uid;
 	u32	gid;
 	u32	mode;
+	u64	size;
+	u8	data[0];
 } __packed;
 
 struct eventfd_file_entry {
diff --git a/include/util.h b/include/util.h
index 669558c..8d96a7f 100644
--- a/include/util.h
+++ b/include/util.h
@@ -278,7 +278,6 @@ static inline dev_t kdev_to_odev(u32 kdev)
 	return (kdev_major(kdev) << 8) | kdev_minor(kdev);
 }
 
-int copy_file(int fd_in, int fd_out, size_t bytes);
 bool is_anon_inode(struct statfs *statfs);
 int is_anon_link_type(int lfd, char *type);
 
diff --git a/util.c b/util.c
index 4a62016..c09f481 100644
--- a/util.c
+++ b/util.c
@@ -313,35 +313,6 @@ int get_service_fd(int type)
 	return rlimit.rlim_cur - type;
 }
 
-int copy_file(int fd_in, int fd_out, size_t bytes)
-{
-	ssize_t written = 0;
-	size_t chunk = bytes ? bytes : 4096;
-
-	while (1) {
-		ssize_t ret;
-
-		ret = sendfile(fd_out, fd_in, NULL, chunk);
-		if (ret < 0) {
-			pr_perror("Can't send data to ghost file");
-			return -1;
-		}
-
-		if (ret == 0) {
-			if (bytes && (written != bytes)) {
-				pr_err("Ghost file size mismatch %lu/%lu\n",
-						written, bytes);
-				return -1;
-			}
-			break;
-		}
-
-		written += ret;
-	}
-
-	return 0;
-}
-
 #ifndef ANON_INODE_FS_MAGIC
 # define ANON_INODE_FS_MAGIC 0x09041934
 #endif
-- 
1.7.7.6



More information about the CRIU mailing list