[CRIU] [PATCH] Add checkpoint/restore for /proc/pid/exe symlink

Cyrill Gorcunov gorcunov at openvz.org
Mon Feb 6 10:41:36 EST 2012


This patch adds the ability to checkpoint/restore the
/proc/pid/exe symlink: if the process we've just
checkpointed was running, say, /path/to/exe, then at
restore time we bring this path back.

A few notes

 - If the file whose symlink we're about to restore
   has been deleted after the checkpoint procedure and
   no longer exists -- we print an error message but do
   not interrupt the restore procedure, because it's not
   that critical

 - Also there is a restriction from the kernel side: if
   the existing /proc/pid/exe is already mapped more than
   once, the kernel will refuse to change the symlink,
   so we need to restore it late, when the mmaps of crtools
   itself are already unmapped (i.e. via a late call in
   restorer.c).

In any case -- we don't treat inability to restore
/proc/pid/exe symlink as a critical error.

Signed-off-by: Cyrill Gorcunov <gorcunov at openvz.org>
---
 cr-dump.c          |   40 +++++++++++++++++--------
 cr-restore.c       |   12 +++++++-
 files.c            |   13 ++++++++
 include/image.h    |    9 ++++-
 include/restorer.h |    1 +
 include/types.h    |    1 +
 restorer.c         |   80 +++++++++++++++++++++++++++++++++++++++++++++------
 7 files changed, 130 insertions(+), 26 deletions(-)

diff --git a/cr-dump.c b/cr-dump.c
index f5e568d..6d0fbb4 100644
--- a/cr-dump.c
+++ b/cr-dump.c
@@ -138,26 +138,35 @@ err:
 	return ret;
 }
 
-static int dump_cwd(int pid_dir, struct cr_fdset *cr_fdset)
+static int dump_task_special_files(int pid_dir, struct cr_fdset *cr_fdset)
 {
-	int ret = -1;
-	int fd;
-	struct fd_parms p = {
-		.fd_name = FDINFO_CWD,
-		.pos = 0,
-		.flags = 0,
-		.id = NULL,
-	};
+	struct fd_parms params;
+	int fd, ret;
 
+	/* Dump /proc/pid/cwd */
+	params = (struct fd_parms){ .fd_name = FDINFO_CWD, };
 	fd = open_proc(pid_dir, "cwd");
 	if (fd < 0) {
 		pr_perror("Failed to openat cwd");
 		return -1;
 	}
+	ret = dump_one_reg_file(FDINFO_FD, &params, fd, cr_fdset, 1);
+	if (ret)
+		return ret;
 
-	return dump_one_reg_file(FDINFO_FD, &p, fd, cr_fdset, 1);
-}
+	/* Dump /proc/pid/exe */
+	params = (struct fd_parms){ .fd_name = FDINFO_EXE, };
+	fd = open_proc(pid_dir, "exe");
+	if (fd < 0) {
+		pr_perror("Failed to openat exe");
+		return -1;
+	}
+	ret = dump_one_reg_file(FDINFO_FD, &params, fd, cr_fdset, 1);
+	if (ret)
+		return ret;
 
+	return ret;
+}
 
 static int dump_pipe_and_data(int lfd, struct pipe_entry *e,
 			      struct cr_fdset *cr_fdset)
@@ -336,8 +345,13 @@ static int dump_task_files(pid_t pid, int pid_dir, struct cr_fdset *cr_fdset)
 	pr_info("Dumping opened files (pid: %d)\n", pid);
 	pr_info("----------------------------------------\n");
 
-	if (dump_cwd(pid_dir, cr_fdset)) {
-		pr_perror("Can't dump %d's cwd", pid);
+	/*
+	 * Dump special files at the beginning. We might need
+	 * to re-read them in restorer, so better to make it
+	 * fast.
+	 */
+	if (dump_task_special_files(pid_dir, cr_fdset)) {
+		pr_err("Can't dump special files\n");
 		return -1;
 	}
 
diff --git a/cr-restore.c b/cr-restore.c
index 12a01e3..91a71f6 100644
--- a/cr-restore.c
+++ b/cr-restore.c
@@ -1615,6 +1615,7 @@ static void sigreturn_restore(pid_t pstree_pid, pid_t pid)
 	LIST_HEAD(self_vma_list);
 	struct vma_area *vma_area;
 	int fd_self_vmas = -1;
+	int fd_fdinfo = -1;
 	int fd_core = -1;
 	int num;
 
@@ -1650,8 +1651,16 @@ static void sigreturn_restore(pid_t pstree_pid, pid_t pid)
 		goto err;
 
 	fd_core = open_image_ro_nocheck(FMT_FNAME_CORE_OUT, pid);
-	if (fd_core < 0)
+	if (fd_core < 0) {
 		pr_perror("Can't open core-out-%d", pid);
+		goto err;
+	}
+
+	fd_fdinfo = open_image_ro(CR_FD_FDINFO, pid);
+	if (fd_fdinfo < 0) {
+		pr_perror("Can't open fdinfo-%d", pid);
+		goto err;
+	}
 
 	if (get_image_path(self_vmas_path, sizeof(self_vmas_path),
 			   FMT_FNAME_VMAS, pid))
@@ -1810,6 +1819,7 @@ static void sigreturn_restore(pid_t pstree_pid, pid_t pid)
 	task_args->fd_self_vmas	= fd_self_vmas;
 	task_args->logfd	= get_logfd();
 	task_args->sigchld_act	= sigchld_act;
+	task_args->fd_fdinfo	= fd_fdinfo;
 
 	ret = prepare_itimers(pid, task_args);
 	if (ret < 0)
diff --git a/files.c b/files.c
index 2350049..7c9ceb3 100644
--- a/files.c
+++ b/files.c
@@ -5,6 +5,7 @@
 #include <linux/limits.h>
 
 #include <sys/types.h>
+#include <sys/prctl.h>
 #include <sys/mman.h>
 #include <sys/stat.h>
 #include <sys/socket.h>
@@ -230,6 +231,16 @@ static int restore_cwd(struct fdinfo_entry *fe, int fd)
 	return 0;
 }
 
+/* Early stage only fetches and logs the exe path; the symlink is set in restorer.c */
+static int restore_exe_early(struct fdinfo_entry *fe, int fd)
+{
+	char exe_path[PATH_MAX];
+	int err = get_file_path(exe_path, fe, fd);
+
+	if (!err)
+		pr_info("Skips restoring EXE (%s) symlink at this stage\n", exe_path);
+	return err ? -1 : 0;
+}
 
 struct fdinfo_list_entry *find_fdinfo_list_entry(int pid, int fd, struct fdinfo_desc *fi)
 {
@@ -454,6 +465,8 @@ static int open_special_fdinfo(int pid, struct fdinfo_entry *fe,
 		return open_fmap(pid, fe, fdinfo_fd);
 	if (fe->addr == FDINFO_CWD)
 		return restore_cwd(fe, fdinfo_fd);
+	if (fe->addr == FDINFO_EXE)
+		return restore_exe_early(fe, fdinfo_fd);
 
 	BUG_ON(1);
 	return -1;
diff --git a/include/image.h b/include/image.h
index 58b13fc..ce7e3d1 100644
--- a/include/image.h
+++ b/include/image.h
@@ -28,7 +28,9 @@
 #define FDINFO_FD	1
 #define FDINFO_MAP	2
 
-#define FDINFO_CWD	(~0ULL)
+/* Specials */
+#define FDINFO_CWD	(-1ULL)
+#define FDINFO_EXE	(-2ULL)
 
 #define PAGE_IMAGE_SIZE	4096
 #define PAGE_RSS	1
@@ -46,7 +48,10 @@ struct fdinfo_entry {
 	u8	name[0];
 } __packed;
 
-#define fd_is_special(fe)	(((fe)->type != FDINFO_FD) || ((fe)->addr == FDINFO_CWD))
+#define fd_is_special(fe)		\
+	(((fe)->type != FDINFO_FD)  ||	\
+	 ((fe)->addr == FDINFO_CWD) ||	\
+	 ((fe)->addr == FDINFO_EXE))
 
 struct shmem_entry {
 	u64	start;
diff --git a/include/restorer.h b/include/restorer.h
index 5152a86..e0ecf6b 100644
--- a/include/restorer.h
+++ b/include/restorer.h
@@ -65,6 +65,7 @@ struct task_restore_core_args {
 
 	int				pid;			/* task pid */
 	int				fd_core;		/* opened core file */
+	int				fd_fdinfo;		/* opened files dump file */
 	int				fd_self_vmas;		/* opened file with running VMAs to unmap */
 	int				logfd;
 	bool				restore_threads;	/* if to restore threads */
diff --git a/include/types.h b/include/types.h
index b34f024..b329630 100644
--- a/include/types.h
+++ b/include/types.h
@@ -43,6 +43,7 @@
 # define PR_SET_MM_ENV_START		10
 # define PR_SET_MM_ENV_END		11
 # define PR_SET_MM_AUXV			12
+# define PR_SET_MM_EXE_FILE		13
 
 #define PR_SETUP_VDSO_AT	36
 
diff --git a/restorer.c b/restorer.c
index e450de4..9d3435b 100644
--- a/restorer.c
+++ b/restorer.c
@@ -10,6 +10,7 @@
 #include <fcntl.h>
 #include <unistd.h>
 #include <sched.h>
+#include <limits.h>
 
 #include "compiler.h"
 #include "types.h"
@@ -223,6 +224,63 @@ long restore_thread(struct thread_restore_args *args)
 		sys_exit(0);
 }
 
+#define sys_prctl_safe(opcode, val1, val2, val3)		\
+	do {							\
+		ret = sys_prctl(opcode, val1, val2, val3, 0);	\
+		if (ret) {					\
+			write_num_n(__LINE__);			\
+			write_num_n(ret);			\
+		}						\
+	} while (0)
+
+/* Late /proc/pid/exe restore from the fdinfo image; failures are only logged */
+static void restore_self_exe_late(struct task_restore_core_args *args)
+{
+	struct fdinfo_entry fe;
+	char *path;
+	int ret;
+
+	for (;;) {
+		if (sys_read(args->fd_fdinfo, &fe, sizeof(fe)) != sizeof(fe)) {
+			write_string("sys_read lookup failed\n");
+			goto err;
+		}
+		if (fe.type == FDINFO_FD && fe.addr == FDINFO_EXE)
+			break;
+		/* Skip the name of a record we are not interested in */
+		if (fe.len && sys_lseek(args->fd_fdinfo, fe.len, SEEK_CUR) < 0) {
+			write_string("sys_lseek lookup failed\n");
+			goto err;
+		}
+	}
+
+	/* One extra byte for the terminating NUL */
+	path = (char *)sys_mmap(NULL, fe.len + 1,
+				PROT_READ | PROT_WRITE,
+				MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+	if ((long)path < 0) {
+		write_string("sys_mmap failed\n");
+		write_num_n(fe.len);
+		goto err;
+	}
+	if (sys_read(args->fd_fdinfo, path, fe.len) != fe.len) {
+		sys_munmap(path, fe.len + 1);	/* same size as mapped */
+		write_string("sys_read for exe-path failed\n");
+		goto err;
+	}
+	path[fe.len] = '\0';
+
+	write_string("Restoring EXE (");
+	write_string(path);
+	write_string(")\n");
+	sys_prctl_safe(PR_SET_MM, PR_SET_MM_EXE_FILE, (long)path, fe.len + 1);
+	sys_munmap(path, fe.len + 1);
+	return;
+err:
+	write_num_n(__LINE__);
+	write_num_n(sys_getpid());
+}
+
 /*
  * The main routine to restore task via sigreturn.
  * This one is very special, we never return there
@@ -434,16 +492,6 @@ long restore_task(struct task_restore_core_args *args)
 	/*
 	 * Tune up the task fields.
 	 */
-
-#define sys_prctl_safe(opcode, val1, val2, val3)		\
-	do {							\
-		ret = sys_prctl(opcode, val1, val2, val3, 0);	\
-		if (ret) {					\
-			write_num_n(__LINE__);			\
-			write_num_n(ret);			\
-		}						\
-	} while (0)
-
 	sys_prctl_safe(PR_SET_NAME, (long)core_entry->tc.comm, 0, 0);
 	sys_prctl_safe(PR_SET_MM, PR_SET_MM_START_CODE, (long)core_entry->tc.mm_start_code, 0);
 	sys_prctl_safe(PR_SET_MM, PR_SET_MM_END_CODE,	(long)core_entry->tc.mm_end_code, 0);
@@ -460,6 +508,18 @@ long restore_task(struct task_restore_core_args *args)
 							sizeof(core_entry->tc.mm_saved_auxv));
 
 	/*
+	 * Restoring our own /proc/pid/exe symlink is a bit
+	 * tricky -- we must be sure no mmaps of the exe
+	 * file we're going to replace remain, that's why
+	 * it's done this late. Moreover, we have to pass
+	 * the path of the new exe, which means the code
+	 * should allocate enough memory for a (maybe!)
+	 * pretty long file name.
+	 */
+	restore_self_exe_late(args);
+	sys_close(args->fd_fdinfo);
+
+	/*
 	 * We need to prepare a valid sigframe here, so
 	 * after sigreturn the kernel will pick up the
 	 * registers from the frame, set them up and
-- 
1.7.7.6



More information about the CRIU mailing list