[CRIU] [crtools-bot for Cyrill Gorcunov ] restore: Add checkpoint/restore for /proc/pid/exe symlink

Cyrill Gorcunov gorcunov at openvz.org
Tue Feb 7 11:08:01 EST 2012


The commit is pushed to "master" and will appear on git://github.com/cyrillos/crtools.git
------>
commit 76a249282e3d8ed80694f3dacd532629c62477db
Author: Cyrill Gorcunov <gorcunov at openvz.org>
Date:   Tue Feb 7 19:32:11 2012 +0400

    restore: Add checkpoint/restore for /proc/pid/exe symlink
    
    This patch adds the ability to checkpoint/restore the
    /proc/pid/exe symlink, so if a process we've just
    checkpointed was running, say, /path/to/exe, then at restore
    time we bring this path back.
    
    There is a restriction on the kernel side: if the
    existing /proc/pid/exe is already mapped more than
    once, the kernel will refuse to change the symlink,
    so we need to restore it late, when the mmaps of crtools
    itself are already unmapped (i.e. via a late call in
    restorer.c).
    
    Signed-off-by: Cyrill Gorcunov <gorcunov at openvz.org>
    Acked-by: Pavel Emelyanov <xemul at parallels.com>
---
 cr-dump.c          |   29 ++++++++++++++++++++++
 cr-restore.c       |   12 ++++++++-
 files.c            |   13 ++++++++++
 include/image.h    |    9 +++++-
 include/restorer.h |    1 +
 include/types.h    |    1 +
 restorer.c         |   68 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 7 files changed, 130 insertions(+), 3 deletions(-)

diff --git a/cr-dump.c b/cr-dump.c
index f5e568d..a5ca0e6 100644
--- a/cr-dump.c
+++ b/cr-dump.c
@@ -158,6 +158,25 @@ static int dump_cwd(int pid_dir, struct cr_fdset *cr_fdset)
 	return dump_one_reg_file(FDINFO_FD, &p, fd, cr_fdset, 1);
 }
 
+/* Dump the exe link as a special FDINFO_EXE entry; -1 if it can't be opened. */
+static int dump_exe(int pid_dir, struct cr_fdset *cr_fdset)
+{
+	int fd;
+	struct fd_parms p = {
+		.fd_name = FDINFO_EXE,
+		.pos = 0,
+		.flags = 0,
+		.id = NULL,
+	};
+
+	fd = open_proc(pid_dir, "exe");
+	if (fd < 0) {
+		pr_perror("Failed to openat exe");
+		return -1;
+	}
+
+	return dump_one_reg_file(FDINFO_FD, &p, fd, cr_fdset, 1);
+}
 
 static int dump_pipe_and_data(int lfd, struct pipe_entry *e,
 			      struct cr_fdset *cr_fdset)
@@ -336,11 +355,21 @@ static int dump_task_files(pid_t pid, int pid_dir, struct cr_fdset *cr_fdset)
 	pr_info("Dumping opened files (pid: %d)\n", pid);
 	pr_info("----------------------------------------\n");
 
+	/*
+	 * Dump special files at the beginning. We might need
+	 * to re-read them in restorer, so better to make it
+	 * fast.
+	 */
 	if (dump_cwd(pid_dir, cr_fdset)) {
 		pr_perror("Can't dump %d's cwd", pid);
 		return -1;
 	}
 
+	if (dump_exe(pid_dir, cr_fdset)) {
+		pr_perror("Can't dump %d's exe", pid);
+		return -1;
+	}
+
 	fd_dir = opendir_proc(pid_dir, "fd");
 	if (!fd_dir) {
 		pr_perror("Can't open %d's fd", pid);
diff --git a/cr-restore.c b/cr-restore.c
index 12a01e3..91a71f6 100644
--- a/cr-restore.c
+++ b/cr-restore.c
@@ -1615,6 +1615,7 @@ static void sigreturn_restore(pid_t pstree_pid, pid_t pid)
 	LIST_HEAD(self_vma_list);
 	struct vma_area *vma_area;
 	int fd_self_vmas = -1;
+	int fd_fdinfo = -1;
 	int fd_core = -1;
 	int num;
 
@@ -1650,8 +1651,16 @@ static void sigreturn_restore(pid_t pstree_pid, pid_t pid)
 		goto err;
 
 	fd_core = open_image_ro_nocheck(FMT_FNAME_CORE_OUT, pid);
-	if (fd_core < 0)
+	if (fd_core < 0) {
 		pr_perror("Can't open core-out-%d", pid);
+		goto err;
+	}
+
+	fd_fdinfo = open_image_ro(CR_FD_FDINFO, pid);
+	if (fd_fdinfo < 0) {
+		pr_perror("Can't open fdinfo-%d", pid);
+		goto err;
+	}
 
 	if (get_image_path(self_vmas_path, sizeof(self_vmas_path),
 			   FMT_FNAME_VMAS, pid))
@@ -1810,6 +1819,7 @@ static void sigreturn_restore(pid_t pstree_pid, pid_t pid)
 	task_args->fd_self_vmas	= fd_self_vmas;
 	task_args->logfd	= get_logfd();
 	task_args->sigchld_act	= sigchld_act;
+	task_args->fd_fdinfo	= fd_fdinfo;
 
 	ret = prepare_itimers(pid, task_args);
 	if (ret < 0)
diff --git a/files.c b/files.c
index 1b700e4..39f03e3 100644
--- a/files.c
+++ b/files.c
@@ -5,6 +5,7 @@
 #include <linux/limits.h>
 
 #include <sys/types.h>
+#include <sys/prctl.h>
 #include <sys/mman.h>
 #include <sys/stat.h>
 #include <sys/socket.h>
@@ -226,6 +227,16 @@ static int restore_cwd(struct fdinfo_entry *fe, int fd)
 	return 0;
 }
 
+static int restore_exe_early(struct fdinfo_entry *fe, int fd)
+{
+	/* Exe link is restored late (kernel restriction); just skip its path. */
+	if (lseek(fd, fe->len, SEEK_CUR) < 0) {
+		pr_perror("Can't skip exe path in fdinfo image");
+		return -1;
+	}
+
+	return 0;
+}
 
 struct fdinfo_list_entry *find_fdinfo_list_entry(int pid, int fd, struct fdinfo_desc *fi)
 {
@@ -450,6 +461,8 @@ static int open_special_fdinfo(int pid, struct fdinfo_entry *fe,
 		return open_fmap(pid, fe, fdinfo_fd);
 	if (fe->addr == FDINFO_CWD)
 		return restore_cwd(fe, fdinfo_fd);
+	if (fe->addr == FDINFO_EXE)
+		return restore_exe_early(fe, fdinfo_fd);
 
 	BUG_ON(1);
 	return -1;
diff --git a/include/image.h b/include/image.h
index 58b13fc..ce7e3d1 100644
--- a/include/image.h
+++ b/include/image.h
@@ -28,7 +28,9 @@
 #define FDINFO_FD	1
 #define FDINFO_MAP	2
 
-#define FDINFO_CWD	(~0ULL)
+/* Special entries, told apart by reserved values of the addr field */
+#define FDINFO_CWD	(-1ULL)
+#define FDINFO_EXE	(-2ULL)
 
 #define PAGE_IMAGE_SIZE	4096
 #define PAGE_RSS	1
@@ -46,7 +48,10 @@ struct fdinfo_entry {
 	u8	name[0];
 } __packed;
 
-#define fd_is_special(fe)	(((fe)->type != FDINFO_FD) || ((fe)->addr == FDINFO_CWD))
+#define fd_is_special(fe)		\
+	(((fe)->type != FDINFO_FD)  ||	\
+	 ((fe)->addr == FDINFO_CWD) ||	\
+	 ((fe)->addr == FDINFO_EXE))
 
 struct shmem_entry {
 	u64	start;
diff --git a/include/restorer.h b/include/restorer.h
index 5152a86..e0ecf6b 100644
--- a/include/restorer.h
+++ b/include/restorer.h
@@ -65,6 +65,7 @@ struct task_restore_core_args {
 
 	int				pid;			/* task pid */
 	int				fd_core;		/* opened core file */
+	int				fd_fdinfo;		/* opened files dump file */
 	int				fd_self_vmas;		/* opened file with running VMAs to unmap */
 	int				logfd;
 	bool				restore_threads;	/* if to restore threads */
diff --git a/include/types.h b/include/types.h
index b34f024..b329630 100644
--- a/include/types.h
+++ b/include/types.h
@@ -43,6 +43,7 @@
 # define PR_SET_MM_ENV_START		10
 # define PR_SET_MM_ENV_END		11
 # define PR_SET_MM_AUXV			12
+# define PR_SET_MM_EXE_FILE		13
 
 #define PR_SETUP_VDSO_AT	36
 
diff --git a/restorer.c b/restorer.c
index 401d128..d67288b 100644
--- a/restorer.c
+++ b/restorer.c
@@ -233,6 +233,60 @@ long restore_thread(struct thread_restore_args *args)
 		sys_exit(0);
 }
 
+/*
+ * Find the FDINFO_EXE record in the fdinfo image and pass the path stored
+ * after it to PR_SET_MM_EXE_FILE; returns prctl's result, -1 on failure.
+ */
+static long restore_self_exe_late(struct task_restore_core_args *args)
+{
+	struct fdinfo_entry fe;
+	long ret = -1;
+	char *path;
+
+	for (;;) {
+		if (sys_read(args->fd_fdinfo, &fe, sizeof(fe)) != sizeof(fe)) {
+			write_string("sys_read lookup failed\n");
+			goto err;
+		}
+
+		if (fe.type == FDINFO_FD && fe.addr == FDINFO_EXE)
+			break;
+
+		if (fe.len)
+			sys_lseek(args->fd_fdinfo, fe.len, SEEK_CUR);
+	}
+
+	path = (char *)sys_mmap(NULL, fe.len + 1,
+				PROT_READ | PROT_WRITE,
+				MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+	if ((long)path < 0) {
+		write_string("sys_mmap failed\n");
+		write_num_n(fe.len);
+		goto err;
+	}
+
+	if (sys_read(args->fd_fdinfo, path, fe.len) != fe.len) {
+		sys_munmap(path, fe.len + 1);
+		write_string("sys_read for exe-path failed\n");
+		goto err;
+	}
+	path[fe.len] = '\0';
+
+	write_string("Restoring EXE (");
+	write_string(path);
+	write_string(")\n");
+
+	ret = sys_prctl_safe(PR_SET_MM, PR_SET_MM_EXE_FILE, (long)path, fe.len + 1);
+	sys_munmap(path, fe.len + 1);
+
+	return ret;
+
+err:
+	write_num_n(__LINE__);
+	write_num_n(sys_getpid());
+	return ret;
+}
+
 /*
  * The main routine to restore task via sigreturn.
  * This one is very special, we never return there
@@ -462,6 +516,20 @@ long restore_task(struct task_restore_core_args *args)
 		goto core_restore_end;
 
 	/*
+	 * Restoring own /proc/pid/exe symlink is a bit
+	 * tricky -- we have to be sure no mmaps of the
+	 * exec we're going to change are left, that's
+	 * why it's done this late. Moreover, we have
+	 * to pass a path to the new exec, which means the
+	 * code should allocate enough memory for a (maybe!)
+	 * pretty long file name.
+	 */
+	ret = restore_self_exe_late(args);
+	sys_close(args->fd_fdinfo);
+	if (ret)
+		goto core_restore_end;
+
+	/*
 	 * We need to prepare a valid sigframe here, so
 	 * after sigreturn the kernel will pick up the
 	 * registers from the frame, set them up and


More information about the CRIU mailing list