[CRIU] [PATCH 2/5] restore: Add checkpoint/restore for
/proc/pid/exe symlink
Cyrill Gorcunov
gorcunov at openvz.org
Tue Feb 7 10:25:02 EST 2012
This patch adds the ability to checkpoint/restore the
/proc/pid/exe symlink, so if a process we've just
checkpointed was running, say, /path/to/exe, then at restore
time we bring this path back.
There is a restriction on the kernel side: if the
existing /proc/pid/exe is already mapped more than
once, the kernel will refuse to change the symlink,
so we need to restore it late, when the mmaps of crtools
itself are already unmapped (i.e. via a late call in
restorer.c).
Signed-off-by: Cyrill Gorcunov <gorcunov at openvz.org>
---
cr-dump.c | 29 ++++++++++++++++++++++
cr-restore.c | 12 ++++++++-
files.c | 13 ++++++++++
include/image.h | 9 +++++-
include/restorer.h | 1 +
include/types.h | 1 +
restorer.c | 68 ++++++++++++++++++++++++++++++++++++++++++++++++++++
7 files changed, 130 insertions(+), 3 deletions(-)
diff --git a/cr-dump.c b/cr-dump.c
index f5e568d..a5ca0e6 100644
--- a/cr-dump.c
+++ b/cr-dump.c
@@ -158,6 +158,25 @@ static int dump_cwd(int pid_dir, struct cr_fdset *cr_fdset)
return dump_one_reg_file(FDINFO_FD, &p, fd, cr_fdset, 1);
}
+static int dump_exe(int pid_dir, struct cr_fdset *cr_fdset)
+{
+ int ret = -1;
+ int fd;
+ struct fd_parms p = {
+ .fd_name = FDINFO_EXE,
+ .pos = 0,
+ .flags = 0,
+ .id = NULL,
+ };
+
+ fd = open_proc(pid_dir, "exe");
+ if (fd < 0) {
+ pr_perror("Failed to openat exe");
+ return -1;
+ }
+
+ return dump_one_reg_file(FDINFO_FD, &p, fd, cr_fdset, 1);
+}
static int dump_pipe_and_data(int lfd, struct pipe_entry *e,
struct cr_fdset *cr_fdset)
@@ -336,11 +355,21 @@ static int dump_task_files(pid_t pid, int pid_dir, struct cr_fdset *cr_fdset)
pr_info("Dumping opened files (pid: %d)\n", pid);
pr_info("----------------------------------------\n");
+ /*
+ * Dump special files at the beginning. We might need
+ * to re-read them in restorer, so better to make it
+ * fast.
+ */
if (dump_cwd(pid_dir, cr_fdset)) {
pr_perror("Can't dump %d's cwd", pid);
return -1;
}
+ if (dump_exe(pid_dir, cr_fdset)) {
+ pr_perror("Can't dump %d's exe", pid);
+ return -1;
+ }
+
fd_dir = opendir_proc(pid_dir, "fd");
if (!fd_dir) {
pr_perror("Can't open %d's fd", pid);
diff --git a/cr-restore.c b/cr-restore.c
index 12a01e3..91a71f6 100644
--- a/cr-restore.c
+++ b/cr-restore.c
@@ -1615,6 +1615,7 @@ static void sigreturn_restore(pid_t pstree_pid, pid_t pid)
LIST_HEAD(self_vma_list);
struct vma_area *vma_area;
int fd_self_vmas = -1;
+ int fd_fdinfo = -1;
int fd_core = -1;
int num;
@@ -1650,8 +1651,16 @@ static void sigreturn_restore(pid_t pstree_pid, pid_t pid)
goto err;
fd_core = open_image_ro_nocheck(FMT_FNAME_CORE_OUT, pid);
- if (fd_core < 0)
+ if (fd_core < 0) {
pr_perror("Can't open core-out-%d", pid);
+ goto err;
+ }
+
+ fd_fdinfo = open_image_ro(CR_FD_FDINFO, pid);
+ if (fd_fdinfo < 0) {
+ pr_perror("Can't open fdinfo-%d", pid);
+ goto err;
+ }
if (get_image_path(self_vmas_path, sizeof(self_vmas_path),
FMT_FNAME_VMAS, pid))
@@ -1810,6 +1819,7 @@ static void sigreturn_restore(pid_t pstree_pid, pid_t pid)
task_args->fd_self_vmas = fd_self_vmas;
task_args->logfd = get_logfd();
task_args->sigchld_act = sigchld_act;
+ task_args->fd_fdinfo = fd_fdinfo;
ret = prepare_itimers(pid, task_args);
if (ret < 0)
diff --git a/files.c b/files.c
index 1b700e4..4dd5a4b 100644
--- a/files.c
+++ b/files.c
@@ -5,6 +5,7 @@
#include <linux/limits.h>
#include <sys/types.h>
+#include <sys/prctl.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/socket.h>
@@ -226,6 +227,16 @@ static int restore_cwd(struct fdinfo_entry *fe, int fd)
return 0;
}
+static int restore_exe_early(struct fdinfo_entry *fe, int fd)
+{
+ char path[PATH_MAX];
+
+ if (get_file_path(path, fe, fd))
+ return -1;
+
+ pr_info("Skips restoring EXE (%s) symlink at this stage\n", path);
+ return 0;
+}
struct fdinfo_list_entry *find_fdinfo_list_entry(int pid, int fd, struct fdinfo_desc *fi)
{
@@ -450,6 +461,8 @@ static int open_special_fdinfo(int pid, struct fdinfo_entry *fe,
return open_fmap(pid, fe, fdinfo_fd);
if (fe->addr == FDINFO_CWD)
return restore_cwd(fe, fdinfo_fd);
+ if (fe->addr == FDINFO_EXE)
+ return restore_exe_early(fe, fdinfo_fd);
BUG_ON(1);
return -1;
diff --git a/include/image.h b/include/image.h
index 58b13fc..ce7e3d1 100644
--- a/include/image.h
+++ b/include/image.h
@@ -28,7 +28,9 @@
#define FDINFO_FD 1
#define FDINFO_MAP 2
-#define FDINFO_CWD (~0ULL)
+/* Specials */
+#define FDINFO_CWD (-1ULL)
+#define FDINFO_EXE (-2ULL)
#define PAGE_IMAGE_SIZE 4096
#define PAGE_RSS 1
@@ -46,7 +48,10 @@ struct fdinfo_entry {
u8 name[0];
} __packed;
-#define fd_is_special(fe) (((fe)->type != FDINFO_FD) || ((fe)->addr == FDINFO_CWD))
+#define fd_is_special(fe) \
+ (((fe)->type != FDINFO_FD) || \
+ ((fe)->addr == FDINFO_CWD) || \
+ ((fe)->addr == FDINFO_EXE))
struct shmem_entry {
u64 start;
diff --git a/include/restorer.h b/include/restorer.h
index 5152a86..e0ecf6b 100644
--- a/include/restorer.h
+++ b/include/restorer.h
@@ -65,6 +65,7 @@ struct task_restore_core_args {
int pid; /* task pid */
int fd_core; /* opened core file */
+ int fd_fdinfo; /* opened files dump file */
int fd_self_vmas; /* opened file with running VMAs to unmap */
int logfd;
bool restore_threads; /* if to restore threads */
diff --git a/include/types.h b/include/types.h
index b34f024..b329630 100644
--- a/include/types.h
+++ b/include/types.h
@@ -43,6 +43,7 @@
# define PR_SET_MM_ENV_START 10
# define PR_SET_MM_ENV_END 11
# define PR_SET_MM_AUXV 12
+# define PR_SET_MM_EXE_FILE 13
#define PR_SETUP_VDSO_AT 36
diff --git a/restorer.c b/restorer.c
index 401d128..d67288b 100644
--- a/restorer.c
+++ b/restorer.c
@@ -233,6 +233,60 @@ long restore_thread(struct thread_restore_args *args)
sys_exit(0);
}
+static long restore_self_exe_late(struct task_restore_core_args *args)
+{
+ struct fdinfo_entry fe;
+ long ret = -1;
+ char *path;
+
+ /*
+ * Path to exe file and its len is in image.
+ */
+ for (;;) {
+ if (sys_read(args->fd_fdinfo, &fe, sizeof(fe)) != sizeof(fe)) {
+ write_string("sys_read lookup failed\n");
+ goto err;
+ }
+
+ if (fe.type == FDINFO_FD && fe.addr == FDINFO_EXE)
+ break;
+
+ if (fe.len)
+ sys_lseek(args->fd_fdinfo, fe.len, SEEK_CUR);
+ }
+
+ path = (char *)sys_mmap(NULL, fe.len + 1,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ if ((long)path < 0) {
+ write_string("sys_mmap failed\n");
+ write_num_n(fe.len);
+ goto err;
+ }
+
+ if (sys_read(args->fd_fdinfo, path, fe.len) != fe.len) {
+ sys_munmap(path, fe.len);
+ write_string("sys_read for exe-path failed\n");
+ goto err;
+ }
+ path[fe.len] = '\0';
+
+ write_string("Restoring EXE (");
+ write_string(path);
+ write_string(")\n");
+
+ ret = sys_prctl_safe(PR_SET_MM, PR_SET_MM_EXE_FILE, (long)path, fe.len + 1);
+
+ sys_munmap(path, fe.len + 1);
+
+ return ret;
+
+err:
+ write_num_n(__LINE__);
+ write_num_n(sys_getpid());
+ return ret;
+}
+
/*
* The main routine to restore task via sigreturn.
* This one is very special, we never return there
@@ -462,6 +516,20 @@ long restore_task(struct task_restore_core_args *args)
goto core_restore_end;
/*
+ * Restoring own /proc/pid/exe symlink is a bit
+ * tricky -- we are to be sure no mmaps are
+ * done over exec we're going to change, that's
+ * why it's don that lately. Moreover, we are
+ * to pass a path to new exec which means the
+ * code should allocate memory enough for (maybe!)
+ * pretty long file name.
+ */
+ ret = restore_self_exe_late(args);
+ sys_close(args->fd_fdinfo);
+ if (ret)
+ goto core_restore_end;
+
+ /*
* We need to prepare a valid sigframe here, so
* after sigreturn the kernel will pick up the
* registers from the frame, set them up and
--
1.7.7.6
More information about the CRIU
mailing list