[CRIU] [PATCH] Add checkpoint/restore for /proc/pid/exe symlink
Cyrill Gorcunov
gorcunov at openvz.org
Mon Feb 6 10:41:36 EST 2012
This patch adds the ability to checkpoint/restore the
/proc/pid/exe symlink, so if a process we've just
checkpointed was running, say, /path/to/exe, then at restore
time we bring this path back.
A few notes
- If the file whose symlink we're about to restore
has been deleted after the checkpoint procedure and
no longer exists -- we print an error message but do
not interrupt the restore procedure because it's not
that critical
- Also there is a restriction from the kernel side: if the
existing /proc/pid/exe is already mapped more than
once, the kernel will refuse to change the symlink,
so we need to restore it late, when the mmaps of crtools
itself are already unmapped (i.e. via a late call in
restorer.c).
In any case -- we don't treat inability to restore
/proc/pid/exe symlink as a critical error.
Signed-off-by: Cyrill Gorcunov <gorcunov at openvz.org>
---
cr-dump.c | 40 +++++++++++++++++--------
cr-restore.c | 12 +++++++-
files.c | 13 ++++++++
include/image.h | 9 ++++-
include/restorer.h | 1 +
include/types.h | 1 +
restorer.c | 80 +++++++++++++++++++++++++++++++++++++++++++++------
7 files changed, 130 insertions(+), 26 deletions(-)
diff --git a/cr-dump.c b/cr-dump.c
index f5e568d..6d0fbb4 100644
--- a/cr-dump.c
+++ b/cr-dump.c
@@ -138,26 +138,35 @@ err:
return ret;
}
-static int dump_cwd(int pid_dir, struct cr_fdset *cr_fdset)
+static int dump_task_special_files(int pid_dir, struct cr_fdset *cr_fdset)
{
- int ret = -1;
- int fd;
- struct fd_parms p = {
- .fd_name = FDINFO_CWD,
- .pos = 0,
- .flags = 0,
- .id = NULL,
- };
+ struct fd_parms params;
+ int fd, ret;
+ /* Dump /proc/pid/cwd */
+ params = (struct fd_parms){ .fd_name = FDINFO_CWD, };
fd = open_proc(pid_dir, "cwd");
if (fd < 0) {
pr_perror("Failed to openat cwd");
return -1;
}
+ ret = dump_one_reg_file(FDINFO_FD, &params, fd, cr_fdset, 1);
+ if (ret)
+ return ret;
- return dump_one_reg_file(FDINFO_FD, &p, fd, cr_fdset, 1);
-}
+ /* Dump /proc/pid/exe */
+ params = (struct fd_parms){ .fd_name = FDINFO_EXE, };
+ fd = open_proc(pid_dir, "exe");
+ if (fd < 0) {
+ pr_perror("Failed to openat exe");
+ return -1;
+ }
+ ret = dump_one_reg_file(FDINFO_FD, &params, fd, cr_fdset, 1);
+ if (ret)
+ return ret;
+ return ret;
+}
static int dump_pipe_and_data(int lfd, struct pipe_entry *e,
struct cr_fdset *cr_fdset)
@@ -336,8 +345,13 @@ static int dump_task_files(pid_t pid, int pid_dir, struct cr_fdset *cr_fdset)
pr_info("Dumping opened files (pid: %d)\n", pid);
pr_info("----------------------------------------\n");
- if (dump_cwd(pid_dir, cr_fdset)) {
- pr_perror("Can't dump %d's cwd", pid);
+ /*
+ * Dump special files at the beginning. We might need
+ * to re-read them in restorer, so better to make it
+ * fast.
+ */
+ if (dump_task_special_files(pid_dir, cr_fdset)) {
+ pr_err("Can't dump special files\n");
return -1;
}
diff --git a/cr-restore.c b/cr-restore.c
index 12a01e3..91a71f6 100644
--- a/cr-restore.c
+++ b/cr-restore.c
@@ -1615,6 +1615,7 @@ static void sigreturn_restore(pid_t pstree_pid, pid_t pid)
LIST_HEAD(self_vma_list);
struct vma_area *vma_area;
int fd_self_vmas = -1;
+ int fd_fdinfo = -1;
int fd_core = -1;
int num;
@@ -1650,8 +1651,16 @@ static void sigreturn_restore(pid_t pstree_pid, pid_t pid)
goto err;
fd_core = open_image_ro_nocheck(FMT_FNAME_CORE_OUT, pid);
- if (fd_core < 0)
+ if (fd_core < 0) {
pr_perror("Can't open core-out-%d", pid);
+ goto err;
+ }
+
+ fd_fdinfo = open_image_ro(CR_FD_FDINFO, pid);
+ if (fd_fdinfo < 0) {
+ pr_perror("Can't open fdinfo-%d", pid);
+ goto err;
+ }
if (get_image_path(self_vmas_path, sizeof(self_vmas_path),
FMT_FNAME_VMAS, pid))
@@ -1810,6 +1819,7 @@ static void sigreturn_restore(pid_t pstree_pid, pid_t pid)
task_args->fd_self_vmas = fd_self_vmas;
task_args->logfd = get_logfd();
task_args->sigchld_act = sigchld_act;
+ task_args->fd_fdinfo = fd_fdinfo;
ret = prepare_itimers(pid, task_args);
if (ret < 0)
diff --git a/files.c b/files.c
index 2350049..7c9ceb3 100644
--- a/files.c
+++ b/files.c
@@ -5,6 +5,7 @@
#include <linux/limits.h>
#include <sys/types.h>
+#include <sys/prctl.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/socket.h>
@@ -230,6 +231,16 @@ static int restore_cwd(struct fdinfo_entry *fe, int fd)
return 0;
}
+static int restore_exe_early(struct fdinfo_entry *fe, int fd)
+{
+ char path[PATH_MAX];
+
+ if (get_file_path(path, fe, fd))
+ return -1;
+
+ pr_info("Skips restoring EXE (%s) symlink at this stage\n", path);
+ return 0;
+}
struct fdinfo_list_entry *find_fdinfo_list_entry(int pid, int fd, struct fdinfo_desc *fi)
{
@@ -454,6 +465,8 @@ static int open_special_fdinfo(int pid, struct fdinfo_entry *fe,
return open_fmap(pid, fe, fdinfo_fd);
if (fe->addr == FDINFO_CWD)
return restore_cwd(fe, fdinfo_fd);
+ if (fe->addr == FDINFO_EXE)
+ return restore_exe_early(fe, fdinfo_fd);
BUG_ON(1);
return -1;
diff --git a/include/image.h b/include/image.h
index 58b13fc..ce7e3d1 100644
--- a/include/image.h
+++ b/include/image.h
@@ -28,7 +28,9 @@
#define FDINFO_FD 1
#define FDINFO_MAP 2
-#define FDINFO_CWD (~0ULL)
+/* Specials */
+#define FDINFO_CWD (-1ULL)
+#define FDINFO_EXE (-2ULL)
#define PAGE_IMAGE_SIZE 4096
#define PAGE_RSS 1
@@ -46,7 +48,10 @@ struct fdinfo_entry {
u8 name[0];
} __packed;
-#define fd_is_special(fe) (((fe)->type != FDINFO_FD) || ((fe)->addr == FDINFO_CWD))
+#define fd_is_special(fe) \
+ (((fe)->type != FDINFO_FD) || \
+ ((fe)->addr == FDINFO_CWD) || \
+ ((fe)->addr == FDINFO_EXE))
struct shmem_entry {
u64 start;
diff --git a/include/restorer.h b/include/restorer.h
index 5152a86..e0ecf6b 100644
--- a/include/restorer.h
+++ b/include/restorer.h
@@ -65,6 +65,7 @@ struct task_restore_core_args {
int pid; /* task pid */
int fd_core; /* opened core file */
+ int fd_fdinfo; /* opened files dump file */
int fd_self_vmas; /* opened file with running VMAs to unmap */
int logfd;
bool restore_threads; /* if to restore threads */
diff --git a/include/types.h b/include/types.h
index b34f024..b329630 100644
--- a/include/types.h
+++ b/include/types.h
@@ -43,6 +43,7 @@
# define PR_SET_MM_ENV_START 10
# define PR_SET_MM_ENV_END 11
# define PR_SET_MM_AUXV 12
+# define PR_SET_MM_EXE_FILE 13
#define PR_SETUP_VDSO_AT 36
diff --git a/restorer.c b/restorer.c
index e450de4..9d3435b 100644
--- a/restorer.c
+++ b/restorer.c
@@ -10,6 +10,7 @@
#include <fcntl.h>
#include <unistd.h>
#include <sched.h>
+#include <limits.h>
#include "compiler.h"
#include "types.h"
@@ -223,6 +224,63 @@ long restore_thread(struct thread_restore_args *args)
sys_exit(0);
}
+#define sys_prctl_safe(opcode, val1, val2, val3) \
+ do { \
+ ret = sys_prctl(opcode, val1, val2, val3, 0); \
+ if (ret) { \
+ write_num_n(__LINE__); \
+ write_num_n(ret); \
+ } \
+ } while (0)
+
+static void restore_self_exe_late(struct task_restore_core_args *args)
+{
+ struct fdinfo_entry fe;
+ char *path;
+ int ret;
+
+ for (;;) {
+ if (sys_read(args->fd_fdinfo, &fe, sizeof(fe)) != sizeof(fe)) {
+ write_string("sys_read lookup failed\n");
+ goto err;
+ }
+
+ if (fe.type == FDINFO_FD && fe.addr == FDINFO_EXE)
+ break;
+
+ if (fe.len)
+ sys_lseek(args->fd_fdinfo, fe.len, SEEK_CUR);
+ }
+
+ path = (char *)sys_mmap(NULL, fe.len + 1,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ if ((long)path < 0) {
+ write_string("sys_mmap failed\n");
+ write_num_n(fe.len);
+ goto err;
+ }
+
+ if (sys_read(args->fd_fdinfo, path, fe.len) != fe.len) {
+ sys_munmap(path, fe.len);
+ write_string("sys_read for exe-path failed\n");
+ goto err;
+ }
+ path[fe.len] = '\0';
+
+ write_string("Restoring EXE (");
+ write_string(path);
+ write_string(")\n");
+ sys_prctl_safe(PR_SET_MM, PR_SET_MM_EXE_FILE, (long)path, fe.len + 1);
+
+ sys_munmap(path, fe.len + 1);
+ return;
+
+err:
+ write_num_n(__LINE__);
+ write_num_n(sys_getpid());
+}
+
/*
* The main routine to restore task via sigreturn.
* This one is very special, we never return there
@@ -434,16 +492,6 @@ long restore_task(struct task_restore_core_args *args)
/*
* Tune up the task fields.
*/
-
-#define sys_prctl_safe(opcode, val1, val2, val3) \
- do { \
- ret = sys_prctl(opcode, val1, val2, val3, 0); \
- if (ret) { \
- write_num_n(__LINE__); \
- write_num_n(ret); \
- } \
- } while (0)
-
sys_prctl_safe(PR_SET_NAME, (long)core_entry->tc.comm, 0, 0);
sys_prctl_safe(PR_SET_MM, PR_SET_MM_START_CODE, (long)core_entry->tc.mm_start_code, 0);
sys_prctl_safe(PR_SET_MM, PR_SET_MM_END_CODE, (long)core_entry->tc.mm_end_code, 0);
@@ -460,6 +508,18 @@ long restore_task(struct task_restore_core_args *args)
sizeof(core_entry->tc.mm_saved_auxv));
/*
+ * Restoring own /proc/pid/exe symlink is a bit
+ * tricky -- we are to be sure no mmaps are
+ * done over exec we're going to change, that's
+ * why it's done that late. Moreover, we are
+ * to pass a path to new exec which means the
+ * code should allocate memory enough for (maybe!)
+ * pretty long file name.
+ */
+ restore_self_exe_late(args);
+ sys_close(args->fd_fdinfo);
+
+ /*
* We need to prepare a valid sigframe here, so
* after sigreturn the kernel will pick up the
* registers from the frame, set them up and
--
1.7.7.6
More information about the CRIU
mailing list