[CRIU] [PATCH] Rework parasite code
Cyrill Gorcunov
gorcunov at openvz.org
Wed Feb 15 09:02:12 EST 2012
- make the control block keep all the information
needed to run the injected syscall and parasite
blobs
- add ptrace_swap_area helper
- handle both parasite engine calls and injected
syscalls with a single __parasite_execute function
- drop jerr() usage
- bring back handling of in-flight signals from the
original program inside the parasite code
Signed-off-by: Cyrill Gorcunov <gorcunov at openvz.org>
---
cr-dump.c | 2 +-
include/parasite-syscall.h | 29 ++-
include/ptrace.h | 1 +
parasite-syscall.c | 494 +++++++++++++++++++++++---------------------
ptrace.c | 18 ++
5 files changed, 296 insertions(+), 248 deletions(-)
diff --git a/cr-dump.c b/cr-dump.c
index 472cb23..da00927 100644
--- a/cr-dump.c
+++ b/cr-dump.c
@@ -1225,7 +1225,7 @@ static int dump_one_task(struct pstree_item *item, struct cr_fdset *cr_fdset)
goto err;
}
- ret = parasite_cure_seized(parasite_ctl, &vma_area_list);
+ ret = parasite_cure_seized(parasite_ctl);
if (ret) {
pr_err("Can't cure (pid: %d) from parasite\n", pid);
goto err;
diff --git a/include/parasite-syscall.h b/include/parasite-syscall.h
index 9efb9c7..167822c 100644
--- a/include/parasite-syscall.h
+++ b/include/parasite-syscall.h
@@ -13,26 +13,31 @@
/* parasite control block */
struct parasite_ctl {
- pid_t pid; /* process pid where we live in */
+ pid_t pid; /* process pid where we live in */
void * remote_map;
void * local_map;
unsigned long map_length;
- unsigned long parasite_ip; /* service routine start ip */
- void * addr_cmd; /* addr for command */
- void * addr_args; /* address for arguments */
-};
-extern int can_run_syscall(unsigned long ip, unsigned long start, unsigned long end);
+ unsigned long parasite_ip; /* service routine start ip */
+ user_regs_struct_t regs_orig; /* original registers */
+ unsigned long syscall_ip; /* entry point of infection */
+ u8 code_orig[BUILTIN_SYSCALL_SIZE];
+ unsigned long status;
+
+ void * addr_cmd; /* addr for command */
+ void * addr_args; /* address for arguments */
+};
-extern int parasite_dump_pages_seized(struct parasite_ctl *ctl, struct list_head *vma_area_list,
- struct cr_fdset *cr_fdset);
extern int parasite_dump_sigacts_seized(struct parasite_ctl *ctl, struct cr_fdset *cr_fdset);
extern int parasite_dump_itimers_seized(struct parasite_ctl *ctl, struct cr_fdset *cr_fdset);
struct parasite_dump_misc;
-extern int parasite_dump_misc_seized(struct parasite_ctl *ctl, struct parasite_dump_misc *);
-
-extern struct parasite_ctl *parasite_infect_seized(pid_t pid, int pid_dir, struct list_head *vma_area_list);
-extern int parasite_cure_seized(struct parasite_ctl *ctl, struct list_head *vma_area_list);
+extern int parasite_dump_misc_seized(struct parasite_ctl *ctl, struct parasite_dump_misc *misc);
+extern int parasite_dump_pages_seized(struct parasite_ctl *ctl,
+ struct list_head *vma_area_list,
+ struct cr_fdset *cr_fdset);
+extern int parasite_cure_seized(struct parasite_ctl *ctl);
+extern struct parasite_ctl *parasite_infect_seized(pid_t pid, int pid_dir,
+ struct list_head *vma_area_list);
#endif /* PARASITE_SYSCALL_H_ */
diff --git a/include/ptrace.h b/include/ptrace.h
index 57e63f0..e08f862 100644
--- a/include/ptrace.h
+++ b/include/ptrace.h
@@ -37,6 +37,7 @@ extern int seize_task(pid_t pid);
extern int unseize_task(pid_t pid, enum cr_task_state st);
extern int ptrace_peek_area(pid_t pid, void *dst, void *addr, long bytes);
extern int ptrace_poke_area(pid_t pid, void *src, void *addr, long bytes);
+extern int ptrace_swap_area(pid_t pid, void *dst, void *src, long bytes);
extern int ptrace_show_area(pid_t pid, void *addr, long bytes);
extern int ptrace_show_area_r(pid_t pid, void *addr, long bytes);
diff --git a/parasite-syscall.c b/parasite-syscall.c
index f6c13d0..828e3a2 100644
--- a/parasite-syscall.c
+++ b/parasite-syscall.c
@@ -36,6 +36,11 @@ static const char code_syscall[] = {0x0f, 0x05, 0xcc, 0xcc,
#define code_syscall_size (round_up(sizeof(code_syscall), sizeof(long)))
#define parasite_size (round_up(sizeof(parasite_blob), sizeof(long)))
+static int can_run_syscall(unsigned long ip, unsigned long start, unsigned long end)
+{
+ return ip >= start && ip < (end - code_syscall_size);
+}
+
static int syscall_fits_vma_area(struct vma_area *vma_area)
{
return can_run_syscall((unsigned long)vma_area->vma.start,
@@ -43,9 +48,20 @@ static int syscall_fits_vma_area(struct vma_area *vma_area)
(unsigned long)vma_area->vma.end);
}
-int can_run_syscall(unsigned long ip, unsigned long start, unsigned long end)
+static struct vma_area *get_vma_by_ip(struct list_head *vma_area_list, unsigned long ip)
{
- return ip >= start && ip < (end - code_syscall_size);
+ struct vma_area *vma_area;
+
+ list_for_each_entry(vma_area, vma_area_list, list) {
+ if (!in_vma_area(vma_area, ip))
+ continue;
+ if (!(vma_area->vma.prot & PROT_EXEC))
+ continue;
+ if (syscall_fits_vma_area(vma_area))
+ return vma_area;
+ }
+
+ return NULL;
}
/* Note it's destructive on @regs */
@@ -60,218 +76,220 @@ static void parasite_setup_regs(unsigned long new_ip, user_regs_struct_t *regs)
regs->flags &= ~(X86_EFLAGS_TF | X86_EFLAGS_DF | X86_EFLAGS_IF);
}
-/* @regs must already have been tuned up for parasite execution */
-static int syscall_seized(pid_t pid, user_regs_struct_t *regs)
+/* we run at @regs->ip */
+static int __parasite_execute(struct parasite_ctl *ctl, user_regs_struct_t *regs)
{
- unsigned long start_ip;
- char saved[sizeof(code_syscall)];
+ pid_t pid = ctl->pid;
siginfo_t siginfo;
int status;
int ret = -1;
- BUILD_BUG_ON(sizeof(code_syscall) != BUILTIN_SYSCALL_SIZE);
- BUILD_BUG_ON(!is_log2(sizeof(code_syscall)));
-
- start_ip = (unsigned long)regs->ip;
- jerr(ptrace_peek_area(pid, (void *)saved, (void *)start_ip, code_syscall_size), err);
- jerr(ptrace_poke_area(pid, (void *)code_syscall, (void *)start_ip, code_syscall_size), err_restore);
-
again:
- jerr(ptrace(PTRACE_SETREGS, pid, NULL, regs), err_restore);
+ if (ptrace(PTRACE_SETREGS, pid, NULL, regs)) {
+ pr_err("Can't set registers (pid: %d)\n", pid);
+ goto err;
+ }
/*
* Most ideas are taken from Tejun Heo's parasite thread
* https://code.google.com/p/ptrace-parasite/
*/
- /*
- * Run the parasite code, at the completion it'll trigger
- * int3 and inform us that all is done.
- */
+ if (ptrace(PTRACE_CONT, pid, NULL, NULL)) {
+ pr_err("Can't continue (pid: %d)\n", pid);
+ goto err;
+ }
+
+ if (wait4(pid, &status, __WALL, NULL) != pid) {
+ pr_err("Waited pid mismatch (pid: %d)\n", pid);
+ goto err;
+ }
+
+ if (!WIFSTOPPED(status)) {
+ pr_err("Task is still running (pid: %d)\n", pid);
+ goto err;
+ }
+
+ if (ptrace(PTRACE_GETSIGINFO, pid, NULL, &siginfo)) {
+ pr_err("Can't get siginfo (pid: %d)\n", pid);
+ goto err;
+ }
- jerr(ptrace(PTRACE_CONT, pid, NULL, NULL), err_restore);
- jerr(wait4(pid, &status, __WALL, NULL) != pid, err_restore);
- jerr(!WIFSTOPPED(status), err_restore);
- jerr(ptrace(PTRACE_GETSIGINFO, pid, NULL, &siginfo),err_restore);
+ if (ptrace(PTRACE_GETREGS, pid, NULL, regs)) {
+ pr_err("Can't obtain registers (pid: %d)\n", pid);
+ goto err;
+ }
if (WSTOPSIG(status) != SIGTRAP || siginfo.si_code != SI_KERNEL) {
retry_signal:
- /* pr_debug("** delivering signal %d si_code=%d\n",
- siginfo.si_signo, siginfo.si_code); */
+ pr_debug("** delivering signal %d si_code=%d\n",
+ siginfo.si_signo, siginfo.si_code);
+
/* FIXME: jerr(siginfo.si_code > 0, err_restore); */
- jerr(ptrace(PTRACE_INTERRUPT, pid, NULL, NULL), err_restore);
- jerr(ptrace(PTRACE_CONT, pid, NULL, (void *)(unsigned long)siginfo.si_signo), err_restore);
- jerr(wait4(pid, &status, __WALL, NULL) != pid, err_restore);
- jerr(!WIFSTOPPED(status), err_restore);
- jerr(ptrace(PTRACE_GETSIGINFO, pid, NULL, &siginfo), err_restore);
+ /*
+ * This requires some explanation. If a signal from original
+ * program delivered while we're trying to execute our
+ * injected blob -- we need to setup original registers back
+ * so the kernel would make sigframe for us and update the
+ * former registers.
+ *
+ * Then we should swap registers back to our modified copy
+ * and retry.
+ */
+
+ if (ptrace(PTRACE_SETREGS, pid, NULL, &ctl->regs_orig)) {
+ pr_panic("Can't set registers (pid: %d)\n", pid);
+ goto err;
+ }
+
+ if (ptrace(PTRACE_INTERRUPT, pid, NULL, NULL)) {
+ pr_panic("Can't interrupt (pid: %d)\n", pid);
+ goto err;
+ }
+
+ if (ptrace(PTRACE_CONT, pid, NULL, (void *)(unsigned long)siginfo.si_signo)) {
+ pr_err("Can't continue (pid: %d)\n", pid);
+ goto err;
+ }
+
+ if (wait4(pid, &status, __WALL, NULL) != pid) {
+ pr_err("Waited pid mismatch (pid: %d)\n", pid);
+ goto err;
+ }
+
+ if (!WIFSTOPPED(status)) {
+ pr_err("Task is still running (pid: %d)\n", pid);
+ goto err;
+ }
+
+ if (ptrace(PTRACE_GETSIGINFO, pid, NULL, &siginfo)) {
+ pr_err("Can't get siginfo (pid: %d)\n", pid);
+ goto err;
+ }
if (siginfo.si_code >> 8 != PTRACE_EVENT_STOP)
goto retry_signal;
+ /*
+ * Signal is delivered, so we should update
+ * original registers.
+ */
+ {
+ user_regs_struct_t r;
+ if (ptrace(PTRACE_GETREGS, pid, NULL, &r)) {
+ pr_err("Can't obtain registers (pid: %d)\n", pid);
+ goto err;
+ }
+ ctl->regs_orig = r;
+ }
+
goto again;
}
- ret = 0;
-
/*
* Our code is done.
*/
- jerr(ptrace(PTRACE_INTERRUPT, pid, NULL, NULL), err_restore);
- jerr(ptrace(PTRACE_CONT, pid, NULL, NULL), err_restore);
+ if (ptrace(PTRACE_INTERRUPT, pid, NULL, NULL)) {
+ pr_panic("Can't interrupt (pid: %d)\n", pid);
+ goto err;
+ }
- jerr(wait4(pid, &status, __WALL, NULL) != pid, err_restore);
- jerr(!WIFSTOPPED(status), err_restore);
- jerr(ptrace(PTRACE_GETSIGINFO, pid, NULL, &siginfo), err_restore);
+ if (ptrace(PTRACE_CONT, pid, NULL, NULL)) {
+ pr_err("Can't continue (pid: %d)\n", pid);
+ goto err;
+ }
- jerr((siginfo.si_code >> 8 != PTRACE_EVENT_STOP), err_restore);
+ if (wait4(pid, &status, __WALL, NULL) != pid) {
+ pr_err("Waited pid mismatch (pid: %d)\n", pid);
+ goto err;
+ }
- jerr(ptrace(PTRACE_GETREGS, pid, NULL, regs), err_restore);
+ if (!WIFSTOPPED(status)) {
+ pr_err("Task is still running (pid: %d)\n", pid);
+ goto err;
+ }
- ret = 0;
+ if (ptrace(PTRACE_GETSIGINFO, pid, NULL, &siginfo)) {
+ pr_err("Can't get siginfo (pid: %d)\n", pid);
+ goto err;
+ }
-err_restore:
- if (ptrace_poke_area(pid, (void *)saved, (void *)start_ip, code_syscall_size)) {
- pr_panic("Crap... Can't restore data (pid: %d)\n", pid);
- ret = -1;
+ if (siginfo.si_code >> 8 != PTRACE_EVENT_STOP) {
+ pr_err("si_code doesn't match (pid: %d si_code: %d)\n",
+ pid, siginfo.si_code);
+ goto err;
}
+
+ ret = 0;
err:
return ret;
}
-static void *mmap_seized(pid_t pid, user_regs_struct_t *regs,
- void *addr, size_t length, int prot,
- int flags, int fd, off_t offset)
+static int parasite_execute(unsigned long cmd, struct parasite_ctl *ctl,
+ parasite_status_t *args, int args_size)
{
- void *mmaped = NULL;
int ret;
- regs->ax = (unsigned long)__NR_mmap; /* mmap */
- regs->di = (unsigned long)addr; /* @addr */
- regs->si = (unsigned long)length; /* @length */
- regs->dx = (unsigned long)prot; /* @prot */
- regs->r10 = (unsigned long)flags; /* @flags */
- regs->r8 = (unsigned long)fd; /* @fd */
- regs->r9 = (unsigned long)offset; /* @offset */
-
- ret = syscall_seized(pid, regs);
- if (ret)
- goto err;
- mmaped = (void *)regs->ax;
+ user_regs_struct_t regs = ctl->regs_orig;
- /* error code from the kernel space */
- if ((long)mmaped < 0)
- mmaped = NULL;
-err:
- return mmaped;
-}
+ memcpy(ctl->addr_cmd, &cmd, sizeof(cmd));
+ memcpy(ctl->addr_args, args, args_size);
-static int munmap_seized(pid_t pid, user_regs_struct_t *regs,
- void *addr, size_t length)
-{
- int ret;
+ parasite_setup_regs(ctl->parasite_ip, &regs);
- regs->ax = (unsigned long)__NR_munmap; /* mmap */
- regs->di = (unsigned long)addr; /* @addr */
- regs->si = (unsigned long)length; /* @length */
+ ret = __parasite_execute(ctl, &regs);
- ret = syscall_seized(pid, regs);
+ memcpy(args, ctl->addr_args, args_size);
if (!ret)
- ret = (int)regs->ax;
+ ret = args->ret;
return ret;
}
-static struct vma_area *get_vma_by_ip(struct list_head *vma_area_list, unsigned long ip)
-{
- struct vma_area *vma_area;
-
- list_for_each_entry(vma_area, vma_area_list, list) {
- if (!in_vma_area(vma_area, ip))
- continue;
- if (!(vma_area->vma.prot & PROT_EXEC))
- continue;
- if (syscall_fits_vma_area(vma_area))
- return vma_area;
- }
-
- return NULL;
-}
-
-static int parasite_execute(unsigned long cmd, struct parasite_ctl *ctl,
- parasite_status_t *args, int args_size)
+static void *mmap_seized(struct parasite_ctl *ctl,
+ void *addr, size_t length, int prot,
+ int flags, int fd, off_t offset)
{
- user_regs_struct_t regs, regs_orig;
- int status, ret = -1;
- siginfo_t siginfo;
-
- jerr(ptrace(PTRACE_GETREGS, ctl->pid, NULL, &regs_orig), err);
-
- /*
- * Pass the command first, it's immutable.
- */
- memcpy(ctl->addr_cmd, &cmd, sizeof(cmd));
-again:
- regs = regs_orig;
- parasite_setup_regs(ctl->parasite_ip, &regs);
- jerr(ptrace(PTRACE_SETREGS, ctl->pid, NULL, &regs), err_restore);
-
- memcpy(ctl->addr_args, args, args_size);
-
- jerr(ptrace(PTRACE_CONT, (long)ctl->pid, NULL, NULL), err_restore);
- jerr(wait4((long)ctl->pid, &status, __WALL, NULL) != (long)ctl->pid, err_restore);
- jerr(!WIFSTOPPED(status), err_restore);
- jerr(ptrace(PTRACE_GETSIGINFO, (long)ctl->pid, NULL, &siginfo), err_restore);
-
- if (WSTOPSIG(status) != SIGTRAP || siginfo.si_code != SI_KERNEL) {
-retry_signal:
- /* pr_debug("** delivering signal %d si_code=%d\n",
- siginfo.si_signo, siginfo.si_code); */
- /* FIXME: jerr(siginfo.si_code > 0, err_restore_full); */
- jerr(ptrace(PTRACE_SETREGS, (long)ctl->pid, NULL, (void *)&regs_orig), err_restore);
- jerr(ptrace(PTRACE_INTERRUPT, (long)ctl->pid, NULL, NULL), err_restore);
- jerr(ptrace(PTRACE_CONT, (long)ctl->pid, NULL, (void *)(unsigned long)siginfo.si_signo), err_restore);
+ user_regs_struct_t regs = ctl->regs_orig;
+ void *map = NULL;
+ int ret;
- jerr(wait4((long)ctl->pid, &status, __WALL, NULL) != (long)ctl->pid, err_restore);
- jerr(!WIFSTOPPED(status), err_restore);
- jerr(ptrace(PTRACE_GETSIGINFO, (long)ctl->pid, NULL, &siginfo), err_restore);
+ regs.ax = (unsigned long)__NR_mmap; /* mmap */
+ regs.di = (unsigned long)addr; /* @addr */
+ regs.si = (unsigned long)length; /* @length */
+ regs.dx = (unsigned long)prot; /* @prot */
+ regs.r10= (unsigned long)flags; /* @flags */
+ regs.r8 = (unsigned long)fd; /* @fd */
+ regs.r9 = (unsigned long)offset; /* @offset */
- if (siginfo.si_code >> 8 != PTRACE_EVENT_STOP)
- goto retry_signal;
+ parasite_setup_regs(ctl->syscall_ip, &regs);
- goto again;
- }
+ ret = __parasite_execute(ctl, &regs);
+ if (ret)
+ goto err;
- /*
- * Check if error happened during dumping.
- */
- memcpy(args, ctl->addr_args, args_size);
- if (args->ret) {
- pr_panic("Dumping sigactions failed with %li (%li) at %li\n",
- args->ret, args->sys_ret, args->line);
- goto err_restore;
- }
+ if ((long)regs.ax > 0)
+ map = (void *)regs.ax;
+err:
+ return map;
+}
- /*
- * Our code is done.
- */
- jerr(ptrace(PTRACE_INTERRUPT, (long)ctl->pid, NULL, NULL), err_restore);
- jerr(ptrace(PTRACE_CONT, (long)ctl->pid, NULL, NULL), err_restore);
+static int munmap_seized(struct parasite_ctl *ctl, void *addr, size_t length)
+{
+ user_regs_struct_t regs = ctl->regs_orig;
+ int ret;
- jerr(wait4((long)ctl->pid, &status, __WALL, NULL) != (long)ctl->pid, err_restore);
- jerr(!WIFSTOPPED(status), err_restore);
- jerr(ptrace(PTRACE_GETSIGINFO, (long)ctl->pid, NULL, &siginfo), err_restore);
+ regs.ax = (unsigned long)__NR_munmap; /* mmap */
+ regs.di = (unsigned long)addr; /* @addr */
+ regs.si = (unsigned long)length; /* @length */
- jerr((siginfo.si_code >> 8 != PTRACE_EVENT_STOP), err_restore);
+ parasite_setup_regs(ctl->syscall_ip, &regs);
- ret = 0;
+ ret = __parasite_execute(ctl, &regs);
+ if (!ret)
+ ret = (int)regs.ax;
-err_restore:
- if (ptrace(PTRACE_SETREGS, (long)ctl->pid, NULL, &regs_orig)) {
- pr_panic("Can't restore registers (pid: %d)\n", ctl->pid);
- ret = -1;
- }
-err:
return ret;
}
@@ -313,8 +331,8 @@ out:
return ret;
}
-static int parasite_prep_file(int type,
- struct parasite_ctl *ctl, struct cr_fdset *fdset)
+static int parasite_prep_file(int type, struct parasite_ctl *ctl,
+ struct cr_fdset *fdset)
{
int ret;
@@ -330,24 +348,23 @@ static int parasite_prep_file(int type,
return 0;
}
-static int parasite_file_cmd(int cmd, int type,
- struct parasite_ctl *ctl, struct cr_fdset *cr_fdset)
+static int parasite_file_cmd(char *what, int cmd, int type,
+ struct parasite_ctl *ctl,
+ struct cr_fdset *cr_fdset)
{
parasite_status_t args = { };
- int status, ret = -1;
+ int ret = -1;
pr_info("\n");
- pr_info("Dumping sigactions (pid: %d)\n", ctl->pid);
+ pr_info("Dumping %s (pid: %d)\n", what, ctl->pid);
pr_info("----------------------------------------\n");
ret = parasite_prep_file(type, ctl, cr_fdset);
if (ret < 0)
goto out;
- ret = parasite_execute(cmd, ctl,
- (parasite_status_t *)&args, sizeof(args));
+ ret = parasite_execute(cmd, ctl, (parasite_status_t *)&args, sizeof(args));
-err:
fchmod(cr_fdset->fds[type], CR_FD_PERM);
out:
pr_info("----------------------------------------\n");
@@ -358,13 +375,11 @@ out:
static int parasite_init(struct parasite_ctl *ctl, pid_t pid)
{
struct parasite_init_args args = { };
- int ret;
args.sun_len = get_socket_name(&args.saddr, pid);
- ret = parasite_execute(PARASITE_CMD_INIT, ctl,
- (parasite_status_t *)&args, sizeof(args));
- return ret;
+ return parasite_execute(PARASITE_CMD_INIT, ctl,
+ (parasite_status_t *)&args, sizeof(args));
}
static int parasite_set_logfd(struct parasite_ctl *ctl, pid_t pid)
@@ -376,8 +391,7 @@ static int parasite_set_logfd(struct parasite_ctl *ctl, pid_t pid)
if (ret)
return ret;
- ret = parasite_execute(PARASITE_CMD_SET_LOGFD, ctl,
- &args, sizeof(args));
+ ret = parasite_execute(PARASITE_CMD_SET_LOGFD, ctl, &args, sizeof(args));
if (ret < 0)
return ret;
@@ -386,18 +400,21 @@ static int parasite_set_logfd(struct parasite_ctl *ctl, pid_t pid)
int parasite_dump_sigacts_seized(struct parasite_ctl *ctl, struct cr_fdset *cr_fdset)
{
- return parasite_file_cmd(PARASITE_CMD_DUMP_SIGACTS, CR_FD_SIGACT, ctl, cr_fdset);
+ return parasite_file_cmd("sigactions", PARASITE_CMD_DUMP_SIGACTS,
+ CR_FD_SIGACT, ctl, cr_fdset);
}
int parasite_dump_itimers_seized(struct parasite_ctl *ctl, struct cr_fdset *cr_fdset)
{
- return parasite_file_cmd(PARASITE_CMD_DUMP_ITIMERS, CR_FD_ITIMERS, ctl, cr_fdset);
+ return parasite_file_cmd("timers", PARASITE_CMD_DUMP_ITIMERS,
+ CR_FD_ITIMERS, ctl, cr_fdset);
}
int parasite_dump_misc_seized(struct parasite_ctl *ctl, struct parasite_dump_misc *misc)
{
return parasite_execute(PARASITE_CMD_DUMP_MISC, ctl,
- (parasite_status_t *)misc, sizeof(struct parasite_dump_misc));
+ (parasite_status_t *)misc,
+ sizeof(struct parasite_dump_misc));
}
/*
@@ -409,11 +426,9 @@ int parasite_dump_pages_seized(struct parasite_ctl *ctl, struct list_head *vma_a
{
struct parasite_dump_pages_args parasite_dumppages = { };
parasite_status_t *st = &parasite_dumppages.status;
- user_regs_struct_t regs, regs_orig;
unsigned long nrpages_dumped = 0;
struct vma_area *vma_area;
- siginfo_t siginfo;
- int status, ret = -1;
+ int ret = -1;
pr_info("\n");
pr_info("Dumping pages (type: %d pid: %d)\n", CR_FD_PAGES, ctl->pid);
@@ -466,8 +481,8 @@ int parasite_dump_pages_seized(struct parasite_ctl *ctl, struct list_head *vma_a
}
ret = parasite_execute(PARASITE_CMD_DUMPPAGES, ctl,
- (parasite_status_t *) &parasite_dumppages,
- sizeof(parasite_dumppages));
+ (parasite_status_t *) &parasite_dumppages,
+ sizeof(parasite_dumppages));
if (ret) {
pr_panic("Dumping pages failed with %li (%li) at %li\n",
parasite_dumppages.status.ret,
@@ -500,90 +515,114 @@ out:
return ret;
}
-int parasite_cure_seized(struct parasite_ctl *ctl, struct list_head *vma_area_list)
+int parasite_cure_seized(struct parasite_ctl *ctl)
{
- user_regs_struct_t regs, regs_orig;
- struct vma_area *vma_area;
- int ret = -1;
parasite_status_t args = { };
+ int ret = 0;
- ret = parasite_execute(PARASITE_CMD_FINI, ctl,
- &args, sizeof(args));
- if (ret) {
- pr_err("Can't finalize parasite (pid: %d) task\n", ctl->pid);
- goto err;
+ if (ctl->parasite_ip) {
+ if (parasite_execute(PARASITE_CMD_FINI, ctl, &args, sizeof(args))) {
+ pr_err("Can't finalize parasite (pid: %d) task\n", ctl->pid);
+ ret = -1;
+ }
}
- jerr(ptrace(PTRACE_GETREGS, ctl->pid, NULL, &regs), err);
-
- regs_orig = regs;
-
- vma_area = get_vma_by_ip(vma_area_list, regs.ip);
- if (!vma_area) {
- pr_err("No suitable VMA found to run cure (pid: %d)\n", ctl->pid);
- goto err;
+ if (ctl->remote_map) {
+ if (munmap_seized(ctl, (void *)ctl->remote_map, ctl->map_length)) {
+ pr_panic("munmap_seized failed (pid: %d)\n", ctl->pid);
+ ret = -1;
+ }
}
- parasite_setup_regs(vma_area->vma.start, &regs);
+ if (ctl->local_map) {
+ if (munmap(ctl->local_map, parasite_size)) {
+ pr_panic("munmap failed (pid: %d)\n", ctl->pid);
+ ret = -1;
+ }
+ }
- ret = munmap_seized(ctl->pid, &regs, ctl->remote_map, ctl->map_length);
- if (ret)
- pr_err("munmap_seized failed (pid: %d)\n", ctl->pid);
+ if (ptrace_poke_area(ctl->pid, (void *)ctl->code_orig,
+ (void *)ctl->syscall_ip, sizeof(ctl->code_orig))) {
+ pr_panic("Can't restore syscall blob (pid: %d)\n", ctl->pid);
+ ret = -1;
+ }
- if (ptrace(PTRACE_SETREGS, ctl->pid, NULL, &regs_orig)) {
- pr_panic("PTRACE_SETREGS failed (pid: %d)\n", ctl->pid);
+ if (ptrace(PTRACE_SETREGS, ctl->pid, NULL, &ctl->regs_orig)) {
+ pr_panic("Can't restore registers (pid: %d)\n", ctl->pid);
ret = -1;
}
free(ctl);
-err:
return ret;
}
struct parasite_ctl *parasite_infect_seized(pid_t pid, int pid_dir, struct list_head *vma_area_list)
{
- parasite_status_t args = { };
- user_regs_struct_t regs, regs_orig;
struct parasite_ctl *ctl = NULL;
struct vma_area *vma_area;
char fname[128];
int ret, fd;
+ /*
+ * Control block early setup.
+ */
ctl = xzalloc(sizeof(*ctl));
if (!ctl) {
pr_err("Parasite control block allocation failed (pid: %d)\n", pid);
goto err;
}
- if (ptrace(PTRACE_GETREGS, pid, NULL, &regs))
- pr_err_jmp(err);
+ if (ptrace(PTRACE_GETREGS, pid, NULL, &ctl->regs_orig)) {
+ pr_err("Can't obtain registers (pid: %d)\n", pid);
+ goto err;
+ }
- vma_area = get_vma_by_ip(vma_area_list, regs.ip);
+ vma_area = get_vma_by_ip(vma_area_list, ctl->regs_orig.ip);
if (!vma_area) {
pr_err("No suitable VMA found to run parasite "
- "bootstrap code (pid: %d)\n", pid);
+ "bootstrap code (pid: %d)\n", pid);
goto err;
}
- regs_orig = regs;
- parasite_setup_regs(vma_area->vma.start, &regs);
+ ctl->pid = pid;
+ ctl->syscall_ip = vma_area->vma.start;
+
+ /*
+ * Inject syscall instruction and remember original code,
+ * we will need it to restore original program content.
+ */
+ BUILD_BUG_ON(sizeof(code_syscall) != sizeof(ctl->code_orig));
+ BUILD_BUG_ON(!is_log2(sizeof(code_syscall)));
+
+ memcpy(ctl->code_orig, code_syscall, sizeof(ctl->code_orig));
+ if (ptrace_swap_area(ctl->pid, (void *)ctl->syscall_ip,
+ (void *)ctl->code_orig, sizeof(ctl->code_orig))) {
+ pr_err("Can't inject syscall blob (pid: %d)\n", pid);
+ goto err;
+ }
- ctl->remote_map = mmap_seized(pid, &regs, NULL, (size_t)parasite_size,
+ /*
+ * Inject a parasite engine. Ie allocate memory inside alien
+ * space and copy engine code there. Then re-map the engine
+ * locally, so we will get an easy way to access engine memory
+ * without using ptrace at all.
+ */
+ ctl->remote_map = mmap_seized(ctl, NULL, (size_t)parasite_size,
PROT_READ | PROT_WRITE | PROT_EXEC,
MAP_ANONYMOUS | MAP_SHARED, -1, 0);
if (!ctl->remote_map) {
pr_err("Can't allocate memory for parasite blob (pid: %d)\n", pid);
- goto err_restore_regs;
+ goto err_restore;
}
ctl->map_length = round_up(parasite_size, PAGE_SIZE);
snprintf(fname, sizeof(fname), "map_files/%p-%p",
- ctl->remote_map, ctl->remote_map + ctl->map_length);
+ ctl->remote_map, ctl->remote_map + ctl->map_length);
fd = openat(pid_dir, fname, O_RDWR);
if (fd < 0) {
pr_perror("Can't open remote parasite map");
- goto err_munmap_restore;
+ goto err_restore;
}
ctl->local_map = mmap(NULL, parasite_size, PROT_READ | PROT_WRITE,
@@ -591,17 +630,15 @@ struct parasite_ctl *parasite_infect_seized(pid_t pid, int pid_dir, struct list_
close(fd);
if (ctl->local_map == MAP_FAILED) {
+ ctl->local_map = NULL;
pr_perror("Can't map remote parasite map");
- goto err_munmap_restore;
+ goto err_restore;
}
pr_info("Putting parasite blob into %p->%p\n", ctl->local_map, ctl->remote_map);
memcpy(ctl->local_map, parasite_blob, sizeof(parasite_blob));
- jerr(ptrace(PTRACE_SETREGS, pid, NULL, &regs_orig), err_munmap_restore);
-
- /* Setup control block */
- ctl->pid = pid;
+ /* Setup the rest of a control block */
ctl->parasite_ip = PARASITE_HEAD_ADDR((unsigned long)ctl->remote_map);
ctl->addr_cmd = (void *)PARASITE_CMD_ADDR((unsigned long)ctl->local_map);
ctl->addr_args = (void *)PARASITE_ARGS_ADDR((unsigned long)ctl->local_map);
@@ -609,32 +646,19 @@ struct parasite_ctl *parasite_infect_seized(pid_t pid, int pid_dir, struct list_
ret = parasite_init(ctl, pid);
if (ret) {
pr_err("%d: Can't create a transport socket\n", pid);
- goto err_munmap_restore;
+ goto err_restore;
}
ret = parasite_set_logfd(ctl, pid);
if (ret) {
pr_err("%d: Can't set a logging descriptor\n", pid);
- goto err_fini;
+ goto err_restore;
}
return ctl;
-err_fini:
- ret = parasite_execute(PARASITE_CMD_FINI, ctl,
- &args, sizeof(args));
- if (ret)
- pr_panic("Can't finalize parasite (pid: %d) task\n", ctl->pid);
-
-err_munmap_restore:
- regs = regs_orig;
- parasite_setup_regs(vma_area->vma.start, &regs);
- if (munmap_seized(pid, &regs, ctl->remote_map, ctl->map_length))
- pr_panic("mmap_seized failed (pid: %d)\n", pid);
-
-err_restore_regs:
- if (ptrace(PTRACE_SETREGS, pid, NULL, &regs_orig))
- pr_panic("PTRACE_SETREGS failed (pid: %d)\n", pid);
+err_restore:
+ parasite_cure_seized(ctl);
err:
xfree(ctl);
diff --git a/ptrace.c b/ptrace.c
index 8a44d81..ce1ed7f 100644
--- a/ptrace.c
+++ b/ptrace.c
@@ -175,3 +175,21 @@ err:
return -2;
}
+/* don't swap big space, it might overflow the stack */
+int ptrace_swap_area(pid_t pid, void *dst, void *src, long bytes)
+{
+ void *t = alloca(bytes);
+
+ if (ptrace_peek_area(pid, t, dst, bytes))
+ return -1;
+
+ if (ptrace_poke_area(pid, src, dst, bytes)) {
+ if (ptrace_poke_area(pid, t, dst, bytes))
+ return -2;
+ return -1;
+ }
+
+ memcpy(src, t, bytes);
+
+ return 0;
+}
--
1.7.7.6
More information about the CRIU
mailing list