[CRIU] Re: Before we do the CRIU v0.1 release
Cyrill Gorcunov
gorcunov at openvz.org
Thu Jul 19 12:02:55 EDT 2012
On Thu, Jul 19, 2012 at 03:02:54PM +0400, Pavel Emelyanov wrote:
>
> Let's try to finish it till the Friday evening.
Here is a final pile. Please give it a shot. Comments are welcome!
Cyrill
-------------- next part --------------
From df642fc77bb1c57ea3c751365daf10979a713f40 Mon Sep 17 00:00:00 2001
From: Cyrill Gorcunov <gorcunov at openvz.org>
Date: Thu, 19 Jul 2012 13:23:01 +0400
Subject: [PATCH] protobuf: Convert core_entry to PB format
This requires some explanations:
- Since we use protobuf data in the restorer
code, we need to carry a copy of the appropriate
PB entities in resident memory. For this
purpose task_restore_core_args and thread_restore_args
were significantly reworked. In short -- the caller
code fills the PB structures into the task arguments space.
- The image_header structure is gone: it makes no
sense to carry it, since we provide a "version" field
in CoreEntry itself.
Signed-off-by: Cyrill Gorcunov <gorcunov at openvz.org>
---
cr-dump.c | 267 +++++++++++++++++++++++++++++++++------------------
cr-restore.c | 106 +++++++++++++--------
cr-show.c | 85 ----------------
include/image.h | 121 -----------------------
include/restorer.h | 10 ++-
protobuf/Makefile | 1 +
protobuf/core.proto | 83 ++++++++++++++++
restorer.c | 46 +++-------
8 files changed, 345 insertions(+), 374 deletions(-)
create mode 100644 protobuf/core.proto
diff --git a/cr-dump.c b/cr-dump.c
index 2ce3c76..3f8bf38 100644
--- a/cr-dump.c
+++ b/cr-dump.c
@@ -51,6 +51,7 @@
#include "protobuf/fs.pb-c.h"
#include "protobuf/mm.pb-c.h"
#include "protobuf/creds.pb-c.h"
+#include "protobuf/core.pb-c.h"
#ifndef CONFIG_X86_64
# error No x86-32 support yet
@@ -490,8 +491,8 @@ static int dump_task_creds(pid_t pid, const struct parasite_dump_misc *misc,
return pb_write(fdset_fd(fds, CR_FD_CREDS), &ce, creds_entry);
}
-#define assign_reg(dst, src, e) dst.e = (__typeof__(dst.e))src.e
-#define assign_array(dst, src, e) memcpy(&dst.e, &src.e, sizeof(dst.e))
+#define assign_reg(dst, src, e) dst->e = (__typeof__(dst->e))src.e
+#define assign_array(dst, src, e) memcpy(dst->e, &src.e, sizeof(src.e))
static int get_task_auxv(pid_t pid, MmEntry *mm)
{
@@ -585,7 +586,7 @@ err:
return ret;
}
-static int get_task_regs(pid_t pid, struct core_entry *core, const struct parasite_ctl *ctl)
+static int get_task_regs(pid_t pid, CoreEntry *core, const struct parasite_ctl *ctl)
{
user_fpregs_struct_t fpregs = {-1};
user_regs_struct_t regs = {-1};
@@ -624,46 +625,51 @@ static int get_task_regs(pid_t pid, struct core_entry *core, const struct parasi
}
}
- assign_reg(core->arch.gpregs, regs, r15);
- assign_reg(core->arch.gpregs, regs, r14);
- assign_reg(core->arch.gpregs, regs, r13);
- assign_reg(core->arch.gpregs, regs, r12);
- assign_reg(core->arch.gpregs, regs, bp);
- assign_reg(core->arch.gpregs, regs, bx);
- assign_reg(core->arch.gpregs, regs, r11);
- assign_reg(core->arch.gpregs, regs, r10);
- assign_reg(core->arch.gpregs, regs, r9);
- assign_reg(core->arch.gpregs, regs, r8);
- assign_reg(core->arch.gpregs, regs, ax);
- assign_reg(core->arch.gpregs, regs, cx);
- assign_reg(core->arch.gpregs, regs, dx);
- assign_reg(core->arch.gpregs, regs, si);
- assign_reg(core->arch.gpregs, regs, di);
- assign_reg(core->arch.gpregs, regs, orig_ax);
- assign_reg(core->arch.gpregs, regs, ip);
- assign_reg(core->arch.gpregs, regs, cs);
- assign_reg(core->arch.gpregs, regs, flags);
- assign_reg(core->arch.gpregs, regs, sp);
- assign_reg(core->arch.gpregs, regs, ss);
- assign_reg(core->arch.gpregs, regs, fs_base);
- assign_reg(core->arch.gpregs, regs, gs_base);
- assign_reg(core->arch.gpregs, regs, ds);
- assign_reg(core->arch.gpregs, regs, es);
- assign_reg(core->arch.gpregs, regs, fs);
- assign_reg(core->arch.gpregs, regs, gs);
-
- assign_reg(core->arch.fpregs, fpregs, cwd);
- assign_reg(core->arch.fpregs, fpregs, swd);
- assign_reg(core->arch.fpregs, fpregs, twd);
- assign_reg(core->arch.fpregs, fpregs, fop);
- assign_reg(core->arch.fpregs, fpregs, rip);
- assign_reg(core->arch.fpregs, fpregs, rdp);
- assign_reg(core->arch.fpregs, fpregs, mxcsr);
- assign_reg(core->arch.fpregs, fpregs, mxcsr_mask);
-
- assign_array(core->arch.fpregs, fpregs, st_space);
- assign_array(core->arch.fpregs, fpregs, xmm_space);
- assign_array(core->arch.fpregs, fpregs, padding);
+ assign_reg(core->arch_x86->gpregs, regs, r15);
+ assign_reg(core->arch_x86->gpregs, regs, r14);
+ assign_reg(core->arch_x86->gpregs, regs, r13);
+ assign_reg(core->arch_x86->gpregs, regs, r12);
+ assign_reg(core->arch_x86->gpregs, regs, bp);
+ assign_reg(core->arch_x86->gpregs, regs, bx);
+ assign_reg(core->arch_x86->gpregs, regs, r11);
+ assign_reg(core->arch_x86->gpregs, regs, r10);
+ assign_reg(core->arch_x86->gpregs, regs, r9);
+ assign_reg(core->arch_x86->gpregs, regs, r8);
+ assign_reg(core->arch_x86->gpregs, regs, ax);
+ assign_reg(core->arch_x86->gpregs, regs, cx);
+ assign_reg(core->arch_x86->gpregs, regs, dx);
+ assign_reg(core->arch_x86->gpregs, regs, si);
+ assign_reg(core->arch_x86->gpregs, regs, di);
+ assign_reg(core->arch_x86->gpregs, regs, orig_ax);
+ assign_reg(core->arch_x86->gpregs, regs, ip);
+ assign_reg(core->arch_x86->gpregs, regs, cs);
+ assign_reg(core->arch_x86->gpregs, regs, flags);
+ assign_reg(core->arch_x86->gpregs, regs, sp);
+ assign_reg(core->arch_x86->gpregs, regs, ss);
+ assign_reg(core->arch_x86->gpregs, regs, fs_base);
+ assign_reg(core->arch_x86->gpregs, regs, gs_base);
+ assign_reg(core->arch_x86->gpregs, regs, ds);
+ assign_reg(core->arch_x86->gpregs, regs, es);
+ assign_reg(core->arch_x86->gpregs, regs, fs);
+ assign_reg(core->arch_x86->gpregs, regs, gs);
+
+ assign_reg(core->arch_x86->fpregs, fpregs, cwd);
+ assign_reg(core->arch_x86->fpregs, fpregs, swd);
+ assign_reg(core->arch_x86->fpregs, fpregs, twd);
+ assign_reg(core->arch_x86->fpregs, fpregs, fop);
+ assign_reg(core->arch_x86->fpregs, fpregs, rip);
+ assign_reg(core->arch_x86->fpregs, fpregs, rdp);
+ assign_reg(core->arch_x86->fpregs, fpregs, mxcsr);
+ assign_reg(core->arch_x86->fpregs, fpregs, mxcsr_mask);
+
+ /* Make sure we have enough space */
+ BUG_ON(core->arch_x86->fpregs->n_st_space != ARRAY_SIZE(fpregs.st_space));
+ BUG_ON(core->arch_x86->fpregs->n_xmm_space != ARRAY_SIZE(fpregs.xmm_space));
+ BUG_ON(core->arch_x86->fpregs->n_padding != ARRAY_SIZE(fpregs.padding));
+
+ assign_array(core->arch_x86->fpregs, fpregs, st_space);
+ assign_array(core->arch_x86->fpregs, fpregs, xmm_space);
+ assign_array(core->arch_x86->fpregs, fpregs, padding);
ret = 0;
@@ -671,23 +677,12 @@ err:
return ret;
}
-static int dump_task_core(struct core_entry *core, int fd_core)
-{
- pr_info("Dumping header ... ");
-
- core->header.version = HEADER_VERSION;
- core->header.arch = HEADER_ARCH_X86_64;
- core->header.flags = 0;
-
- return write_img(fd_core, core);
-}
-
static DECLARE_KCMP_TREE(vm_tree, KCMP_VM);
static DECLARE_KCMP_TREE(fs_tree, KCMP_FS);
static DECLARE_KCMP_TREE(files_tree, KCMP_FILES);
static DECLARE_KCMP_TREE(sighand_tree, KCMP_SIGHAND);
-static int dump_task_kobj_ids(pid_t pid, struct core_entry *core)
+static int dump_task_kobj_ids(pid_t pid, CoreEntry *core)
{
int new;
struct kid_elem elem;
@@ -697,29 +692,29 @@ static int dump_task_kobj_ids(pid_t pid, struct core_entry *core)
elem.genid = 0; /* FIXME optimize */
new = 0;
- core->ids.vm_id = kid_generate_gen(&vm_tree, &elem, &new);
- if (!core->ids.vm_id || !new) {
+ core->ids->vm_id = kid_generate_gen(&vm_tree, &elem, &new);
+ if (!core->ids->vm_id || !new) {
pr_err("Can't make VM id for %d\n", pid);
return -1;
}
new = 0;
- core->ids.fs_id = kid_generate_gen(&fs_tree, &elem, &new);
- if (!core->ids.fs_id || !new) {
+ core->ids->fs_id = kid_generate_gen(&fs_tree, &elem, &new);
+ if (!core->ids->fs_id || !new) {
pr_err("Can't make FS id for %d\n", pid);
return -1;
}
new = 0;
- core->ids.files_id = kid_generate_gen(&files_tree, &elem, &new);
- if (!core->ids.files_id || !new) {
+ core->ids->files_id = kid_generate_gen(&files_tree, &elem, &new);
+ if (!core->ids->files_id || !new) {
pr_err("Can't make FILES id for %d\n", pid);
return -1;
}
new = 0;
- core->ids.sighand_id = kid_generate_gen(&sighand_tree, &elem, &new);
- if (!core->ids.sighand_id || !new) {
+ core->ids->sighand_id = kid_generate_gen(&sighand_tree, &elem, &new);
+ if (!core->ids->sighand_id || !new) {
pr_err("Can't make IO id for %d\n", pid);
return -1;
}
@@ -727,21 +722,106 @@ static int dump_task_kobj_ids(pid_t pid, struct core_entry *core)
return 0;
}
+static void core_entry_free(CoreEntry *core)
+{
+ if (core) {
+ if (core->arch_x86) {
+ if (core->arch_x86->fpregs) {
+ xfree(core->arch_x86->fpregs->st_space);
+ xfree(core->arch_x86->fpregs->xmm_space);
+ xfree(core->arch_x86->fpregs->padding);
+ }
+ xfree(core->arch_x86->gpregs);
+ xfree(core->arch_x86->fpregs);
+ }
+ xfree(core->arch_x86);
+ xfree(core->tc);
+ xfree(core->ids);
+ }
+}
+
+static CoreEntry *core_entry_alloc(void)
+{
+ CoreEntry *core;
+ ArchX86Entry *arch_x86;
+ UserX86RegsEntry *gpregs;
+ UserX86FpregsEntry *fpregs;
+ TaskCoreEntry *tc;
+ CoreIdsEntry *ids;
+
+ core = xmalloc(sizeof(*core));
+ if (!core)
+ return NULL;
+ core_entry__init(core);
+
+ arch_x86 = xmalloc(sizeof(*arch_x86));
+ if (!arch_x86)
+ goto err;
+ arch_x86_entry__init(arch_x86);
+ core->arch_x86 = arch_x86;
+
+ gpregs = xmalloc(sizeof(*gpregs));
+ if (!gpregs)
+ goto err;
+ user_x86_regs_entry__init(gpregs);
+ arch_x86->gpregs = gpregs;
+
+ fpregs = xmalloc(sizeof(*fpregs));
+ if (!fpregs)
+ goto err;
+ user_x86_fpregs_entry__init(fpregs);
+ arch_x86->fpregs = fpregs;
+
+ /* These are numbers from kernel */
+ fpregs->n_st_space = 32;
+ fpregs->n_xmm_space = 64;
+ fpregs->n_padding = 24;
+
+ fpregs->st_space = xzalloc(pb_repeated_size(fpregs, st_space));
+ fpregs->xmm_space = xzalloc(pb_repeated_size(fpregs, xmm_space));
+ fpregs->padding = xzalloc(pb_repeated_size(fpregs, padding));
+
+ if (!fpregs->st_space || !fpregs->xmm_space || !fpregs->padding)
+ goto err;
+
+ tc = xzalloc(sizeof(*tc) + TASK_COMM_LEN);
+ if (!tc)
+ goto err;
+ task_core_entry__init(tc);
+ tc->comm = (void *)tc + sizeof(*tc);
+ core->tc = tc;
+
+ ids = xmalloc(sizeof(*ids));
+ if (!ids)
+ goto err;
+ core_ids_entry__init(ids);
+ core->ids = ids;
+
+ core->version = 1;
+ core->mtype = CORE_ENTRY__MARCH__X86_64;
+
+ return core;
+err:
+ core_entry_free(core);
+ return NULL;
+}
+
static int dump_task_core_all(pid_t pid, const struct proc_pid_stat *stat,
const struct parasite_dump_misc *misc, const struct parasite_ctl *ctl,
const struct cr_fdset *cr_fdset)
{
- struct core_entry *core;
+ int fd_core = fdset_fd(cr_fdset, CR_FD_CORE);
+ CoreEntry *core;
int ret = -1;
+ core = core_entry_alloc();
+ if (!core)
+ return -1;
+
pr_info("\n");
pr_info("Dumping core (pid: %d)\n", pid);
pr_info("----------------------------------------\n");
- core = xzalloc(sizeof(*core));
- if (!core)
- goto err;
-
ret = dump_task_kobj_ids(pid, core);
if (ret)
goto err_free;
@@ -754,26 +834,27 @@ static int dump_task_core_all(pid_t pid, const struct proc_pid_stat *stat,
if (ret)
goto err_free;
- ret = get_task_personality(pid, &core->tc.personality);
+ ret = get_task_personality(pid, &core->tc->personality);
if (ret)
goto err_free;
- strncpy((char *)core->tc.comm, stat->comm, TASK_COMM_LEN);
- core->tc.flags = stat->flags;
- BUILD_BUG_ON(sizeof(core->tc.blk_sigset) != sizeof(k_rtsigset_t));
- memcpy(&core->tc.blk_sigset, &misc->blocked, sizeof(k_rtsigset_t));
+ strncpy((char *)core->tc->comm, stat->comm, TASK_COMM_LEN);
+ core->tc->flags = stat->flags;
+ BUILD_BUG_ON(sizeof(core->tc->blk_sigset) != sizeof(k_rtsigset_t));
+ memcpy(&core->tc->blk_sigset, &misc->blocked, sizeof(k_rtsigset_t));
- core->tc.task_state = TASK_ALIVE;
- core->tc.exit_code = 0;
+ core->tc->task_state = TASK_ALIVE;
+ core->tc->exit_code = 0;
- ret = dump_task_core(core, fdset_fd(cr_fdset, CR_FD_CORE));
- if (ret)
+ ret = pb_write(fd_core, core, core_entry);
+ if (ret < 0) {
+ pr_info("ERROR\n");
goto err_free;
- pr_info("OK\n");
+ } else
+ pr_info("OK\n");
err_free:
- free(core);
-err:
+ core_entry_free(core);
pr_info("----------------------------------------\n");
return ret;
@@ -1125,7 +1206,7 @@ try_again:
static int dump_task_thread(struct parasite_ctl *parasite_ctl, struct pid *tid)
{
- struct core_entry *core;
+ CoreEntry *core;
int ret = -1, fd_core;
unsigned int *taddr;
pid_t pid = tid->real;
@@ -1134,7 +1215,7 @@ static int dump_task_thread(struct parasite_ctl *parasite_ctl, struct pid *tid)
pr_info("Dumping core for thread (pid: %d)\n", pid);
pr_info("----------------------------------------\n");
- core = xzalloc(sizeof(*core));
+ core = core_entry_alloc();
if (!core)
goto err;
@@ -1149,22 +1230,22 @@ static int dump_task_thread(struct parasite_ctl *parasite_ctl, struct pid *tid)
}
pr_info("%d: tid_address=%p\n", pid, taddr);
- core->clear_tid_address = (u64) taddr;
+ core->clear_tid_addr = (u64) taddr;
pr_info("OK\n");
- core->tc.task_state = TASK_ALIVE;
- core->tc.exit_code = 0;
+ core->tc->task_state = TASK_ALIVE;
+ core->tc->exit_code = 0;
fd_core = open_image(CR_FD_CORE, O_DUMP, tid->virt);
if (fd_core < 0)
goto err_free;
- ret = dump_task_core(core, fd_core);
+ ret = pb_write(fd_core, core, core_entry);
close(fd_core);
err_free:
- free(core);
+ core_entry_free(core);
err:
pr_info("----------------------------------------\n");
return ret;
@@ -1173,24 +1254,24 @@ err:
static int dump_one_zombie(const struct pstree_item *item,
const struct proc_pid_stat *pps)
{
- struct core_entry *core;
+ CoreEntry *core;
int ret = -1, fd_core;
- core = xzalloc(sizeof(*core));
+ core = core_entry_alloc();
if (core == NULL)
goto err;
- core->tc.task_state = TASK_DEAD;
- core->tc.exit_code = pps->exit_code;
+ core->tc->task_state = TASK_DEAD;
+ core->tc->exit_code = pps->exit_code;
fd_core = open_image(CR_FD_CORE, O_DUMP, item->pid);
if (fd_core < 0)
goto err_free;
- ret = dump_task_core(core, fd_core);
+ ret = pb_write(fd_core, core, core_entry);
close(fd_core);
err_free:
- xfree(core);
+ core_entry_free(core);
err:
return ret;
}
diff --git a/cr-restore.c b/cr-restore.c
index 2559b21..ac0d44c 100644
--- a/cr-restore.c
+++ b/cr-restore.c
@@ -57,7 +57,7 @@
static struct pstree_item *me;
static int restore_task_with_children(void *);
-static int sigreturn_restore(pid_t pid, struct list_head *vmas, int nr_vmas);
+static int sigreturn_restore(pid_t pid, CoreEntry *core, struct list_head *vmas, int nr_vmas);
static int shmem_remap(void *old_addr, void *new_addr, unsigned long size)
{
@@ -186,7 +186,7 @@ static int read_and_open_vmas(int pid, struct list_head *vmas, int *nr_vmas)
return ret;
}
-static int prepare_and_sigreturn(int pid)
+static int prepare_and_sigreturn(int pid, CoreEntry *core)
{
int err, nr_vmas;
LIST_HEAD(vma_list);
@@ -195,7 +195,7 @@ static int prepare_and_sigreturn(int pid)
if (err)
return err;
- return sigreturn_restore(pid, &vma_list, nr_vmas);
+ return sigreturn_restore(pid, core, &vma_list, nr_vmas);
}
static rt_sigaction_t sigchld_act;
@@ -276,7 +276,7 @@ static int pstree_wait_helpers()
}
-static int restore_one_alive_task(int pid)
+static int restore_one_alive_task(int pid, CoreEntry *core)
{
pr_info("Restoring resources\n");
@@ -292,7 +292,7 @@ static int restore_one_alive_task(int pid)
if (prepare_sigactions(pid))
return -1;
- return prepare_and_sigreturn(pid);
+ return prepare_and_sigreturn(pid, core);
}
static void zombie_prepare_signals(void)
@@ -391,29 +391,24 @@ static int restore_one_zombie(int pid, int exit_code)
return -1;
}
-static int check_core_header(int pid, struct task_core_entry *tc)
+static int check_core_header(int pid, CoreEntry *core)
{
int fd = -1, ret = -1;
- struct image_header hdr;
fd = open_image_ro(CR_FD_CORE, pid);
if (fd < 0)
return -1;
- if (read_img(fd, &hdr) < 0)
- goto out;
-
- if (hdr.version != HEADER_VERSION) {
- pr_err("Core version mismatch %d\n", (int)hdr.version);
+ if (core->version != 1) {
+ pr_err("Core version mismatch %d\n", (int)core->version);
goto out;
}
- if (hdr.arch != HEADER_ARCH_X86_64) {
- pr_err("Core arch mismatch %d\n", (int)hdr.arch);
+ if (core->mtype != CORE_ENTRY__MARCH__X86_64) {
+ pr_err("Core march mismatch %d\n", (int)core->mtype);
goto out;
}
-
- ret = read_img(fd, tc);
+ ret = 0;
out:
close_safe(&fd);
return ret < 0 ? ret : 0;
@@ -421,21 +416,37 @@ out:
static int restore_one_task(int pid)
{
- struct task_core_entry tc;
+ int fd, ret;
+ CoreEntry *core;
if (me->state == TASK_HELPER)
return restore_one_fake(pid);
- if (check_core_header(pid, &tc))
+ fd = open_image_ro(CR_FD_CORE, pid);
+ if (fd < 0)
return -1;
- switch ((int)tc.task_state) {
+ ret = pb_read(fd, &core, core_entry);
+ close(fd);
+
+ if (ret < 0)
+ return -1;
+
+ if (check_core_header(pid, core))
+ return -1;
+
+ switch ((int)core->tc->task_state) {
case TASK_ALIVE:
- return restore_one_alive_task(pid);
- case TASK_DEAD:
- return restore_one_zombie(pid, tc.exit_code);
+ return restore_one_alive_task(pid, core);
+ case TASK_DEAD: {
+ int exit_code = core->tc->exit_code;
+ core_entry__free_unpacked(core, NULL);
+
+ return restore_one_zombie(pid, exit_code);
+ }
default:
- pr_err("Unknown state in code %d\n", (int)tc.task_state);
+ pr_err("Unknown state in code %d\n", (int)core->tc->task_state);
+ core_entry__free_unpacked(core, NULL);
return -1;
}
}
@@ -1119,7 +1130,7 @@ out:
return ret;
}
-static int sigreturn_restore(pid_t pid, struct list_head *tgt_vmas, int nr_vmas)
+static int sigreturn_restore(pid_t pid, CoreEntry *core, struct list_head *tgt_vmas, int nr_vmas)
{
long restore_code_len, restore_task_vma_len;
long restore_thread_vma_len, self_vmas_len, vmas_len;
@@ -1136,7 +1147,6 @@ static int sigreturn_restore(pid_t pid, struct list_head *tgt_vmas, int nr_vmas)
struct thread_restore_args *thread_args;
LIST_HEAD(self_vma_list);
- int fd_core = -1;
int fd_pages = -1;
int i;
@@ -1161,12 +1171,6 @@ static int sigreturn_restore(pid_t pid, struct list_head *tgt_vmas, int nr_vmas)
BUILD_BUG_ON(SHMEMS_SIZE % PAGE_SIZE);
BUILD_BUG_ON(TASK_ENTRIES_SIZE % PAGE_SIZE);
- fd_core = open_image_ro(CR_FD_CORE, pid);
- if (fd_core < 0) {
- pr_perror("Can't open core-out-%d", pid);
- goto err;
- }
-
fd_pages = open_image_ro(CR_FD_PAGES, pid);
if (fd_pages < 0) {
pr_perror("Can't open pages-%d", pid);
@@ -1281,12 +1285,22 @@ static int sigreturn_restore(pid_t pid, struct list_head *tgt_vmas, int nr_vmas)
/*
* Arguments for task restoration.
*/
+
+ BUG_ON(core->version != 1);
+ BUG_ON(core->mtype != CORE_ENTRY__MARCH__X86_64);
+
task_args->pid = pid;
- task_args->fd_core = fd_core;
task_args->logfd = log_get_fd();
task_args->sigchld_act = sigchld_act;
task_args->fd_pages = fd_pages;
+ strncpy(task_args->comm, core->tc->comm, sizeof(task_args->comm));
+
+ task_args->clear_tid_addr = core->clear_tid_addr;
+ task_args->ids = *core->ids;
+ task_args->gpregs = *core->arch_x86->gpregs;
+ task_args->blk_sigset = core->tc->blk_sigset;
+
ret = prepare_itimers(pid, task_args);
if (ret < 0)
goto err;
@@ -1312,18 +1326,35 @@ static int sigreturn_restore(pid_t pid, struct list_head *tgt_vmas, int nr_vmas)
* Fill up per-thread data.
*/
for (i = 0; i < me->nr_threads; i++) {
+ CoreEntry *core;
+ int fd_core;
thread_args[i].pid = me->threads[i].virt;
/* skip self */
if (thread_args[i].pid == pid)
continue;
- /* Core files are to be opened */
- thread_args[i].fd_core = open_image_ro(CR_FD_CORE, thread_args[i].pid);
- if (thread_args[i].fd_core < 0)
+ fd_core = open_image_ro(CR_FD_CORE, thread_args[i].pid);
+ if (fd_core < 0) {
+ pr_err("Can't open core data for thread %d\n",
+ thread_args[i].pid);
goto err;
+ }
+
+ ret = pb_read(fd_core, &core, core_entry);
+ close(fd_core);
+
+ if (ret < 0) {
+ pr_err("Can't read core data for thread %d\n",
+ thread_args[i].pid);
+ goto err;
+ }
+
+ thread_args[i].rst_lock = &task_args->rst_lock;
+ thread_args[i].gpregs = *core->arch_x86->gpregs;
+ thread_args[i].clear_tid_addr = core->clear_tid_addr;
- thread_args[i].rst_lock = &task_args->rst_lock;
+ core_entry__free_unpacked(core, NULL);
pr_info("Thread %4d stack %8p heap %8p rt_sigframe %8p\n",
i, thread_args[i].mem_zone.stack,
@@ -1336,12 +1367,10 @@ static int sigreturn_restore(pid_t pid, struct list_head *tgt_vmas, int nr_vmas)
pr_info("task_args: %p\n"
"task_args->pid: %d\n"
- "task_args->fd_core: %d\n"
"task_args->nr_threads: %d\n"
"task_args->clone_restore_fn: %p\n"
"task_args->thread_args: %p\n",
task_args, task_args->pid,
- task_args->fd_core,
task_args->nr_threads,
task_args->clone_restore_fn,
task_args->thread_args);
@@ -1364,7 +1393,6 @@ static int sigreturn_restore(pid_t pid, struct list_head *tgt_vmas, int nr_vmas)
err:
free_mappings(&self_vma_list);
- close_safe(&fd_core);
/* Just to be sure */
exit(1);
diff --git a/cr-show.c b/cr-show.c
index b3b87df..033c1ff 100644
--- a/cr-show.c
+++ b/cr-show.c
@@ -378,30 +378,6 @@ void show_pstree(int fd_pstree, struct cr_options *o)
show_collect_pstree(fd_pstree, NULL);
}
-static void show_core_regs(int fd_core)
-{
- struct user_regs_entry regs;
-
- pr_msg("\n\t---[GP registers set]---\n");
-
- lseek(fd_core, GET_FILE_OFF(struct core_entry, arch.gpregs), SEEK_SET);
-
- if (read_img(fd_core, &regs) < 0)
- goto err;
-
- pr_regs4(regs, cs, ip, ds, es);
- pr_regs4(regs, ss, sp, fs, gs);
- pr_regs4(regs, di, si, dx, cx);
- pr_regs4(regs, ax, r8, r9, r10);
- pr_regs4(regs, r11, r12, r13, r14);
- pr_regs3(regs, r15, bp, bx);
- pr_regs4(regs, orig_ax, flags, fs_base, gs_base);
- pr_msg("\n");
-
-err:
- return;
-}
-
static inline char *task_state_str(int state)
{
switch (state) {
@@ -414,69 +390,8 @@ static inline char *task_state_str(int state)
}
}
-static void show_core_rest(int fd_core)
-{
- struct task_core_entry tc;
-
- lseek(fd_core, GET_FILE_OFF(struct core_entry, tc), SEEK_SET);
- if (read_img(fd_core, &tc) < 0)
- goto err;
-
- pr_msg("\n\t---[Task parameters]---\n");
- pr_msg("\tPersonality: %#x\n", tc.personality);
- pr_msg("\tCommand: %s\n", tc.comm);
- pr_msg("\tState: %d (%s)\n",
- (int)tc.task_state,
- task_state_str((int)tc.task_state));
-
- pr_msg("\t Exit code: %u\n",
- (unsigned int)tc.exit_code);
-
- pr_msg("\tBlkSig: 0x%lx\n", tc.blk_sigset);
- pr_msg("\n");
-
-err:
- return;
-}
-
-static void show_core_ids(int fd)
-{
- struct core_ids_entry cie;
-
- lseek(fd, GET_FILE_OFF(struct core_entry, ids), SEEK_SET);
- if (read_img(fd, &cie) < 0)
- goto err;
-
- pr_msg("\tVM: %#x\n", cie.vm_id);
- pr_msg("\tFS: %#x\n", cie.fs_id);
- pr_msg("\tFILES: %#x\n", cie.files_id);
- pr_msg("\tSIGHAND: %#x\n", cie.sighand_id);
-err:
- return;
-}
-
void show_core(int fd_core, struct cr_options *o)
{
- struct stat stat;
- bool is_thread;
-
- if (fstat(fd_core, &stat)) {
- pr_perror("Can't get stat on core file");
- goto out;
- }
-
- is_thread = (stat.st_size == GET_FILE_OFF_AFTER(struct core_entry));
-
- if (is_thread)
- pr_img_head(CR_FD_CORE, " (thread)");
- else
- pr_img_head(CR_FD_CORE);
-
- show_core_regs(fd_core);
- show_core_rest(fd_core);
- show_core_ids(fd_core);
-out:
- pr_img_tail(CR_FD_CORE);
}
void show_mm(int fd_mm, struct cr_options *o)
diff --git a/include/image.h b/include/image.h
index d8d57e6..a54eca7 100644
--- a/include/image.h
+++ b/include/image.h
@@ -80,140 +80,19 @@ struct page_entry {
#define CR_CAP_SIZE 2
-#define HEADER_VERSION 1
-#define HEADER_ARCH_X86_64 1
-
-struct image_header {
- u16 version;
- u16 arch;
- u32 flags;
-} __packed;
-
-/*
- * PTRACE_GETREGS
- * PTRACE_GETFPREGS
- * PTRACE_GETFPXREGS dep CONFIG_X86_32
- * PTRACE_GET_THREAD_AREA dep CONFIG_X86_32 || CONFIG_IA32_EMULATION
- * PTRACE_GETFDPIC dep CONFIG_BINFMT_ELF_FDPIC
- *
- * PTRACE_ARCH_PRCTL dep CONFIG_X86_64
- * ARCH_SET_GS/ARCH_GET_FS
- * ARCH_SET_FS/ARCH_GET_GS
- */
-
#ifdef CONFIG_X86_64
-struct user_regs_entry {
- u64 r15;
- u64 r14;
- u64 r13;
- u64 r12;
- u64 bp;
- u64 bx;
- u64 r11;
- u64 r10;
- u64 r9;
- u64 r8;
- u64 ax;
- u64 cx;
- u64 dx;
- u64 si;
- u64 di;
- u64 orig_ax;
- u64 ip;
- u64 cs;
- u64 flags;
- u64 sp;
- u64 ss;
- u64 fs_base;
- u64 gs_base;
- u64 ds;
- u64 es;
- u64 fs;
- u64 gs;
-} __packed;
-
-struct desc_struct {
- union {
- struct {
- u32 a;
- u32 b;
- } x86_32;
- u64 base_addr;
- };
-} __packed;
-
-struct user_fpregs_entry {
- u16 cwd;
- u16 swd;
- u16 twd; /* Note this is not the same as
- the 32bit/x87/FSAVE twd */
- u16 fop;
- u64 rip;
- u64 rdp;
- u32 mxcsr;
- u32 mxcsr_mask;
- u32 st_space[32]; /* 8*16 bytes for each FP-reg = 128 bytes */
- u32 xmm_space[64]; /* 16*16 bytes for each XMM-reg = 256 bytes */
- u32 padding[24];
-} __packed;
-
#define GDT_ENTRY_TLS_ENTRIES 3
#define TASK_COMM_LEN 16
#define TASK_PF_USED_MATH 0x00002000
-#define CKPT_ARCH_SIZE (1 * 4096)
-
-struct ckpt_arch_entry {
- union {
- struct {
- struct user_regs_entry gpregs;
- struct user_fpregs_entry fpregs;
- };
- u8 __arch_pad[CKPT_ARCH_SIZE]; /* should be enough for all */
- };
-};
-
-#define CKPT_CORE_SIZE (2 * 4096)
-
#ifdef CONFIG_X86_64
# define AT_VECTOR_SIZE 44
#else
# define AT_VECTOR_SIZE 22 /* Not needed at moment */
#endif
-struct task_core_entry {
- u8 task_state;
- u8 pad[3];
- u32 exit_code;
-
- u32 personality;
- u8 comm[TASK_COMM_LEN];
- u32 flags;
- u64 blk_sigset;
-};
-
-struct core_ids_entry {
- u32 vm_id;
- u32 files_id;
- u32 fs_id;
- u32 sighand_id;
-} __packed;
-
-struct core_entry {
- union {
- struct {
- struct image_header header;
- struct task_core_entry tc;
- struct ckpt_arch_entry arch;
- struct core_ids_entry ids;
- u64 clear_tid_address;
- };
- u8 __core_pad[CKPT_CORE_SIZE];
- };
-} __packed;
-
#define TASK_ALIVE 0x1
#define TASK_DEAD 0x2
#define TASK_STOPPED 0x3 /* FIXME - implement */
diff --git a/include/restorer.h b/include/restorer.h
index 1be2168..66b6e31 100644
--- a/include/restorer.h
+++ b/include/restorer.h
@@ -14,6 +14,7 @@
#include "../protobuf/mm.pb-c.h"
#include "../protobuf/vma.pb-c.h"
#include "../protobuf/creds.pb-c.h"
+#include "../protobuf/core.pb-c.h"
#ifndef CONFIG_X86_64
# error Only x86-64 is supported
@@ -60,15 +61,15 @@ struct thread_restore_args {
struct restore_mem_zone mem_zone;
int pid;
- int fd_core;
mutex_t *rst_lock;
+ UserX86RegsEntry gpregs;
+ u64 clear_tid_addr;
} __aligned(sizeof(long));
struct task_restore_core_args {
struct restore_mem_zone mem_zone;
int pid; /* task pid */
- int fd_core; /* opened core file */
int fd_exe_link; /* opened self->exe file */
int fd_pages; /* opened pages dump file */
int logfd;
@@ -95,6 +96,11 @@ struct task_restore_core_args {
MmEntry mm;
u64 mm_saved_auxv[AT_VECTOR_SIZE];
+ u64 clear_tid_addr;
+ u64 blk_sigset;
+ char comm[TASK_COMM_LEN];
+ CoreIdsEntry ids;
+ UserX86RegsEntry gpregs;
} __aligned(sizeof(long));
struct pt_regs {
diff --git a/protobuf/Makefile b/protobuf/Makefile
index c61d875..4a7249e 100644
--- a/protobuf/Makefile
+++ b/protobuf/Makefile
@@ -49,6 +49,7 @@ PROTO_FILES += ipc-msg.proto
PROTO_FILES += ipc-sem.proto
PROTO_FILES += creds.proto
PROTO_FILES += vma.proto
+PROTO_FILES += core.proto
HDRS := $(patsubst %.proto,%.pb-c.h,$(PROTO_FILES))
SRCS := $(patsubst %.proto,%.pb-c.c,$(PROTO_FILES))
diff --git a/protobuf/core.proto b/protobuf/core.proto
new file mode 100644
index 0000000..18f3e42
--- /dev/null
+++ b/protobuf/core.proto
@@ -0,0 +1,83 @@
+message user_x86_regs_entry {
+ required uint64 r15 = 1;
+ required uint64 r14 = 2;
+ required uint64 r13 = 3;
+ required uint64 r12 = 4;
+ required uint64 bp = 5;
+ required uint64 bx = 6;
+ required uint64 r11 = 7;
+ required uint64 r10 = 8;
+ required uint64 r9 = 9;
+ required uint64 r8 = 10;
+ required uint64 ax = 11;
+ required uint64 cx = 12;
+ required uint64 dx = 13;
+ required uint64 si = 14;
+ required uint64 di = 15;
+ required uint64 orig_ax = 16;
+ required uint64 ip = 17;
+ required uint64 cs = 18;
+ required uint64 flags = 19;
+ required uint64 sp = 20;
+ required uint64 ss = 21;
+ required uint64 fs_base = 22;
+ required uint64 gs_base = 23;
+ required uint64 ds = 24;
+ required uint64 es = 25;
+ required uint64 fs = 26;
+ required uint64 gs = 27;
+}
+
+message user_x86_fpregs_entry {
+ required uint32 cwd = 1;
+ required uint32 swd = 2;
+ required uint32 twd = 3;
+ required uint32 fop = 4;
+ required uint64 rip = 5;
+ required uint64 rdp = 6;
+ required uint32 mxcsr = 7;
+ required uint32 mxcsr_mask = 8;
+ repeated uint32 st_space = 9;
+ repeated uint32 xmm_space = 10;
+ repeated uint32 padding = 11;
+}
+
+message arch_x86_entry {
+ required user_x86_regs_entry gpregs = 1;
+ required user_x86_fpregs_entry fpregs = 2;
+}
+
+message task_core_entry {
+ required uint32 task_state = 1;
+ required uint32 exit_code = 2;
+
+ required uint32 personality = 3;
+ required uint32 flags = 4;
+ required uint64 blk_sigset = 5;
+
+ required string comm = 6;
+}
+
+message core_ids_entry {
+ required uint32 vm_id = 1;
+ required uint32 files_id = 2;
+ required uint32 fs_id = 3;
+ required uint32 sighand_id = 4;
+}
+
+message core_entry {
+ enum march {
+ UNKNOWN = 0;
+ X86_64 = 1;
+ X86_32 = 2;
+ }
+
+ required uint32 version = 1;
+ required march mtype = 2;
+
+ required task_core_entry tc = 3;
+ required core_ids_entry ids = 4;
+ required uint64 clear_tid_addr = 5;
+
+ optional arch_x86_entry arch_x86 = 6;
+}
diff --git a/restorer.c b/restorer.c
index e20a2ab..dfea61a 100644
--- a/restorer.c
+++ b/restorer.c
@@ -131,7 +131,6 @@ static void restore_creds(CredsEntry *ce)
long __export_restore_thread(struct thread_restore_args *args)
{
long ret = -1;
- struct core_entry *core_entry;
struct rt_sigframe *rt_sigframe;
unsigned long new_sp, fsgs_base;
int my_pid = sys_gettid();
@@ -143,23 +142,12 @@ long __export_restore_thread(struct thread_restore_args *args)
goto core_restore_end;
}
- core_entry = (struct core_entry *)&args->mem_zone.heap;
-
- ret = sys_read(args->fd_core, core_entry, sizeof(*core_entry));
- if (ret != sizeof(*core_entry)) {
- write_num_n(__LINE__);
- goto core_restore_end;
- }
-
- /* We're to close it! */
- sys_close(args->fd_core);
-
- sys_set_tid_address((int *) core_entry->clear_tid_address);
+ sys_set_tid_address((int *)args->clear_tid_addr);
rt_sigframe = (void *)args->mem_zone.rt_sigframe + 8;
-#define CPREGT1(d) rt_sigframe->uc.uc_mcontext.d = core_entry->arch.gpregs.d
-#define CPREGT2(d,s) rt_sigframe->uc.uc_mcontext.d = core_entry->arch.gpregs.s
+#define CPREGT1(d) rt_sigframe->uc.uc_mcontext.d = args->gpregs.d
+#define CPREGT2(d,s) rt_sigframe->uc.uc_mcontext.d = args->gpregs.s
CPREGT1(r8);
CPREGT1(r9);
@@ -183,7 +171,7 @@ long __export_restore_thread(struct thread_restore_args *args)
CPREGT1(gs);
CPREGT1(fs);
- fsgs_base = core_entry->arch.gpregs.fs_base;
+ fsgs_base = args->gpregs.fs_base;
ret = sys_arch_prctl(ARCH_SET_FS, fsgs_base);
if (ret) {
write_num_n(__LINE__);
@@ -191,7 +179,7 @@ long __export_restore_thread(struct thread_restore_args *args)
goto core_restore_end;
}
- fsgs_base = core_entry->arch.gpregs.gs_base;
+ fsgs_base = args->gpregs.gs_base;
ret = sys_arch_prctl(ARCH_SET_GS, fsgs_base);
if (ret) {
write_num_n(__LINE__);
@@ -294,7 +282,6 @@ static u64 restore_mapping(const VmaEntry *vma_entry)
long __export_restore_task(struct task_restore_core_args *args)
{
long ret = -1;
- struct core_entry *core_entry;
VmaEntry *vma_entry;
u64 va;
@@ -310,8 +297,6 @@ long __export_restore_task(struct task_restore_core_args *args)
restorer_set_logfd(args->logfd);
- core_entry = first_on_heap(core_entry, args->mem_zone.heap);
-
#if 0
write_hex_n((long)args);
write_hex_n((long)args->mem_zone.heap);
@@ -319,12 +304,6 @@ long __export_restore_task(struct task_restore_core_args *args)
write_hex_n((long)vma_entry);
#endif
- ret = sys_read(args->fd_core, core_entry, sizeof(*core_entry));
- if (ret != sizeof(*core_entry)) {
- write_num_n(__LINE__);
- goto core_restore_end;
- }
-
for (vma_entry = args->self_vmas; vma_entry->start != 0; vma_entry++) {
if (!vma_entry_is(vma_entry, VMA_AREA_REGULAR))
continue;
@@ -412,7 +391,6 @@ long __export_restore_task(struct task_restore_core_args *args)
sys_munmap(args->tgt_vmas,
((void *)(vma_entry + 1) - ((void *)args->tgt_vmas)));
- sys_close(args->fd_core);
ret = sys_munmap(args->shmems, SHMEMS_SIZE);
if (ret < 0) {
@@ -421,12 +399,12 @@ long __export_restore_task(struct task_restore_core_args *args)
goto core_restore_end;
}
- sys_set_tid_address((int *) core_entry->clear_tid_address);
+ sys_set_tid_address((int *)args->clear_tid_addr);
/*
* Tune up the task fields.
*/
- ret |= sys_prctl_safe(PR_SET_NAME, (long)core_entry->tc.comm, 0, 0);
+ ret |= sys_prctl_safe(PR_SET_NAME, (long)args->comm, 0, 0);
ret |= sys_prctl_safe(PR_SET_MM, PR_SET_MM_START_CODE, (long)args->mm.mm_start_code, 0);
ret |= sys_prctl_safe(PR_SET_MM, PR_SET_MM_END_CODE, (long)args->mm.mm_end_code, 0);
@@ -462,8 +440,8 @@ long __export_restore_task(struct task_restore_core_args *args)
*/
rt_sigframe = (void *)args->mem_zone.rt_sigframe + 8;
-#define CPREG1(d) rt_sigframe->uc.uc_mcontext.d = core_entry->arch.gpregs.d
-#define CPREG2(d,s) rt_sigframe->uc.uc_mcontext.d = core_entry->arch.gpregs.s
+#define CPREG1(d) rt_sigframe->uc.uc_mcontext.d = args->gpregs.d
+#define CPREG2(d,s) rt_sigframe->uc.uc_mcontext.d = args->gpregs.s
CPREG1(r8);
CPREG1(r9);
@@ -487,7 +465,7 @@ long __export_restore_task(struct task_restore_core_args *args)
CPREG1(gs);
CPREG1(fs);
- fsgs_base = core_entry->arch.gpregs.fs_base;
+ fsgs_base = args->gpregs.fs_base;
ret = sys_arch_prctl(ARCH_SET_FS, fsgs_base);
if (ret) {
write_num_n(__LINE__);
@@ -495,7 +473,7 @@ long __export_restore_task(struct task_restore_core_args *args)
goto core_restore_end;
}
- fsgs_base = core_entry->arch.gpregs.gs_base;
+ fsgs_base = args->gpregs.gs_base;
ret = sys_arch_prctl(ARCH_SET_GS, fsgs_base);
if (ret) {
write_num_n(__LINE__);
@@ -506,7 +484,7 @@ long __export_restore_task(struct task_restore_core_args *args)
/*
* Blocked signals.
*/
- rt_sigframe->uc.uc_sigmask.sig[0] = core_entry->tc.blk_sigset;
+ rt_sigframe->uc.uc_sigmask.sig[0] = args->blk_sigset;
/*
* Threads restoration. This requires some more comments. This
--
1.7.7.6
More information about the CRIU
mailing list