[CRIU] [PATCH 4/5] restore: restore pocesses which share one fdtable (v3)
Andrey Vagin
avagin at openvz.org
Tue Dec 25 08:10:10 EST 2012
Currenly crtools supports a case when a child shared a fd table
with parent.
Here is only two interesting things.
* Service descriptors should be cloned for each process
who shared one fd table.
* One task should restore files and other tasks should sleep in this
* time.
v2: * allocate fdt_lock from shared memory
* don't wait a child, if it doesn't share fdtable
v3: * don't move ids on the pstree image
Signed-off-by: Andrey Vagin <avagin at openvz.org>
---
cr-dump.c | 29 +++++++++++++++++------------
cr-restore.c | 44 ++++++++++++++++++++++++++++++++++++++++----
files.c | 7 +------
include/crtools.h | 9 +++++++++
include/files.h | 1 +
include/pstree.h | 10 ++++++++++
pstree.c | 11 +++++++++++
7 files changed, 89 insertions(+), 22 deletions(-)
diff --git a/cr-dump.c b/cr-dump.c
index 00af534..7d58248 100644
--- a/cr-dump.c
+++ b/cr-dump.c
@@ -781,10 +781,11 @@ static DECLARE_KCMP_TREE(fs_tree, KCMP_FS);
static DECLARE_KCMP_TREE(files_tree, KCMP_FILES);
static DECLARE_KCMP_TREE(sighand_tree, KCMP_SIGHAND);
-static int dump_task_kobj_ids(pid_t pid, CoreEntry *core)
+static int dump_task_kobj_ids(struct pstree_item *item, CoreEntry *core)
{
int new;
struct kid_elem elem;
+ int pid = item->pid.real;
elem.pid = pid;
elem.idx = 0; /* really 0 for all */
@@ -806,7 +807,7 @@ static int dump_task_kobj_ids(pid_t pid, CoreEntry *core)
new = 0;
core->ids->files_id = kid_generate_gen(&files_tree, &elem, &new);
- if (!core->ids->files_id || !new) {
+ if (!core->ids->files_id || (!new && !shared_fdtable(item))) {
pr_err("Can't make FILES id for %d\n", pid);
return -1;
}
@@ -937,12 +938,13 @@ err:
return NULL;
}
-static int dump_task_core_all(pid_t pid, const struct proc_pid_stat *stat,
+static int dump_task_core_all(struct pstree_item *item, const struct proc_pid_stat *stat,
const struct parasite_dump_misc *misc, const struct parasite_ctl *ctl,
const struct cr_fdset *cr_fdset,
struct list_head *vma_area_list)
{
int fd_core = fdset_fd(cr_fdset, CR_FD_CORE);
+ pid_t pid = item->pid.real;
CoreEntry *core;
int ret = -1;
@@ -954,7 +956,9 @@ static int dump_task_core_all(pid_t pid, const struct proc_pid_stat *stat,
pr_info("Dumping core (pid: %d)\n", pid);
pr_info("----------------------------------------\n");
- ret = dump_task_kobj_ids(pid, core);
+ item->core = core;
+
+ ret = dump_task_kobj_ids(item, core);
if (ret)
goto err_free;
@@ -993,7 +997,6 @@ static int dump_task_core_all(pid_t pid, const struct proc_pid_stat *stat,
goto err_free;
err_free:
- core_entry_free(core);
pr_info("----------------------------------------\n");
return ret;
@@ -1606,12 +1609,6 @@ static int dump_one_task(struct pstree_item *item)
if (!cr_fdset)
goto err_cure;
- ret = dump_task_files_seized(parasite_ctl, cr_fdset, dfds);
- if (ret) {
- pr_err("Dump files (pid: %d) failed with %d\n", pid, ret);
- goto err_cure;
- }
-
ret = parasite_dump_pages_seized(parasite_ctl, &vma_area_list, cr_fdset);
if (ret) {
pr_err("Can't dump pages (pid: %d) with parasite\n", pid);
@@ -1630,13 +1627,21 @@ static int dump_one_task(struct pstree_item *item)
goto err_cure;
}
- ret = dump_task_core_all(pid, &pps_buf, &misc,
+ ret = dump_task_core_all(item, &pps_buf, &misc,
parasite_ctl, cr_fdset, &vma_area_list);
if (ret) {
pr_err("Dump core (pid: %d) failed with %d\n", pid, ret);
goto err_cure;
}
+ if (!shared_fdtable(item)) {
+ ret = dump_task_files_seized(parasite_ctl, cr_fdset, dfds);
+ if (ret) {
+ pr_err("Dump files (pid: %d) failed with %d\n", pid, ret);
+ goto err_cure;
+ }
+ }
+
ret = dump_task_threads(parasite_ctl, item);
if (ret) {
pr_err("Can't dump threads\n");
diff --git a/cr-restore.c b/cr-restore.c
index b3e9572..6b34e83 100644
--- a/cr-restore.c
+++ b/cr-restore.c
@@ -585,13 +585,34 @@ static int pstree_wait_helpers()
static int restore_one_alive_task(int pid, CoreEntry *core)
{
+ struct pstree_item *child;
+
pr_info("Restoring resources\n");
if (pstree_wait_helpers())
return -1;
- if (prepare_fds(current))
- return -1;
+ /*
+ * Wait all children, who share a current fd table.
+ * We should be sure, that children don't use any file
+ * descriptor while fdtable is being restored.
+ */
+ list_for_each_entry(child, ¤t->children, sibling) {
+ if (!shared_fdtable(child))
+ continue;
+ futex_wait_until(child->rst->fdt_lock, FDT_LOCK_SYNC);
+ }
+
+ if (!shared_fdtable(current)) {
+ if (prepare_fds(current))
+ return -1;
+ } else {
+ /* Notify a parent, that a current is ready for restoring fdtable */
+ futex_set_and_wake(current->rst->fdt_lock, FDT_LOCK_SYNC);
+ futex_wait_until(current->parent->rst->fdt_lock, FDT_LOCK_DONE);
+ }
+
+ futex_set_and_wake(current->rst->fdt_lock, FDT_LOCK_DONE);
if (prepare_fs(pid))
return -1;
@@ -671,6 +692,7 @@ static int restore_one_zombie(int pid, int exit_code)
{
pr_info("Restoring zombie with %d code\n", exit_code);
+ futex_set_and_wake(current->rst->fdt_lock, FDT_LOCK_DONE);
if (task_entries != NULL) {
restore_finish_stage(CR_STATE_RESTORE);
zombie_prepare_signals();
@@ -792,6 +814,9 @@ static inline int fork_with_pid(struct pstree_item *item, unsigned long ns_clone
ca.item = item;
ca.clone_flags = ns_clone_flags;
+ if (shared_fdtable(item))
+ ca.clone_flags |= CLONE_FILES;
+
if (!(ca.clone_flags & CLONE_NEWPID)) {
char buf[32];
@@ -989,10 +1014,18 @@ static int restore_task_with_children(void *_arg)
int ret;
sigset_t blockmask;
- close_safe(&ca->fd);
-
current = ca->item;
+ if (ca->clone_flags & CLONE_FILES)
+ ret = clone_service_fd(false);
+ else {
+ close_safe(&ca->fd);
+ ret = clone_service_fd(true);
+ }
+
+ if (ret != 0)
+ return -1;
+
pid = getpid();
if (current->pid.virt != pid) {
pr_err("Pid %d do not match expected %d\n", pid, current->pid.virt);
@@ -1023,6 +1056,9 @@ static int restore_task_with_children(void *_arg)
exit(-1);
}
+ if (!shared_fdtable(current))
+ close_old_fds(current);
+
/*
* The block mask will be restored in sigresturn.
*
diff --git a/files.c b/files.c
index 5bf4ffa..9ebef20 100644
--- a/files.c
+++ b/files.c
@@ -496,7 +496,7 @@ static int open_fdinfos(int pid, struct list_head *list, int state)
return ret;
}
-static int close_old_fds(struct pstree_item *me)
+int close_old_fds(struct pstree_item *me)
{
DIR *dir;
struct dirent *de;
@@ -531,10 +531,6 @@ int prepare_fds(struct pstree_item *me)
u32 ret;
int state;
- ret = close_old_fds(me);
- if (ret)
- goto err;
-
pr_info("Opening fdinfo-s\n");
for (state = 0; state < ARRAY_SIZE(states); state++) {
@@ -560,7 +556,6 @@ int prepare_fds(struct pstree_item *me)
break;
}
-err:
tty_fini_fds();
return ret;
}
diff --git a/include/crtools.h b/include/crtools.h
index 9d94fd7..815b574 100644
--- a/include/crtools.h
+++ b/include/crtools.h
@@ -8,6 +8,7 @@
#include "list.h"
#include "util.h"
#include "image.h"
+#include "lock.h"
#include "../protobuf/vma.pb-c.h"
@@ -226,6 +227,12 @@ struct vma_area {
#define vma_area_is(vma_area, s) vma_entry_is(&((vma_area)->vma), s)
#define vma_area_len(vma_area) vma_entry_len(&((vma_area)->vma))
+enum {
+ FDT_LOCK_INIT = 0,
+ FDT_LOCK_SYNC,
+ FDT_LOCK_DONE,
+};
+
struct rst_info {
struct list_head fds;
struct list_head eventpoll;
@@ -233,6 +240,8 @@ struct rst_info {
void *premmapped_addr;
unsigned long premmapped_len;
+
+ futex_t *fdt_lock;
};
static inline int in_vma_area(struct vma_area *vma, unsigned long addr)
diff --git a/include/files.h b/include/files.h
index fcd9f31..370e3f3 100644
--- a/include/files.h
+++ b/include/files.h
@@ -94,6 +94,7 @@ extern int get_filemap_fd(int pid, VmaEntry *vma_entry);
extern int prepare_fs(int pid);
extern int set_fd_flags(int fd, int flags);
+extern int close_old_fds(struct pstree_item *me);
#ifndef AT_EMPTY_PATH
#define AT_EMPTY_PATH 0x1000
#endif
diff --git a/include/pstree.h b/include/pstree.h
index e9e01ce..70b990b 100644
--- a/include/pstree.h
+++ b/include/pstree.h
@@ -46,6 +46,16 @@ struct pstree_item {
struct rst_info rst[0];
};
+static inline int shared_fdtable(struct pstree_item *item) {
+ return (item->parent && item->parent->state != TASK_HELPER &&
+ item->core &&
+ item->core->ids &&
+ item->parent->core &&
+ item->parent->core->ids &&
+ item->core->ids->files_id &&
+ item->core->ids->files_id == item->parent->core->ids->files_id);
+}
+
extern void free_pstree(struct pstree_item *root_item);
extern struct pstree_item *__alloc_pstree_item(bool rst);
#define alloc_pstree_item() __alloc_pstree_item(false)
diff --git a/pstree.c b/pstree.c
index c9a402e..c5908b6 100644
--- a/pstree.c
+++ b/pstree.c
@@ -5,6 +5,7 @@
#include "pstree.h"
#include "restorer.h"
#include "util.h"
+#include "lock.h"
#include "protobuf.h"
#include "protobuf/pstree.pb-c.h"
@@ -37,6 +38,16 @@ struct pstree_item *__alloc_pstree_item(bool rst)
if (!item)
return NULL;
+ if (rst) {
+ item->rst->fdt_lock = shmalloc(sizeof(*item->rst->fdt_lock));
+ if (item->rst->fdt_lock == NULL) {
+ xfree(item);
+ return NULL;
+ }
+
+ futex_init(item->rst->fdt_lock);
+ }
+
INIT_LIST_HEAD(&item->children);
INIT_LIST_HEAD(&item->sibling);
--
1.7.11.7
More information about the CRIU
mailing list