[CRIU] [PATCH 5/5] restore: restore processes which share one fdtable (v2)

Andrey Vagin avagin at openvz.org
Fri Nov 23 03:24:31 EST 2012


Currently crtools supports the case where a child shares an fd table
with its parent.

There are only two interesting things here.
* Service descriptors should be cloned for each process
  that shares the fd table.
* One task should restore the files while the other tasks sleep.

v2: * allocate fdt_lock from shared memory
    * don't wait for a child if it doesn't share the fdtable

Signed-off-by: Andrey Vagin <avagin at openvz.org>
---
 cr-dump.c         | 12 +++++++-----
 cr-restore.c      | 43 +++++++++++++++++++++++++++++++++++++++----
 files.c           |  7 +------
 include/crtools.h |  8 ++++++++
 include/files.h   |  1 +
 include/pstree.h  |  5 +++++
 pstree.c          | 11 +++++++++++
 7 files changed, 72 insertions(+), 15 deletions(-)

diff --git a/cr-dump.c b/cr-dump.c
index f958a9f..141caca 100644
--- a/cr-dump.c
+++ b/cr-dump.c
@@ -1552,12 +1552,14 @@ static int dump_one_task(struct pstree_item *item)
 
 	ret = dump_task_kobj_ids(item);
 	if (ret)
- 		goto err_cure;
-
-	ret = dump_task_files_seized(parasite_ctl, cr_fdset, dfds);
-	if (ret) {
-		pr_err("Dump files (pid: %d) failed with %d\n", pid, ret);
 		goto err_cure;
+
+	if (!shared_fdtable(item)) {
+		ret = dump_task_files_seized(parasite_ctl, cr_fdset, dfds);
+		if (ret) {
+			pr_err("Dump files (pid: %d) failed with %d\n", pid, ret);
+			goto err_cure;
+		}
 	}
 
 	ret = parasite_dump_pages_seized(parasite_ctl, &vma_area_list, cr_fdset);
diff --git a/cr-restore.c b/cr-restore.c
index 86398f9..b73497a 100644
--- a/cr-restore.c
+++ b/cr-restore.c
@@ -587,13 +587,33 @@ static int pstree_wait_helpers()
 
 static int restore_one_alive_task(int pid, CoreEntry *core)
 {
+	struct pstree_item *child;
 	pr_info("Restoring resources\n");
 
 	if (pstree_wait_helpers())
 		return -1;
 
-	if (prepare_fds(current))
-		return -1;
+	/*
+	 * Wait all children, who share a current fd table.
+	 * We should be sure, that children don't use any file
+	 * descriptor while fdtable is being restored.
+	 */
+	list_for_each_entry(child, &current->children, sibling) {
+		if (!shared_fdtable(child))
+			continue;
+		futex_wait_until(child->rst->fdt_lock, FDT_LOCK_SYNC);
+	}
+
+	if (!shared_fdtable(current)) {
+		if (prepare_fds(current))
+			return -1;
+	} else {
+		/* Notify a parent, that a current is ready for restoring fdtable */
+		futex_set_and_wake(current->rst->fdt_lock, FDT_LOCK_SYNC);
+		futex_wait_until(current->parent->rst->fdt_lock, FDT_LOCK_DONE);
+	}
+
+	futex_set_and_wake(current->rst->fdt_lock, FDT_LOCK_DONE);
 
 	if (prepare_fs(pid))
 		return -1;
@@ -670,6 +690,7 @@ static int restore_one_zombie(int pid, int exit_code)
 {
 	pr_info("Restoring zombie with %d code\n", exit_code);
 
+	futex_set_and_wake(current->rst->fdt_lock, FDT_LOCK_DONE);
 	if (task_entries != NULL) {
 		futex_dec_and_wake(&task_entries->nr_in_progress);
 		futex_wait_while(&task_entries->start, CR_STATE_RESTORE);
@@ -794,6 +815,9 @@ static inline int fork_with_pid(struct pstree_item *item, unsigned long ns_clone
 	ca.item = item;
 	ca.clone_flags = ns_clone_flags;
 
+	if (shared_fdtable(item))
+		ca.clone_flags |= CLONE_FILES;
+
 	if (!(ca.clone_flags & CLONE_NEWPID)) {
 		char buf[32];
 
@@ -1004,10 +1028,18 @@ static int restore_task_with_children(void *_arg)
 	int ret;
 	sigset_t blockmask;
 
-	close_safe(&ca->fd);
-
 	current = ca->item;
 
+	if (ca->clone_flags & CLONE_FILES)
+		ret = clone_service_fd(false);
+	else {
+		close_safe(&ca->fd);
+		ret = clone_service_fd(true);
+	}
+
+	if (ret != 0)
+		return -1;
+
 	pid = getpid();
 	if (current->pid.virt != pid) {
 		pr_err("Pid %d do not match expected %d\n", pid, current->pid.virt);
@@ -1041,6 +1073,9 @@ static int restore_task_with_children(void *_arg)
 			exit(-1);
 	}
 
+	if (!shared_fdtable(current))
+		close_old_fds(current);
+
 	/*
 	 * The block mask will be restored in sigresturn.
 	 *
diff --git a/files.c b/files.c
index 9d25f16..aacdb21 100644
--- a/files.c
+++ b/files.c
@@ -496,7 +496,7 @@ static int open_fdinfos(int pid, struct list_head *list, int state)
 	return ret;
 }
 
-static int close_old_fds(struct pstree_item *me)
+int close_old_fds(struct pstree_item *me)
 {
 	DIR *dir;
 	struct dirent *de;
@@ -533,10 +533,6 @@ int prepare_fds(struct pstree_item *me)
 	u32 ret;
 	int state;
 
-	ret = close_old_fds(me);
-	if (ret)
-		goto err;
-
 	pr_info("Opening fdinfo-s\n");
 
 	for (state = 0; state < ARRAY_SIZE(states); state++) {
@@ -562,7 +558,6 @@ int prepare_fds(struct pstree_item *me)
 			break;
 	}
 
-err:
 	tty_fini_fds();
 	return ret;
 }
diff --git a/include/crtools.h b/include/crtools.h
index 6c81277..baa558b 100644
--- a/include/crtools.h
+++ b/include/crtools.h
@@ -226,10 +226,18 @@ struct vma_area {
 #define vma_area_is(vma_area, s)	vma_entry_is(&((vma_area)->vma), s)
 #define vma_area_len(vma_area)		vma_entry_len(&((vma_area)->vma))
 
+enum {
+	FDT_LOCK_INIT = 0,
+	FDT_LOCK_SYNC,
+	FDT_LOCK_DONE,
+};
+
 struct rst_info {
 	struct list_head	fds;
 	struct list_head	eventpoll;
 	struct list_head	tty_slaves;
+
+	futex_t			*fdt_lock;
 };
 
 static inline int in_vma_area(struct vma_area *vma, unsigned long addr)
diff --git a/include/files.h b/include/files.h
index 4bdc9fe..e5f3271 100644
--- a/include/files.h
+++ b/include/files.h
@@ -94,6 +94,7 @@ extern int get_filemap_fd(int pid, VmaEntry *vma_entry);
 extern int prepare_fs(int pid);
 extern int set_fd_flags(int fd, int flags);
 
+extern int close_old_fds(struct pstree_item *me);
 #ifndef AT_EMPTY_PATH
 #define AT_EMPTY_PATH 0x1000
 #endif
diff --git a/include/pstree.h b/include/pstree.h
index 307113d..c146c70 100644
--- a/include/pstree.h
+++ b/include/pstree.h
@@ -48,6 +48,11 @@ struct pstree_item {
 	struct rst_info		rst[0];
 };
 
+static inline int shared_fdtable(struct pstree_item *item) {
+	return (item->parent && item->parent->state != TASK_HELPER &&
+		item->files_id && item->files_id == item->parent->files_id);
+}
+
 extern void free_pstree(struct pstree_item *root_item);
 extern struct pstree_item *__alloc_pstree_item(bool rst);
 #define alloc_pstree_item() __alloc_pstree_item(false)
diff --git a/pstree.c b/pstree.c
index 7534344..83d2a0e 100644
--- a/pstree.c
+++ b/pstree.c
@@ -5,6 +5,7 @@
 #include "pstree.h"
 #include "restorer.h"
 #include "util.h"
+#include "lock.h"
 
 #include "protobuf.h"
 #include "protobuf/pstree.pb-c.h"
@@ -37,6 +38,16 @@ struct pstree_item *__alloc_pstree_item(bool rst)
 	if (!item)
 		return NULL;
 
+	if (rst) {
+		item->rst->fdt_lock = shmalloc(sizeof(*item->rst->fdt_lock));
+		if (item->rst->fdt_lock == NULL) {
+			xfree(item);
+			return NULL;
+		}
+
+		futex_init(item->rst->fdt_lock);
+	}
+
 	INIT_LIST_HEAD(&item->children);
 	INIT_LIST_HEAD(&item->sibling);
 
-- 
1.7.11.7



More information about the CRIU mailing list