[CRIU] [PATCH] restore: Serialize access to last_pid

Cyrill Gorcunov gorcunov at gmail.com
Fri Nov 15 17:33:34 MSK 2019


When we do clone threads in a later stage of
restore procedure it may race with helpers
which do call clone_noasan by self.

In particular when restoring mount points
we do use try_remount_writable which is called
via call_helper_process and produce own fork.

Thus to eliminate the race which happens that
later lets allocate clone_noasan context via
shared memory on restore and then once
processes are forked we pass @last_pid_mutex
to the clone_noasan engine and all subsequent
calls to clone_noasan will be serialized with
threads creation.

To be honest I have no clue right now how to
provide some kind of test for this stuff. Our
QA team reported the issue as

 | (07.398000) pie: 20768: Error (criu/pie/restorer.c:1823): Unable to create a thread 20770: 20775
 | (07.398013) pie: 20768: Error (criu/pie/restorer.c:1828): Reread last_pid 20775

where we wrote 20769 into last_pid inside pie thread
restore , it failed and re-reading the last_pid returned
20775 instead of expecting value, which means there is
other forks happen inbetween without taking last_pid
lock.

Also I'm in big doubt this architectural solution, don't
like it much, it was rather a fast fix which address
the problem. So any other ideas are welcome.

Signed-off-by: Cyrill Gorcunov <gorcunov at gmail.com>
---
 criu/clone-noasan.c         | 63 ++++++++++++++++++++++++++++++++++++-
 criu/cr-restore.c           |  6 ++++
 criu/include/clone-noasan.h |  5 +++
 3 files changed, 73 insertions(+), 1 deletion(-)

diff --git a/criu/clone-noasan.c b/criu/clone-noasan.c
index 5ca280eb8386..627d924d5346 100644
--- a/criu/clone-noasan.c
+++ b/criu/clone-noasan.c
@@ -1,8 +1,65 @@
 #include <sched.h>
+#include <sys/mman.h>
 #include "common/compiler.h"
+#include "clone-noasan.h"
 #include "log.h"
 #include "common/bug.h"
 
+#undef LOG_PREFIX
+#define LOG_PREFIX "clone_noasan: "
+
+static struct {
+	mutex_t		op_mutex;
+	mutex_t		*clone_mutex;
+} *context;
+
+int clone_noasan_init(void)
+{
+	context = mmap(NULL, sizeof(*context), PROT_READ | PROT_WRITE,
+		       MAP_ANONYMOUS | MAP_SHARED, -1, 0);
+	if (context == MAP_FAILED) {
+		pr_perror("Can't allocate context");
+		return -1;
+	}
+
+	mutex_init(&context->op_mutex);
+	return 0;
+}
+
+void clone_noasan_fini(void)
+{
+	munmap(context, sizeof(*context));
+	context = NULL;
+}
+
+static inline void context_lock(void)
+{
+	if (context && context->clone_mutex)
+		mutex_lock(context->clone_mutex);
+}
+
+static inline void context_unlock(void)
+{
+	if (context && context->clone_mutex)
+		mutex_unlock(context->clone_mutex);
+}
+
+int clone_noasan_set_mutex(mutex_t *clone_mutex)
+{
+	if (!context) {
+		pr_err_once("Context is missing\n");
+		return -ENOENT;
+	}
+
+	mutex_lock(&context->op_mutex);
+	if (context->clone_mutex)
+		return -EBUSY;
+	context->clone_mutex = clone_mutex;
+	mutex_unlock(&context->op_mutex);
+
+	return 0;
+}
+
 /*
  * ASan doesn't play nicely with clone if we use current stack for
  * child task. ASan puts local variables on the fake stack
@@ -23,9 +80,13 @@ int clone_noasan(int (*fn)(void *), int flags, void *arg)
 {
 	void *stack_ptr = (void *)round_down((unsigned long)&stack_ptr - 1024, 16);
 	BUG_ON((flags & CLONE_VM) && !(flags & CLONE_VFORK));
+	int ret;
 	/*
 	 * Reserve some bytes for clone() internal needs
 	 * and use as stack the address above this area.
 	 */
-	return clone(fn, stack_ptr, flags, arg);
+	context_lock();
+	ret = clone(fn, stack_ptr, flags, arg);
+	context_unlock();
+	return ret;
 }
diff --git a/criu/cr-restore.c b/criu/cr-restore.c
index de0b2cb407dd..17bff7f3eb42 100644
--- a/criu/cr-restore.c
+++ b/criu/cr-restore.c
@@ -1814,6 +1814,7 @@ static int restore_task_with_children(void *_arg)
 		if (restore_wait_other_tasks())
 			goto err;
 		fini_restore_mntns();
+		clone_noasan_set_mutex(&task_entries->last_pid_mutex);
 		__restore_switch_stage(CR_STATE_RESTORE);
 	} else {
 		if (restore_finish_stage(task_entries, CR_STATE_FORKING) < 0)
@@ -2301,6 +2302,7 @@ static int restore_root_task(struct pstree_item *init)
 	if (!opts.restore_detach && !opts.exec_cmd)
 		wait(NULL);
 
+	clone_noasan_fini();
 	return 0;
 
 out_kill:
@@ -2331,6 +2333,7 @@ static int restore_root_task(struct pstree_item *init)
 	depopulate_roots_yard(mnt_ns_fd, true);
 	stop_usernsd();
 	__restore_switch_stage(CR_STATE_FAIL);
+	clone_noasan_fini();
 	pr_err("Restoring FAILED.\n");
 	return -1;
 }
@@ -2391,6 +2394,9 @@ int cr_restore_tasks(void)
 	if (cpu_init() < 0)
 		goto err;
 
+	if (clone_noasan_init() < 0)
+		goto err;
+
 	if (vdso_init_restore())
 		goto err;
 
diff --git a/criu/include/clone-noasan.h b/criu/include/clone-noasan.h
index 8ef75fa73675..90d94a939a7c 100644
--- a/criu/include/clone-noasan.h
+++ b/criu/include/clone-noasan.h
@@ -1,6 +1,11 @@
 #ifndef __CR_CLONE_NOASAN_H__
 #define __CR_CLONE_NOASAN_H__
 
+#include "common/lock.h"
+
 int clone_noasan(int (*fn)(void *), int flags, void *arg);
+int clone_noasan_init(void);
+void clone_noasan_fini(void);
+int clone_noasan_set_mutex(mutex_t *clone_mutex);
 
 #endif /* __CR_CLONE_NOASAN_H__ */
-- 
2.20.1



More information about the CRIU mailing list