[CRIU] [PATCH] dump: don't call rollback actions from a signal handler

Andrey Vagin avagin at openvz.org
Mon Feb 22 15:33:01 PST 2016


From: Andrew Vagin <avagin at virtuozzo.com>

We can do this, but we need to be sure that all structures
are consistent at any moment, and we need to block the alarm
while they are inconsistent.

I don't think that we really want to do this now. I suggest
interrupting the current syscall when an alarm signal is triggered.

https://jira.sw.ru/browse/PSBM-44371

v2: print an error message before exiting
Cc: Andrey Ryabinin <aryabinin at virtuozzo.com>
Signed-off-by: Andrew Vagin <avagin at virtuozzo.com>
---
 criu/cr-dump.c       | 45 +++++++++++++++++++++++----------------------
 criu/include/seize.h |  1 +
 criu/seize.c         | 22 +++++++++++++++-------
 3 files changed, 39 insertions(+), 29 deletions(-)

diff --git a/criu/cr-dump.c b/criu/cr-dump.c
index 8f2587b..acb6cbe 100644
--- a/criu/cr-dump.c
+++ b/criu/cr-dump.c
@@ -1366,13 +1366,30 @@ err_cure_imgset:
 	goto err;
 }
 
-typedef void (*sa_handler_t)(int);
+static int alarm_attempts = 0;
 
-static int setup_alarm_handler(sa_handler_t handler)
+bool alarm_timeouted() {
+	return alarm_attempts > 0;
+}
+
+static void alarm_handler(int signo)
+{
+
+	pr_err("Timeout reached. Try to interrupt: %d\n", alarm_attempts);
+	if (alarm_attempts++ < 5) {
+		alarm(1);
+		/* A curren syscall will be exited with EINTR */
+		return;
+	}
+	pr_err("FATAL: Unable to interrupt the current operation\n");
+	BUG();
+}
+
+static int setup_alarm_handler()
 {
 	struct sigaction sa = {
-		.sa_handler	= handler,
-		.sa_flags	= 0,
+		.sa_handler	= alarm_handler,
+		.sa_flags	= 0, /* Don't restart syscalls */
 	};
 
 	sigemptyset(&sa.sa_mask);
@@ -1437,15 +1454,6 @@ static int cr_pre_dump_finish(struct list_head *ctls, int ret)
 	return ret;
 }
 
-void pre_dump_alarm_handler(int signum)
-{
-	LIST_HEAD(empty_list);
-
-	pr_err("Timeout reached\n");
-	cr_pre_dump_finish(&empty_list, -1);
-	exit(-1);
-}
-
 int cr_pre_dump_tasks(pid_t pid)
 {
 	struct pstree_item *item;
@@ -1483,7 +1491,7 @@ int cr_pre_dump_tasks(pid_t pid)
 	if (connect_to_page_server())
 		goto err;
 
-	if (setup_alarm_handler(pre_dump_alarm_handler))
+	if (setup_alarm_handler())
 		goto err;
 
 	if (collect_pstree(pid))
@@ -1585,13 +1593,6 @@ static int cr_dump_finish(int ret)
 	return post_dump_ret ? : (ret != 0);
 }
 
-void dump_alarm_handler(int signum)
-{
-	pr_err("Timeout reached\n");
-	cr_dump_finish(-1);
-	exit(-1);
-}
-
 int cr_dump_tasks(pid_t pid)
 {
 	InventoryEntry he = INVENTORY_ENTRY__INIT;
@@ -1640,7 +1641,7 @@ int cr_dump_tasks(pid_t pid)
 	if (connect_to_page_server())
 		goto err;
 
-	if (setup_alarm_handler(dump_alarm_handler))
+	if (setup_alarm_handler())
 		goto err;
 
 	/*
diff --git a/criu/include/seize.h b/criu/include/seize.h
index 315fab2..9cfebb1 100644
--- a/criu/include/seize.h
+++ b/criu/include/seize.h
@@ -4,5 +4,6 @@
 extern int collect_pstree(pid_t pid);
 extern void pstree_switch_state(struct pstree_item *root_item, int st);
 extern const char *get_real_freezer_state(void);
+extern bool alarm_timeouted(void);
 
 #endif
diff --git a/criu/seize.c b/criu/seize.c
index b922723..3fd3d62 100644
--- a/criu/seize.c
+++ b/criu/seize.c
@@ -297,6 +297,9 @@ static int freeze_processes(void)
 			continue;
 		}
 
+		if (alarm_timeouted())
+			goto err;
+
 		timeout = 100000000 * (i + 1); /* 100 msec */
 		req.tv_nsec = timeout % 1000000000;
 		req.tv_sec = timeout / 1000000000;
@@ -357,6 +360,11 @@ static int collect_children(struct pstree_item *item)
 
 		nr_inprogress++;
 
+		if (alarm_timeouted()) {
+			ret = -1;
+			goto free;
+		}
+
 		pr_info("Seized task %d, state %d\n", pid, ret);
 
 		c = alloc_pstree_item();
@@ -642,6 +650,13 @@ int collect_pstree(pid_t pid)
 
 	timing_start(TIME_FREEZING);
 
+	/*
+	 * wait4() may hang for some reason. Enable timer and fire SIGALRM
+	 * if timeout reached. SIGALRM handler will do  the necessary
+	 * cleanups and terminate current process.
+	 */
+	alarm(opts.timeout);
+
 	if (opts.freeze_cgroup && freeze_processes())
 		goto err;
 
@@ -656,13 +671,6 @@ int collect_pstree(pid_t pid)
 		goto err;
 	}
 
-	/*
-	 * wait4() may hang for some reason. Enable timer and fire SIGALRM
-	 * if timeout reached. SIGALRM handler will do  the necessary
-	 * cleanups and terminate current process.
-	 */
-	alarm(opts.timeout);
-
 	ret = seize_wait_task(pid, -1, &dmpi(root_item)->pi_creds);
 	if (ret < 0)
 		goto err;
-- 
2.5.0



More information about the CRIU mailing list