[CRIU] [PATCH v3 4/4] dump: add timeout for collecting processes

Andrey Ryabinin aryabinin at virtuozzo.com
Wed Dec 16 03:59:51 PST 2015


Currently criu dump may hang indefinitely, e.g. while waiting for a task
that is blocked in vfork(), or for a task that is in D state for some
other reason. This patch adds a time limit on collecting tasks during
the dump operation. If collecting processes takes too long, the dump
process will be terminated. The timeout is 5 seconds by default, but
it can be changed via the --timeout parameter.

Signed-off-by: Andrey Ryabinin <aryabinin at virtuozzo.com>
---
 cr-dump.c            | 41 +++++++++++++++++++++++++++++++++++++++++
 crtools.c            |  4 ++++
 include/cr_options.h |  3 +++
 seize.c              |  9 +++++++++
 4 files changed, 57 insertions(+)

diff --git a/cr-dump.c b/cr-dump.c
index 88f3064..9049c1b 100644
--- a/cr-dump.c
+++ b/cr-dump.c
@@ -1351,6 +1351,25 @@ err_cure_imgset:
 	goto err;
 }
 
+/* Signature of a plain one-argument signal handler (sigaction's sa_handler). */
+typedef void (*sa_handler_t)(int);
+
+/* Install @handler as the SIGALRM handler: 0 on success, -1 on failure. */
+static int setup_alarm_handler(sa_handler_t handler)
+{
+	struct sigaction act = { .sa_handler = handler };
+
+	/* Mask out SIGALRM for as long as the handler is running. */
+	sigemptyset(&act.sa_mask);
+	sigaddset(&act.sa_mask, SIGALRM);
+
+	if (sigaction(SIGALRM, &act, NULL) == 0)
+		return 0;
+
+	pr_perror("Unable to setup SIGALRM handler");
+	return -1;
+}
+
 static int cr_pre_dump_finish(struct list_head *ctls, int ret)
 {
 	struct parasite_ctl *ctl, *n;
@@ -1403,6 +1422,23 @@ static int cr_pre_dump_finish(struct list_head *ctls, int ret)
 	return ret;
 }
 
+/*
+ * SIGALRM handler used on the pre-dump path: log the timeout, run
+ * cr_pre_dump_finish() on an empty ctl list with a failure code and
+ * terminate the process.
+ *
+ * NOTE(review): exit() is not on the POSIX async-signal-safe list;
+ * confirm that calling it (and the helpers used here) is acceptable.
+ */
+void pre_dump_alarm_handler(int signum)
+{
+	LIST_HEAD(no_ctls);
+
+	pr_err("Timeout reached\n");
+	cr_pre_dump_finish(&no_ctls, -1);
+	exit(-1);
+}
+
 int cr_pre_dump_tasks(pid_t pid)
 {
 	struct pstree_item *item;
@@ -1437,6 +1465,9 @@ int cr_pre_dump_tasks(pid_t pid)
 	if (connect_to_page_server())
 		goto err;
 
+	if (setup_alarm_handler(pre_dump_alarm_handler))
+		goto err;
+
 	if (collect_pstree(pid))
 		goto err;
 
@@ -1537,6 +1568,21 @@ static int cr_dump_finish(int ret)
 	return post_dump_ret ? : (ret != 0);
 }
 
+/*
+ * SIGALRM handler used on the dump path: log the timeout, run
+ * cr_dump_finish() with a failure code and terminate the process.
+ * @signum is not used.
+ *
+ * NOTE(review): exit() is not on the POSIX async-signal-safe list;
+ * confirm that calling it (and the helpers used here) is acceptable.
+ */
+void dump_alarm_handler(int signum)
+{
+	pr_err("Timeout reached\n");
+	cr_dump_finish(-1);
+	exit(-1);
+}
+
 int cr_dump_tasks(pid_t pid)
 {
 	struct pstree_item *item;
@@ -1584,6 +1622,9 @@ int cr_dump_tasks(pid_t pid)
 	if (connect_to_page_server())
 		goto err;
 
+	if (setup_alarm_handler(dump_alarm_handler))
+		goto err;
+
 	/*
 	 * The collect_pstree will also stop (PTRACE_SEIZE) the tasks
 	 * thus ensuring that they don't modify anything we collect
diff --git a/crtools.c b/crtools.c
index f357c0c..6d5817f 100644
--- a/crtools.c
+++ b/crtools.c
@@ -65,6 +65,7 @@ void init_opts(void)
 	opts.manage_cgroups = CG_MODE_DEFAULT;
 	opts.ps_socket = -1;
 	opts.ghost_limit = DEFAULT_GHOST_LIMIT;
+	opts.timeout = DEFAULT_TIMEOUT;
 }
 
 static int parse_ns_string(const char *ptr)
@@ -255,6 +256,7 @@ int main(int argc, char *argv[], char *envp[])
 		{ "ghost-limit",		required_argument,	0, 1069 },
 		{ "irmap-scan-path",		required_argument,	0, 1070 },
 		{ "lsm-profile",		required_argument,	0, 1071 },
+		{ "timeout",			required_argument,	0, 1072 },
 		{ },
 	};
 
@@ -503,6 +505,10 @@ int main(int argc, char *argv[], char *envp[])
 		case 1071:
 			if (parse_lsm_arg(optarg) < 0)
 				return -1;
+			break;
+		case 1072:
+			/* --timeout: seconds, later handed to alarm() */
+			opts.timeout = atoi(optarg);
 			break;
 		case 'M':
 			{
diff --git a/include/cr_options.h b/include/cr_options.h
index 2b1054d..133595a 100644
--- a/include/cr_options.h
+++ b/include/cr_options.h
@@ -38,6 +38,8 @@ struct cg_root_opt {
  */
 #define DEFAULT_GHOST_LIMIT	(1 << 20)
 
+#define DEFAULT_TIMEOUT		5	/* seconds; initial value of opts.timeout */
+
 struct irmap;
 
 struct irmap_path_opt {
@@ -97,6 +99,7 @@ struct cr_options {
 	struct list_head	irmap_scan_paths;
 	bool			lsm_supplied;
 	char			*lsm_profile;
+	unsigned int		timeout;
 };
 
 extern struct cr_options opts;
diff --git a/seize.c b/seize.c
index c94e22d..92c2879 100644
--- a/seize.c
+++ b/seize.c
@@ -635,6 +635,13 @@ int collect_pstree(pid_t pid)
 		goto err;
 	}
 
+	/*
+	 * wait4() may hang for some reason. Arm a timer that fires SIGALRM
+	 * once the timeout is reached. The SIGALRM handler will do the
+	 * necessary cleanups and terminate the current process.
+	 */
+	alarm(opts.timeout);
+
 	ret = seize_wait_task(pid, -1, &dmpi(root_item)->pi_creds);
 	if (ret < 0)
 		goto err;
@@ -653,6 +660,8 @@ int collect_pstree(pid_t pid)
 	timing_start(TIME_FROZEN);
 
 err:
+	/* Freezing stage is over (in time or with an error) - disable timer. */
+	alarm(0);
 	return ret;
 }
 
-- 
2.4.10



More information about the CRIU mailing list