[CRIU] [PATCH] dump: don't call rollback actions from a signal handler (v2)
Andrew Vagin
avagin at virtuozzo.com
Thu Feb 25 16:33:44 PST 2016
Add (v2) into the subject
On Tue, Feb 23, 2016 at 02:33:01AM +0300, Andrey Vagin wrote:
> From: Andrew Vagin <avagin at virtuozzo.com>
>
> We can do this, but we need to be sure that all structures
> are consistent at any moment, and we need to block the alarm while
> they are inconsistent.
>
> I don't think that we really want to do this now. I suggest that we
> interrupt the current syscall if an alarm signal is triggered.
>
> https://jira.sw.ru/browse/PSBM-44371
>
> v2: print an error message before exiting
> Cc: Andrey Ryabinin <aryabinin at virtuozzo.com>
> Signed-off-by: Andrew Vagin <avagin at virtuozzo.com>
> ---
> criu/cr-dump.c | 45 +++++++++++++++++++++++----------------------
> criu/include/seize.h | 1 +
> criu/seize.c | 22 +++++++++++++++-------
> 3 files changed, 39 insertions(+), 29 deletions(-)
>
> diff --git a/criu/cr-dump.c b/criu/cr-dump.c
> index 8f2587b..acb6cbe 100644
> --- a/criu/cr-dump.c
> +++ b/criu/cr-dump.c
> @@ -1366,13 +1366,30 @@ err_cure_imgset:
> goto err;
> }
>
> -typedef void (*sa_handler_t)(int);
> +static int alarm_attempts = 0;
>
> -static int setup_alarm_handler(sa_handler_t handler)
> +bool alarm_timeouted() {
> + return alarm_attempts > 0;
> +}
> +
> +static void alarm_handler(int signo)
> +{
> +
> + pr_err("Timeout reached. Try to interrupt: %d\n", alarm_attempts);
> + if (alarm_attempts++ < 5) {
> + alarm(1);
> + /* The current syscall will be exited with EINTR */
> + return;
> + }
> + pr_err("FATAL: Unable to interrupt the current operation\n");
> + BUG();
> +}
> +
> +static int setup_alarm_handler()
> {
> struct sigaction sa = {
> - .sa_handler = handler,
> - .sa_flags = 0,
> + .sa_handler = alarm_handler,
> + .sa_flags = 0, /* Don't restart syscalls */
> };
>
> sigemptyset(&sa.sa_mask);
> @@ -1437,15 +1454,6 @@ static int cr_pre_dump_finish(struct list_head *ctls, int ret)
> return ret;
> }
>
> -void pre_dump_alarm_handler(int signum)
> -{
> - LIST_HEAD(empty_list);
> -
> - pr_err("Timeout reached\n");
> - cr_pre_dump_finish(&empty_list, -1);
> - exit(-1);
> -}
> -
> int cr_pre_dump_tasks(pid_t pid)
> {
> struct pstree_item *item;
> @@ -1483,7 +1491,7 @@ int cr_pre_dump_tasks(pid_t pid)
> if (connect_to_page_server())
> goto err;
>
> - if (setup_alarm_handler(pre_dump_alarm_handler))
> + if (setup_alarm_handler())
> goto err;
>
> if (collect_pstree(pid))
> @@ -1585,13 +1593,6 @@ static int cr_dump_finish(int ret)
> return post_dump_ret ? : (ret != 0);
> }
>
> -void dump_alarm_handler(int signum)
> -{
> - pr_err("Timeout reached\n");
> - cr_dump_finish(-1);
> - exit(-1);
> -}
> -
> int cr_dump_tasks(pid_t pid)
> {
> InventoryEntry he = INVENTORY_ENTRY__INIT;
> @@ -1640,7 +1641,7 @@ int cr_dump_tasks(pid_t pid)
> if (connect_to_page_server())
> goto err;
>
> - if (setup_alarm_handler(dump_alarm_handler))
> + if (setup_alarm_handler())
> goto err;
>
> /*
> diff --git a/criu/include/seize.h b/criu/include/seize.h
> index 315fab2..9cfebb1 100644
> --- a/criu/include/seize.h
> +++ b/criu/include/seize.h
> @@ -4,5 +4,6 @@
> extern int collect_pstree(pid_t pid);
> extern void pstree_switch_state(struct pstree_item *root_item, int st);
> extern const char *get_real_freezer_state(void);
> +extern bool alarm_timeouted(void);
>
> #endif
> diff --git a/criu/seize.c b/criu/seize.c
> index b922723..3fd3d62 100644
> --- a/criu/seize.c
> +++ b/criu/seize.c
> @@ -297,6 +297,9 @@ static int freeze_processes(void)
> continue;
> }
>
> + if (alarm_timeouted())
> + goto err;
> +
> timeout = 100000000 * (i + 1); /* 100 msec */
> req.tv_nsec = timeout % 1000000000;
> req.tv_sec = timeout / 1000000000;
> @@ -357,6 +360,11 @@ static int collect_children(struct pstree_item *item)
>
> nr_inprogress++;
>
> + if (alarm_timeouted()) {
> + ret = -1;
> + goto free;
> + }
> +
> pr_info("Seized task %d, state %d\n", pid, ret);
>
> c = alloc_pstree_item();
> @@ -642,6 +650,13 @@ int collect_pstree(pid_t pid)
>
> timing_start(TIME_FREEZING);
>
> + /*
> + * wait4() may hang for some reason. Enable timer and fire SIGALRM
> + * if timeout reached. SIGALRM handler will do the necessary
> + * cleanups and terminate current process.
> + */
> + alarm(opts.timeout);
> +
> if (opts.freeze_cgroup && freeze_processes())
> goto err;
>
> @@ -656,13 +671,6 @@ int collect_pstree(pid_t pid)
> goto err;
> }
>
> - /*
> - * wait4() may hang for some reason. Enable timer and fire SIGALRM
> - * if timeout reached. SIGALRM handler will do the necessary
> - * cleanups and terminate current process.
> - */
> - alarm(opts.timeout);
> -
> ret = seize_wait_task(pid, -1, &dmpi(root_item)->pi_creds);
> if (ret < 0)
> goto err;
> --
> 2.5.0
>
More information about the CRIU
mailing list