[CRIU] [PATCH] dump: don't call rollback actions from a signal handler (v2)

Andrew Vagin avagin at virtuozzo.com
Thu Feb 25 16:33:44 PST 2016


Add (v2) into the subject

On Tue, Feb 23, 2016 at 02:33:01AM +0300, Andrey Vagin wrote:
> From: Andrew Vagin <avagin at virtuozzo.com>
> 
> We can do this, but we need to be sure that all structures
> are consistent at any moment, and we need to block the alarm
> while they are inconsistent.
> 
> I don't think that we really want to do this now. I suggest
> interrupting the current syscall if an alarm signal is triggered.
> 
> https://jira.sw.ru/browse/PSBM-44371
> 
> v2: print an error message before exiting
> Cc: Andrey Ryabinin <aryabinin at virtuozzo.com>
> Signed-off-by: Andrew Vagin <avagin at virtuozzo.com>
> ---
>  criu/cr-dump.c       | 45 +++++++++++++++++++++++----------------------
>  criu/include/seize.h |  1 +
>  criu/seize.c         | 22 +++++++++++++++-------
>  3 files changed, 39 insertions(+), 29 deletions(-)
> 
> diff --git a/criu/cr-dump.c b/criu/cr-dump.c
> index 8f2587b..acb6cbe 100644
> --- a/criu/cr-dump.c
> +++ b/criu/cr-dump.c
> @@ -1366,13 +1366,30 @@ err_cure_imgset:
>  	goto err;
>  }
>  
> -typedef void (*sa_handler_t)(int);
> +static int alarm_attempts = 0;
>  
> -static int setup_alarm_handler(sa_handler_t handler)
> +bool alarm_timeouted() {
> +	return alarm_attempts > 0;
> +}
> +
> +static void alarm_handler(int signo)
> +{
> +
> +	pr_err("Timeout reached. Try to interrupt: %d\n", alarm_attempts);
> +	if (alarm_attempts++ < 5) {
> +		alarm(1);
> +		/* The current syscall will be exited with EINTR */
> +		return;
> +	}
> +	pr_err("FATAL: Unable to interrupt the current operation\n");
> +	BUG();
> +}
> +
> +static int setup_alarm_handler()
>  {
>  	struct sigaction sa = {
> -		.sa_handler	= handler,
> -		.sa_flags	= 0,
> +		.sa_handler	= alarm_handler,
> +		.sa_flags	= 0, /* Don't restart syscalls */
>  	};
>  
>  	sigemptyset(&sa.sa_mask);
> @@ -1437,15 +1454,6 @@ static int cr_pre_dump_finish(struct list_head *ctls, int ret)
>  	return ret;
>  }
>  
> -void pre_dump_alarm_handler(int signum)
> -{
> -	LIST_HEAD(empty_list);
> -
> -	pr_err("Timeout reached\n");
> -	cr_pre_dump_finish(&empty_list, -1);
> -	exit(-1);
> -}
> -
>  int cr_pre_dump_tasks(pid_t pid)
>  {
>  	struct pstree_item *item;
> @@ -1483,7 +1491,7 @@ int cr_pre_dump_tasks(pid_t pid)
>  	if (connect_to_page_server())
>  		goto err;
>  
> -	if (setup_alarm_handler(pre_dump_alarm_handler))
> +	if (setup_alarm_handler())
>  		goto err;
>  
>  	if (collect_pstree(pid))
> @@ -1585,13 +1593,6 @@ static int cr_dump_finish(int ret)
>  	return post_dump_ret ? : (ret != 0);
>  }
>  
> -void dump_alarm_handler(int signum)
> -{
> -	pr_err("Timeout reached\n");
> -	cr_dump_finish(-1);
> -	exit(-1);
> -}
> -
>  int cr_dump_tasks(pid_t pid)
>  {
>  	InventoryEntry he = INVENTORY_ENTRY__INIT;
> @@ -1640,7 +1641,7 @@ int cr_dump_tasks(pid_t pid)
>  	if (connect_to_page_server())
>  		goto err;
>  
> -	if (setup_alarm_handler(dump_alarm_handler))
> +	if (setup_alarm_handler())
>  		goto err;
>  
>  	/*
> diff --git a/criu/include/seize.h b/criu/include/seize.h
> index 315fab2..9cfebb1 100644
> --- a/criu/include/seize.h
> +++ b/criu/include/seize.h
> @@ -4,5 +4,6 @@
>  extern int collect_pstree(pid_t pid);
>  extern void pstree_switch_state(struct pstree_item *root_item, int st);
>  extern const char *get_real_freezer_state(void);
> +extern bool alarm_timeouted(void);
>  
>  #endif
> diff --git a/criu/seize.c b/criu/seize.c
> index b922723..3fd3d62 100644
> --- a/criu/seize.c
> +++ b/criu/seize.c
> @@ -297,6 +297,9 @@ static int freeze_processes(void)
>  			continue;
>  		}
>  
> +		if (alarm_timeouted())
> +			goto err;
> +
>  		timeout = 100000000 * (i + 1); /* 100 msec */
>  		req.tv_nsec = timeout % 1000000000;
>  		req.tv_sec = timeout / 1000000000;
> @@ -357,6 +360,11 @@ static int collect_children(struct pstree_item *item)
>  
>  		nr_inprogress++;
>  
> +		if (alarm_timeouted()) {
> +			ret = -1;
> +			goto free;
> +		}
> +
>  		pr_info("Seized task %d, state %d\n", pid, ret);
>  
>  		c = alloc_pstree_item();
> @@ -642,6 +650,13 @@ int collect_pstree(pid_t pid)
>  
>  	timing_start(TIME_FREEZING);
>  
> +	/*
> +	 * wait4() may hang for some reason. Enable timer and fire SIGALRM
> +	 * if timeout reached. SIGALRM handler will do  the necessary
> +	 * cleanups and terminate current process.
> +	 */
> +	alarm(opts.timeout);
> +
>  	if (opts.freeze_cgroup && freeze_processes())
>  		goto err;
>  
> @@ -656,13 +671,6 @@ int collect_pstree(pid_t pid)
>  		goto err;
>  	}
>  
> -	/*
> -	 * wait4() may hang for some reason. Enable timer and fire SIGALRM
> -	 * if timeout reached. SIGALRM handler will do  the necessary
> -	 * cleanups and terminate current process.
> -	 */
> -	alarm(opts.timeout);
> -
>  	ret = seize_wait_task(pid, -1, &dmpi(root_item)->pi_creds);
>  	if (ret < 0)
>  		goto err;
> -- 
> 2.5.0
> 


More information about the CRIU mailing list