[CRIU] [PATCH v2] dump: add timeout for collecting processes

Pavel Emelyanov xemul at parallels.com
Thu Dec 10 03:54:58 PST 2015


On 12/08/2015 02:36 PM, Andrey Ryabinin wrote:
> Currently criu dump may hang indefinitely. E.g. in wait for task
> that blocked in vfork() or task could be in D state for some other
> reason. This patch adds time limit on dump operation.
> If collecting processes takes too long, dump process will be terminated.
> Timeout is 20 seconds by default, but it could be changed via parameter.
> 
> Signed-off-by: Andrey Ryabinin <aryabinin at virtuozzo.com>
> ---
>  crtools.c            | 10 ++++++++--
>  include/cr_options.h |  3 +++
>  seize.c              | 10 ++++++++++
>  3 files changed, 21 insertions(+), 2 deletions(-)
> 
> diff --git a/crtools.c b/crtools.c
> index d3812a1..17aac53 100644
> --- a/crtools.c
> +++ b/crtools.c
> @@ -64,6 +64,7 @@ void init_opts(void)
>  	opts.manage_cgroups = CG_MODE_DEFAULT;
>  	opts.ps_socket = -1;
>  	opts.ghost_limit = DEFAULT_GHOST_LIMIT;
> +	opts.timeout = DEFAULT_TIMEOUT;
>  }
>  
>  static int parse_ns_string(const char *ptr)
> @@ -253,6 +254,7 @@ int main(int argc, char *argv[], char *envp[])
>  		{ "freeze-cgroup",		required_argument,	0, 1068 },
>  		{ "ghost-limit",		required_argument,	0, 1069 },
>  		{ "irmap-scan-path",		required_argument,	0, 1070 },
> +		{ "timeout",			required_argument,	0, 1071 },
>  		{ },
>  	};
>  
> @@ -498,6 +500,9 @@ int main(int argc, char *argv[], char *envp[])
>  			if (irmap_scan_path_add(optarg))
>  				return -1;
>  			break;
> +		case 1071:
> +			opts.timeout = atoi(optarg);
> +			break;
>  		case 'M':
>  			{
>  				char *aux;
> @@ -708,8 +713,9 @@ usage:
>  "                        'cpu','fpu','all','ins','none'. To disable capability, prefix it with '^'.\n"
>  "     --exec-cmd         execute the command specified after '--' on successful\n"
>  "                        restore making it the parent of the restored process\n"
> -"  --freeze-cgroup\n"
> -"                        use cgroup freezer to collect processes\n"
> +"  --freeze-cgroup       use cgroup freezer to collect processes\n"
> +"  --timeout seconds     timeout in seconds for dump. Dumping will fail if timeout reached.\n"

You disarm alarm() after freezeing tasks, thus that's timeout not for
dump, but for freezeing period.

> +"                        If not specified timeout is 20 seconds\n"
>  "\n"
>  "* Special resources support:\n"
>  "  -x|--" USK_EXT_PARAM "inode,.." "      allow external unix connections (optionally can be assign socket's inode that allows one-sided dump)\n"
> diff --git a/include/cr_options.h b/include/cr_options.h
> index eac7283..6296721 100644
> --- a/include/cr_options.h
> +++ b/include/cr_options.h
> @@ -38,6 +38,8 @@ struct cg_root_opt {
>   */
>  #define DEFAULT_GHOST_LIMIT	(1 << 20)
>  
> +#define DEFAULT_TIMEOUT		20

Since it's freezeing, 20 seconds is too much. Maybe 5?

> +
>  struct irmap;
>  
>  struct irmap_path_opt {
> @@ -95,6 +97,7 @@ struct cr_options {
>  	bool			overlayfs;
>  	size_t			ghost_limit;
>  	struct list_head	irmap_scan_paths;
> +	unsigned int		timeout;
>  };
>  
>  extern struct cr_options opts;
> diff --git a/seize.c b/seize.c
> index de5c929..63adf5e 100644
> --- a/seize.c
> +++ b/seize.c
> @@ -568,6 +568,12 @@ int collect_pstree(pid_t pid)
>  		goto err;
>  	}
>  
> +	/*
> +	 * Fire SIGALRM if timeout reached and terminate current
> +	 * process (this is a default action for SIGALRM).
> +	 */
> +	alarm(opts.timeout);

No, that's not nice. Just killing criu process would leave all dumpee-s with
fractions of parasite code in.

> +
>  	ret = seize_wait_task(pid, -1, &dmpi(root_item)->pi_creds);
>  	if (ret < 0)
>  		goto err;
> @@ -578,11 +584,15 @@ int collect_pstree(pid_t pid)
>  	if (ret < 0)
>  		goto err;
>  
> +	/* cancel pending alarm */

Please, don't write comments that describe only what is done.
If describing, then _why_ something is done.

> +	alarm(0);
>  	timing_stop(TIME_FREEZING);
>  	timing_start(TIME_FROZEN);
>  
>  	return 0;
>  err:
> +	/* cancel pending alarm */
> +	alarm(0);
>  	pstree_switch_state(root_item, TASK_ALIVE);
>  	return -1;
>  }
> 



More information about the CRIU mailing list