[CRIU] [PATCH v2] dump: add timeout for collecting processes
Pavel Emelyanov
xemul at parallels.com
Thu Dec 10 03:54:58 PST 2015
On 12/08/2015 02:36 PM, Andrey Ryabinin wrote:
> Currently criu dump may hang indefinitely. E.g. in wait for task
> that blocked in vfork() or task could be in D state for some other
> reason. This patch adds time limit on dump operation.
> If collecting processes takes too long, dump process will be terminated.
> Timeout is 20 seconds by default, but it could be changed via parameter.
>
> Signed-off-by: Andrey Ryabinin <aryabinin at virtuozzo.com>
> ---
> crtools.c | 10 ++++++++--
> include/cr_options.h | 3 +++
> seize.c | 10 ++++++++++
> 3 files changed, 21 insertions(+), 2 deletions(-)
>
> diff --git a/crtools.c b/crtools.c
> index d3812a1..17aac53 100644
> --- a/crtools.c
> +++ b/crtools.c
> @@ -64,6 +64,7 @@ void init_opts(void)
> opts.manage_cgroups = CG_MODE_DEFAULT;
> opts.ps_socket = -1;
> opts.ghost_limit = DEFAULT_GHOST_LIMIT;
> + opts.timeout = DEFAULT_TIMEOUT;
> }
>
> static int parse_ns_string(const char *ptr)
> @@ -253,6 +254,7 @@ int main(int argc, char *argv[], char *envp[])
> { "freeze-cgroup", required_argument, 0, 1068 },
> { "ghost-limit", required_argument, 0, 1069 },
> { "irmap-scan-path", required_argument, 0, 1070 },
> + { "timeout", required_argument, 0, 1071 },
> { },
> };
>
> @@ -498,6 +500,9 @@ int main(int argc, char *argv[], char *envp[])
> if (irmap_scan_path_add(optarg))
> return -1;
> break;
> + case 1071:
> + opts.timeout = atoi(optarg);
> + break;
> case 'M':
> {
> char *aux;
> @@ -708,8 +713,9 @@ usage:
> " 'cpu','fpu','all','ins','none'. To disable capability, prefix it with '^'.\n"
> " --exec-cmd execute the command specified after '--' on successful\n"
> " restore making it the parent of the restored process\n"
> -" --freeze-cgroup\n"
> -" use cgroup freezer to collect processes\n"
> +" --freeze-cgroup use cgroup freezer to collect processes\n"
> +" --timeout seconds timeout in seconds for dump. Dumping will fail if timeout reached.\n"
You disarm alarm() after freezeing tasks, thus that's timeout not for
dump, but for freezeing period.
> +" If not specified timeout is 20 seconds\n"
> "\n"
> "* Special resources support:\n"
> " -x|--" USK_EXT_PARAM "inode,.." " allow external unix connections (optionally can be assign socket's inode that allows one-sided dump)\n"
> diff --git a/include/cr_options.h b/include/cr_options.h
> index eac7283..6296721 100644
> --- a/include/cr_options.h
> +++ b/include/cr_options.h
> @@ -38,6 +38,8 @@ struct cg_root_opt {
> */
> #define DEFAULT_GHOST_LIMIT (1 << 20)
>
> +#define DEFAULT_TIMEOUT 20
Since it's freezeing, 20 seconds is too much. Maybe 5?
> +
> struct irmap;
>
> struct irmap_path_opt {
> @@ -95,6 +97,7 @@ struct cr_options {
> bool overlayfs;
> size_t ghost_limit;
> struct list_head irmap_scan_paths;
> + unsigned int timeout;
> };
>
> extern struct cr_options opts;
> diff --git a/seize.c b/seize.c
> index de5c929..63adf5e 100644
> --- a/seize.c
> +++ b/seize.c
> @@ -568,6 +568,12 @@ int collect_pstree(pid_t pid)
> goto err;
> }
>
> + /*
> + * Fire SIGALRM if timeout reached and terminate current
> + * process (this is a default action for SIGALRM).
> + */
> + alarm(opts.timeout);
No, that's not nice. Just killing criu process would leave all dumpee-s with
fractions of parasite code in.
> +
> ret = seize_wait_task(pid, -1, &dmpi(root_item)->pi_creds);
> if (ret < 0)
> goto err;
> @@ -578,11 +584,15 @@ int collect_pstree(pid_t pid)
> if (ret < 0)
> goto err;
>
> + /* cancel pending alarm */
Please, don't write comments that describe only what is done.
If describing, then _why_ something is done.
> + alarm(0);
> timing_stop(TIME_FREEZING);
> timing_start(TIME_FROZEN);
>
> return 0;
> err:
> + /* cancel pending alarm */
> + alarm(0);
> pstree_switch_state(root_item, TASK_ALIVE);
> return -1;
> }
>
More information about the CRIU
mailing list