[CRIU] [PATCH 4/4] dump: use freezer cgroup to seize processes (v3)

Pavel Emelyanov xemul at parallels.com
Fri Aug 7 03:56:33 PDT 2015


On 08/06/2015 12:37 PM, Andrey Vagin wrote:
> Without using a freezer cgroup, we need to do a few iterations to catch
> all tasks, because new tasks can be born. If new tasks appear faster
> than criu collects them, criu fails. The freezer cgroup allows us to
> solve this problem.
> 
> We freeze the freezer cgroup, then attach to the tasks with ptrace and thaw
> the freezer cgroup. We assume that all tasks which are going to be
> dumped are in the specified freezer cgroup.
> 
> v2: fix comments from Christopher
> Reviewed-by: Christopher Covington <cov at codeaurora.org>
> 
> v3: refactor task_seize
> 
> Cc: Christopher Covington <cov at codeaurora.org>
> Signed-off-by: Andrey Vagin <avagin at openvz.org>
> ---
>  crtools.c            |   6 ++
>  include/cr_options.h |   1 +
>  seize.c              | 181 +++++++++++++++++++++++++++++++++++++++++++++++++--
>  3 files changed, 184 insertions(+), 4 deletions(-)
> 
> diff --git a/crtools.c b/crtools.c
> index 6af6080..9096420 100644
> --- a/crtools.c
> +++ b/crtools.c
> @@ -235,6 +235,7 @@ int main(int argc, char *argv[], char *envp[])
>  		{ "enable-fs",			required_argument,	0, 1065 },
>  		{ "enable-external-sharing", 	no_argument, 		0, 1066 },
>  		{ "enable-external-masters", 	no_argument, 		0, 1067 },
> +		{ "freeze-cgroup",		required_argument,	0, 1068 },
>  		{ },
>  	};
>  
> @@ -465,6 +466,9 @@ int main(int argc, char *argv[], char *envp[])
>  		case 1067:
>  			opts.enable_external_masters = true;
>  			break;
> +		case 1068:
> +			opts.freeze_cgroup = optarg;
> +			break;
>  		case 'M':
>  			{
>  				char *aux;
> @@ -676,6 +680,8 @@ usage:
>  "                        'cpu','fpu','all','ins','none'. To disable capability, prefix it with '^'.\n"
>  "     --exec-cmd         execute the command specified after '--' on successful\n"
>  "                        restore making it the parent of the restored process\n"
> +"  --freeze-cgroup\n"
> +"                        use cgroup freezer to collect processes\n"
>  "\n"
>  "* Special resources support:\n"
>  "  -x|--" USK_EXT_PARAM "inode,.." "      allow external unix connections (optionally can be assign socket's inode that allows one-sided dump)\n"
> diff --git a/include/cr_options.h b/include/cr_options.h
> index 19c2f77..f981806 100644
> --- a/include/cr_options.h
> +++ b/include/cr_options.h
> @@ -57,6 +57,7 @@ struct cr_options {
>  	char			*output;
>  	char			*root;
>  	char			*pidfile;
> +	char			*freeze_cgroup;
>  	struct list_head	veth_pairs;
>  	struct list_head	scripts;
>  	struct list_head	ext_mounts;
> diff --git a/seize.c b/seize.c
> index 86df3f0..ab43ad3 100644
> --- a/seize.c
> +++ b/seize.c
> @@ -18,6 +18,169 @@
>  
>  #define NR_ATTEMPTS 5
>  
> +const char frozen[]	= "FROZEN";
> +const char freezing[]	= "FREEZING";
> +const char thawed[]	= "THAWED";
> +
> +static const char *get_freezer_state(int fd)
> +{
> +	int ret;
> +	char path[PATH_MAX];
> +
> +	lseek(fd, 0, SEEK_SET);
> +	ret = read(fd, path, sizeof(path) - 1);
> +	if (ret <= 0) {
> +		pr_perror("Unable to get a current state");
> +		goto err;
> +	}
> +	if (path[ret - 1] == '\n')
> +		path[ret - 1] = 0;
> +	else
> +		path[ret] = 0;
> +
> +	pr_debug("freezer.state=%s\n", path);
> +	if (strcmp(path, frozen) == 0)
> +		return frozen;
> +	if (strcmp(path, freezing) == 0)
> +		return freezing;
> +	if (strcmp(path, thawed) == 0)
> +		return thawed;
> +
> +	pr_err("Unknown freezer state: %s", path);
> +err:
> +	return NULL;
> +}
> +
> +static bool freezer_thawed;
> +
> +static int freezer_restore_state(void)
> +{
> +	int fd;
> +	char path[PATH_MAX];
> +
> +	if (!opts.freeze_cgroup || freezer_thawed)

freezer_thawed is set to true (when the cgroup was found thawed) right
before you switch it into the frozen state, so why return 0 here?

> +		return 0;
> +
> +	snprintf(path, sizeof(path), "%s/freezer.state", opts.freeze_cgroup);
> +	fd = open(path, O_RDWR);
> +	if (fd < 0) {
> +		pr_perror("Unable to open %s", path);
> +		return -1;
> +	}
> +
> +	if (write(fd, frozen, sizeof(frozen)) != sizeof(frozen)) {

You always put the freezer cgroup into the frozen state here. Why?

> +			pr_perror("Unable to freeze tasks");
> +			close(fd);
> +			return -1;
> +	}
> +	close(fd);
> +	return 0;
> +}
> +
> +static int freeze_processes(void)
> +{
> +	int i, ret, fd, exit_code = -1;
> +	char path[PATH_MAX];
> +	const char *state = thawed;
> +	FILE *f;
> +
> +	snprintf(path, sizeof(path), "%s/freezer.state", opts.freeze_cgroup);
> +	fd = open(path, O_RDWR);
> +	if (fd < 0) {
> +		pr_perror("Unable to open %s", path);
> +		return -1;
> +	}
> +	state = get_freezer_state(fd);
> +	if (!state) {
> +		close(fd);
> +		return -1;
> +	}
> +	if (state == thawed)
> +		freezer_thawed = true;
> +
> +	lseek(fd, 0, SEEK_SET);
> +	if (write(fd, frozen, sizeof(frozen)) != sizeof(frozen)) {
> +		pr_perror("Unable to freeze tasks");
> +		close(fd);
> +		return -1;
> +	}
> +
> +	/*
> +	 * There is not way to wait a specified state, so we need to poll the
> +	 * freezer.state.
> +	 */
> +	for (i = 0; i < NR_ATTEMPTS; i++) {
> +		struct timespec req = {};
> +
> +		/*
> +		 * New tasks can appear while a freezer state isn't
> +		 * frozen, so we need to catch all new tasks.
> +		 */
> +		snprintf(path, sizeof(path), "%s/tasks", opts.freeze_cgroup);
> +		f = fopen(path, "r");
> +		if (f == NULL) {
> +			pr_perror("Unable to open %s", path);
> +			goto err;
> +		}
> +		while (fgets(path, sizeof(path), f)) {
> +			pid_t pid;
> +
> +			pid = atoi(path);
> +
> +			ret = wait4(pid, NULL, __WALL | WNOHANG, NULL);
> +			if (ret == 0) /* skip already seized tasks */
> +				continue;

Please put a comment here explaining what you expect from this wait4 call and why.

> +			if (seize_catch_task(pid)) {
> +				/* fails when meets a zombie */
> +				fclose(f);
> +				if (state == frozen)
> +					goto err;
> +			}
> +		}
> +		fclose(f);
> +
> +		if (state == frozen)
> +			break;
> +
> +		state = get_freezer_state(fd);
> +		if (!state)
> +			goto err;
> +
> +		if (state == frozen) {
> +			/*
> +			 * Enumerate all tasks one more time to collect all new
> +			 * tasks, which can be born while the cgroup is being frozen.
> +			 */
> +
> +			continue;
> +		}
> +
> +		req.tv_nsec = 10000000 * i;

What if it overflows?

> +		nanosleep(&req, NULL);
> +	}
> +
> +	if (i == NR_ATTEMPTS) {
> +		pr_err("Unable to freeze cgroup %s\n", opts.freeze_cgroup);
> +		goto err;
> +	}
> +
> +	exit_code = 0;
> +err:
> +	if (exit_code == 0 || freezer_thawed) {
> +		lseek(fd, 0, SEEK_SET);
> +		if (write(fd, thawed, sizeof(thawed)) != sizeof(thawed)) {
> +			pr_perror("Unable to thaw tasks");
> +			exit_code = -1;
> +		}
> +	}
> +	if (close(fd)) {
> +		pr_perror("Unable to thaw tasks");
> +		return -1;
> +	}
> +
> +	return exit_code;
> +}
> +
>  static inline bool child_collected(struct pstree_item *i, pid_t pid)
>  {
>  	struct pstree_item *c;
> @@ -58,8 +221,9 @@ static int collect_children(struct pstree_item *item)
>  			goto free;
>  		}
>  
> -		/* fails when meets a zombie */
> -		seize_catch_task(pid);
> +		if (!opts.freeze_cgroup)
> +			/* fails when meets a zombie */
> +			seize_catch_task(pid);
>  
>  		ret = seize_wait_task(pid, item->pid.real, &dmpi(c)->pi_creds);
>  		if (ret < 0) {
> @@ -146,6 +310,9 @@ void pstree_switch_state(struct pstree_item *root_item, int st)
>  {
>  	struct pstree_item *item = root_item;
>  
> +	if (st != TASK_DEAD)
> +		freezer_restore_state();
> +
>  	pr_info("Unfreezing tasks into %d\n", st);
>  	for_each_pstree_item(item)
>  		unseize_task_and_threads(item, st);
> @@ -210,7 +377,7 @@ static int collect_threads(struct pstree_item *item)
>  		pr_info("\tSeizing %d's %d thread\n",
>  				item->pid.real, pid);
>  
> -		if (seize_catch_task(pid))
> +		if (!opts.freeze_cgroup && seize_catch_task(pid))
>  			continue;
>  
>  		ret = seize_wait_task(pid, item_ppid(item), &dmpi(item)->pi_creds);
> @@ -257,6 +424,9 @@ static int collect_loop(struct pstree_item *item,
>  {
>  	int attempts = NR_ATTEMPTS, nr_inprogress = 1;
>  
> +	if (opts.freeze_cgroup)
> +		attempts = 2; /* collect tasks and check that we skip nothing */

Why 2?

> +
>  	/*
>  	 * While we scan the proc and seize the children/threads
>  	 * new ones can appear (with clone(CLONE_PARENT) or with
> @@ -315,6 +485,9 @@ int collect_pstree(pid_t pid)
>  {
>  	int ret;
>  
> +	if (opts.freeze_cgroup && freeze_processes())
> +		return -1;
> +

This should happen after timing_start below.

>  	timing_start(TIME_FREEZING);
>  
>  	root_item = alloc_pstree_item();
> @@ -323,7 +496,7 @@ int collect_pstree(pid_t pid)
>  
>  	root_item->pid.real = pid;
>  
> -	if (seize_catch_task(pid))
> +	if (!opts.freeze_cgroup && seize_catch_task(pid))
>  		goto err;
>  
>  	ret = seize_wait_task(pid, -1, &dmpi(root_item)->pi_creds);
> 



More information about the CRIU mailing list