[CRIU] [PATCH 4/4] dump: use freezer cgroup to seize processes (v3)

Andrew Vagin avagin at gmail.com
Mon Aug 10 04:22:57 PDT 2015


On Fri, Aug 07, 2015 at 01:56:33PM +0300, Pavel Emelyanov wrote:
> On 08/06/2015 12:37 PM, Andrey Vagin wrote:
> > Without using a freezer cgroup, we need to do a few iterations to catch
> > all tasks, because new tasks can be born. If new tasks appear faster
> > than criu collects them, criu fails. The freezer cgroup allows us to
> > solve this problem.
> > 
> > We freeze the freezer cgroup, then attach to the tasks with ptrace and thaw
> > the freezer cgroup. We assume that all tasks which are going to be
> > dumped are in the specified freezer cgroup.
> > 
> > v2: fix comments from Christopher
> > Reviewed-by: Christopher Covington <cov at codeaurora.org>
> > 
> > v3: refactor task_seize
> > 
> > Cc: Christopher Covington <cov at codeaurora.org>
> > Signed-off-by: Andrey Vagin <avagin at openvz.org>
> > ---
> >  crtools.c            |   6 ++
> >  include/cr_options.h |   1 +
> >  seize.c              | 181 +++++++++++++++++++++++++++++++++++++++++++++++++--
> >  3 files changed, 184 insertions(+), 4 deletions(-)
> > 
> > diff --git a/crtools.c b/crtools.c
> > index 6af6080..9096420 100644
> > --- a/crtools.c
> > +++ b/crtools.c
> > @@ -235,6 +235,7 @@ int main(int argc, char *argv[], char *envp[])
> >  		{ "enable-fs",			required_argument,	0, 1065 },
> >  		{ "enable-external-sharing", 	no_argument, 		0, 1066 },
> >  		{ "enable-external-masters", 	no_argument, 		0, 1067 },
> > +		{ "freeze-cgroup",		required_argument,	0, 1068 },
> >  		{ },
> >  	};
> >  
> > @@ -465,6 +466,9 @@ int main(int argc, char *argv[], char *envp[])
> >  		case 1067:
> >  			opts.enable_external_masters = true;
> >  			break;
> > +		case 1068:
> > +			opts.freeze_cgroup = optarg;
> > +			break;
> >  		case 'M':
> >  			{
> >  				char *aux;
> > @@ -676,6 +680,8 @@ usage:
> >  "                        'cpu','fpu','all','ins','none'. To disable capability, prefix it with '^'.\n"
> >  "     --exec-cmd         execute the command specified after '--' on successful\n"
> >  "                        restore making it the parent of the restored process\n"
> > +"  --freeze-cgroup\n"
> > +"                        use cgroup freezer to collect processes\n"
> >  "\n"
> >  "* Special resources support:\n"
> >  "  -x|--" USK_EXT_PARAM "inode,.." "      allow external unix connections (optionally can be assign socket's inode that allows one-sided dump)\n"
> > diff --git a/include/cr_options.h b/include/cr_options.h
> > index 19c2f77..f981806 100644
> > --- a/include/cr_options.h
> > +++ b/include/cr_options.h
> > @@ -57,6 +57,7 @@ struct cr_options {
> >  	char			*output;
> >  	char			*root;
> >  	char			*pidfile;
> > +	char			*freeze_cgroup;
> >  	struct list_head	veth_pairs;
> >  	struct list_head	scripts;
> >  	struct list_head	ext_mounts;
> > diff --git a/seize.c b/seize.c
> > index 86df3f0..ab43ad3 100644
> > --- a/seize.c
> > +++ b/seize.c
> > @@ -18,6 +18,169 @@
> >  
> >  #define NR_ATTEMPTS 5
> >  
> > +const char frozen[]	= "FROZEN";
> > +const char freezing[]	= "FREEZING";
> > +const char thawed[]	= "THAWED";
> > +
> > +static const char *get_freezer_state(int fd)
> > +{
> > +	int ret;
> > +	char path[PATH_MAX];
> > +
> > +	lseek(fd, 0, SEEK_SET);
> > +	ret = read(fd, path, sizeof(path) - 1);
> > +	if (ret <= 0) {
> > +		pr_perror("Unable to get a current state");
> > +		goto err;
> > +	}
> > +	if (path[ret - 1] == '\n')
> > +		path[ret - 1] = 0;
> > +	else
> > +		path[ret] = 0;
> > +
> > +	pr_debug("freezer.state=%s\n", path);
> > +	if (strcmp(path, frozen) == 0)
> > +		return frozen;
> > +	if (strcmp(path, freezing) == 0)
> > +		return freezing;
> > +	if (strcmp(path, thawed) == 0)
> > +		return thawed;
> > +
> > +	pr_err("Unknown freezer state: %s", path);
> > +err:
> > +	return NULL;
> > +}
> > +
> > +static bool freezer_thawed;
> > +
> > +static int freezer_restore_state(void)
> > +{
> > +	int fd;
> > +	char path[PATH_MAX];
> > +
> > +	if (!opts.freeze_cgroup || freezer_thawed)
> 
> The freezer_thawed is set to true when you change it into frozen
> state, why return 0 here?

freezer_restore_state() is called after freeze_processes(), and the freezer
is thawed at that moment.

> 
> > +		return 0;
> > +
> > +	snprintf(path, sizeof(path), "%s/freezer.state", opts.freeze_cgroup);
> > +	fd = open(path, O_RDWR);
> > +	if (fd < 0) {
> > +		pr_perror("Unable to open %s", path);
> > +		return -1;
> > +	}
> > +
> > +	if (write(fd, frozen, sizeof(frozen)) != sizeof(frozen)) {
> 
> You always put freezer cgroup into frozen state here. Why?
> 
> > +			pr_perror("Unable to freeze tasks");
> > +			close(fd);
> > +			return -1;
> > +	}
> > +	close(fd);
> > +	return 0;
> > +}
> > +
> > +static int freeze_processes(void)
> > +{
> > +	int i, ret, fd, exit_code = -1;
> > +	char path[PATH_MAX];
> > +	const char *state = thawed;
> > +	FILE *f;
> > +
> > +	snprintf(path, sizeof(path), "%s/freezer.state", opts.freeze_cgroup);
> > +	fd = open(path, O_RDWR);
> > +	if (fd < 0) {
> > +		pr_perror("Unable to open %s", path);
> > +		return -1;
> > +	}
> > +	state = get_freezer_state(fd);
> > +	if (!state) {
> > +		close(fd);
> > +		return -1;
> > +	}
> > +	if (state == thawed)
> > +		freezer_thawed = true;
> > +
> > +	lseek(fd, 0, SEEK_SET);
> > +	if (write(fd, frozen, sizeof(frozen)) != sizeof(frozen)) {
> > +		pr_perror("Unable to freeze tasks");
> > +		close(fd);
> > +		return -1;
> > +	}
> > +
> > +	/*
> > +	 * There is not way to wait a specified state, so we need to poll the
> > +	 * freezer.state.
> > +	 */
> > +	for (i = 0; i < NR_ATTEMPTS; i++) {
> > +		struct timespec req = {};
> > +
> > +		/*
> > +		 * New tasks can appear while a freezer state isn't
> > +		 * frozen, so we need to catch all new tasks.
> > +		 */
> > +		snprintf(path, sizeof(path), "%s/tasks", opts.freeze_cgroup);
> > +		f = fopen(path, "r");
> > +		if (f == NULL) {
> > +			pr_perror("Unable to open %s", path);
> > +			goto err;
> > +		}
> > +		while (fgets(path, sizeof(path), f)) {
> > +			pid_t pid;
> > +
> > +			pid = atoi(path);
> > +
> > +			ret = wait4(pid, NULL, __WALL | WNOHANG, NULL);
> > +			if (ret == 0) /* skip already seized tasks */

here is the comment you asked for		^^^^^^^^^^^^^^^^^^^^ ;)
I will add more details.
> > +				continue;
> 
> Please put a comment here explaining what you expect from this wait4 call and why.
> 


More information about the CRIU mailing list