[CRIU] [PATCH 4/4] dump: use freezer cgroup to seize processes (v3)
Pavel Emelyanov
xemul at parallels.com
Fri Aug 7 03:56:33 PDT 2015
On 08/06/2015 12:37 PM, Andrey Vagin wrote:
> Without using a freezer cgroup, we need to do a few iterations to catch
> all tasks, because new tasks can be born. If new tasks appear faster
> than criu collects them, criu fails. The freezer cgroup allows us to
> solve this problem.
>
> We freeze the freezer cgroup, then attach to the tasks with ptrace and thaw
> the freezer cgroup. We assume that all tasks which are going to be
> dumped are in the specified freezer cgroup.
>
> v2: fix comments from Christopher
> Reviewed-by: Christopher Covington <cov at codeaurora.org>
>
> v3: refactor task_seize
>
> Cc: Christopher Covington <cov at codeaurora.org>
> Signed-off-by: Andrey Vagin <avagin at openvz.org>
> ---
> crtools.c | 6 ++
> include/cr_options.h | 1 +
> seize.c | 181 +++++++++++++++++++++++++++++++++++++++++++++++++--
> 3 files changed, 184 insertions(+), 4 deletions(-)
>
> diff --git a/crtools.c b/crtools.c
> index 6af6080..9096420 100644
> --- a/crtools.c
> +++ b/crtools.c
> @@ -235,6 +235,7 @@ int main(int argc, char *argv[], char *envp[])
> { "enable-fs", required_argument, 0, 1065 },
> { "enable-external-sharing", no_argument, 0, 1066 },
> { "enable-external-masters", no_argument, 0, 1067 },
> + { "freeze-cgroup", required_argument, 0, 1068 },
> { },
> };
>
> @@ -465,6 +466,9 @@ int main(int argc, char *argv[], char *envp[])
> case 1067:
> opts.enable_external_masters = true;
> break;
> + case 1068:
> + opts.freeze_cgroup = optarg;
> + break;
> case 'M':
> {
> char *aux;
> @@ -676,6 +680,8 @@ usage:
> " 'cpu','fpu','all','ins','none'. To disable capability, prefix it with '^'.\n"
> " --exec-cmd execute the command specified after '--' on successful\n"
> " restore making it the parent of the restored process\n"
> +" --freeze-cgroup\n"
> +" use cgroup freezer to collect processes\n"
> "\n"
> "* Special resources support:\n"
> " -x|--" USK_EXT_PARAM "inode,.." " allow external unix connections (optionally can be assign socket's inode that allows one-sided dump)\n"
> diff --git a/include/cr_options.h b/include/cr_options.h
> index 19c2f77..f981806 100644
> --- a/include/cr_options.h
> +++ b/include/cr_options.h
> @@ -57,6 +57,7 @@ struct cr_options {
> char *output;
> char *root;
> char *pidfile;
> + char *freeze_cgroup;
> struct list_head veth_pairs;
> struct list_head scripts;
> struct list_head ext_mounts;
> diff --git a/seize.c b/seize.c
> index 86df3f0..ab43ad3 100644
> --- a/seize.c
> +++ b/seize.c
> @@ -18,6 +18,169 @@
>
> #define NR_ATTEMPTS 5
>
> +const char frozen[] = "FROZEN";
> +const char freezing[] = "FREEZING";
> +const char thawed[] = "THAWED";
> +
> +static const char *get_freezer_state(int fd)
> +{
> + int ret;
> + char path[PATH_MAX];
> +
> + lseek(fd, 0, SEEK_SET);
> + ret = read(fd, path, sizeof(path) - 1);
> + if (ret <= 0) {
> + pr_perror("Unable to get a current state");
> + goto err;
> + }
> + if (path[ret - 1] == '\n')
> + path[ret - 1] = 0;
> + else
> + path[ret] = 0;
> +
> + pr_debug("freezer.state=%s\n", path);
> + if (strcmp(path, frozen) == 0)
> + return frozen;
> + if (strcmp(path, freezing) == 0)
> + return freezing;
> + if (strcmp(path, thawed) == 0)
> + return thawed;
> +
> + pr_err("Unknown freezer state: %s", path);
> +err:
> + return NULL;
> +}
> +
> +static bool freezer_thawed;
> +
> +static int freezer_restore_state(void)
> +{
> + int fd;
> + char path[PATH_MAX];
> +
> + if (!opts.freeze_cgroup || freezer_thawed)
freezer_thawed is set to true when you change the cgroup into the frozen
state, so why return 0 here?
> + return 0;
> +
> + snprintf(path, sizeof(path), "%s/freezer.state", opts.freeze_cgroup);
> + fd = open(path, O_RDWR);
> + if (fd < 0) {
> + pr_perror("Unable to open %s", path);
> + return -1;
> + }
> +
> + if (write(fd, frozen, sizeof(frozen)) != sizeof(frozen)) {
You always put the freezer cgroup into the frozen state here. Why?
> + pr_perror("Unable to freeze tasks");
> + close(fd);
> + return -1;
> + }
> + close(fd);
> + return 0;
> +}
> +
> +static int freeze_processes(void)
> +{
> + int i, ret, fd, exit_code = -1;
> + char path[PATH_MAX];
> + const char *state = thawed;
> + FILE *f;
> +
> + snprintf(path, sizeof(path), "%s/freezer.state", opts.freeze_cgroup);
> + fd = open(path, O_RDWR);
> + if (fd < 0) {
> + pr_perror("Unable to open %s", path);
> + return -1;
> + }
> + state = get_freezer_state(fd);
> + if (!state) {
> + close(fd);
> + return -1;
> + }
> + if (state == thawed)
> + freezer_thawed = true;
> +
> + lseek(fd, 0, SEEK_SET);
> + if (write(fd, frozen, sizeof(frozen)) != sizeof(frozen)) {
> + pr_perror("Unable to freeze tasks");
> + close(fd);
> + return -1;
> + }
> +
> + /*
> + * There is no way to wait for a specified state, so we need to poll the
> + * freezer.state.
> + */
> + for (i = 0; i < NR_ATTEMPTS; i++) {
> + struct timespec req = {};
> +
> + /*
> + * New tasks can appear while a freezer state isn't
> + * frozen, so we need to catch all new tasks.
> + */
> + snprintf(path, sizeof(path), "%s/tasks", opts.freeze_cgroup);
> + f = fopen(path, "r");
> + if (f == NULL) {
> + pr_perror("Unable to open %s", path);
> + goto err;
> + }
> + while (fgets(path, sizeof(path), f)) {
> + pid_t pid;
> +
> + pid = atoi(path);
> +
> + ret = wait4(pid, NULL, __WALL | WNOHANG, NULL);
> + if (ret == 0) /* skip already seized tasks */
> + continue;
Please put a comment here explaining what you expect from this wait4 call and why.
> + if (seize_catch_task(pid)) {
> + /* fails when meets a zombie */
> + fclose(f);
> + if (state == frozen)
> + goto err;
> + }
> + }
> + fclose(f);
> +
> + if (state == frozen)
> + break;
> +
> + state = get_freezer_state(fd);
> + if (!state)
> + goto err;
> +
> + if (state == frozen) {
> + /*
> + * Enumerate all tasks one more time to collect all new
> + * tasks, which can be born while the cgroup is being frozen.
> + */
> +
> + continue;
> + }
> +
> + req.tv_nsec = 10000000 * i;
What if it overflows?
> + nanosleep(&req, NULL);
> + }
> +
> + if (i == NR_ATTEMPTS) {
> + pr_err("Unable to freeze cgroup %s\n", opts.freeze_cgroup);
> + goto err;
> + }
> +
> + exit_code = 0;
> +err:
> + if (exit_code == 0 || freezer_thawed) {
> + lseek(fd, 0, SEEK_SET);
> + if (write(fd, thawed, sizeof(thawed)) != sizeof(thawed)) {
> + pr_perror("Unable to thaw tasks");
> + exit_code = -1;
> + }
> + }
> + if (close(fd)) {
> + pr_perror("Unable to thaw tasks");
> + return -1;
> + }
> +
> + return exit_code;
> +}
> +
> static inline bool child_collected(struct pstree_item *i, pid_t pid)
> {
> struct pstree_item *c;
> @@ -58,8 +221,9 @@ static int collect_children(struct pstree_item *item)
> goto free;
> }
>
> - /* fails when meets a zombie */
> - seize_catch_task(pid);
> + if (!opts.freeze_cgroup)
> + /* fails when meets a zombie */
> + seize_catch_task(pid);
>
> ret = seize_wait_task(pid, item->pid.real, &dmpi(c)->pi_creds);
> if (ret < 0) {
> @@ -146,6 +310,9 @@ void pstree_switch_state(struct pstree_item *root_item, int st)
> {
> struct pstree_item *item = root_item;
>
> + if (st != TASK_DEAD)
> + freezer_restore_state();
> +
> pr_info("Unfreezing tasks into %d\n", st);
> for_each_pstree_item(item)
> unseize_task_and_threads(item, st);
> @@ -210,7 +377,7 @@ static int collect_threads(struct pstree_item *item)
> pr_info("\tSeizing %d's %d thread\n",
> item->pid.real, pid);
>
> - if (seize_catch_task(pid))
> + if (!opts.freeze_cgroup && seize_catch_task(pid))
> continue;
>
> ret = seize_wait_task(pid, item_ppid(item), &dmpi(item)->pi_creds);
> @@ -257,6 +424,9 @@ static int collect_loop(struct pstree_item *item,
> {
> int attempts = NR_ATTEMPTS, nr_inprogress = 1;
>
> + if (opts.freeze_cgroup)
> + attempts = 2; /* collect tasks and check that we skip nothing */
Why 2?
> +
> /*
> * While we scan the proc and seize the children/threads
> * new ones can appear (with clone(CLONE_PARENT) or with
> @@ -315,6 +485,9 @@ int collect_pstree(pid_t pid)
> {
> int ret;
>
> + if (opts.freeze_cgroup && freeze_processes())
> + return -1;
> +
This should happen after the timing_start() call below.
> timing_start(TIME_FREEZING);
>
> root_item = alloc_pstree_item();
> @@ -323,7 +496,7 @@ int collect_pstree(pid_t pid)
>
> root_item->pid.real = pid;
>
> - if (seize_catch_task(pid))
> + if (!opts.freeze_cgroup && seize_catch_task(pid))
> goto err;
>
> ret = seize_wait_task(pid, -1, &dmpi(root_item)->pi_creds);
>
More information about the CRIU
mailing list