[Devel] [PATCH] pcs7: freeze -- Wait for exiting tasks to complete
Cyrill Gorcunov
gorcunov at gmail.com
Fri Oct 7 11:07:06 PDT 2016
There is a small race between exitings tasks and cgroup freezer:
it can report a pid of exiting process which we will consider
as a zombie and refuse to checkpoint. Lets do a trick instead:
if we meets such task just wait a bit and repeate a freezing.
https://jira.sw.ru/browse/PSBM-53282
Signed-off-by: Cyrill Gorcunov <gorcunov at virtuozzo.com>
---
criu/seize.c | 33 ++++++++++++++++++++++++++-------
1 file changed, 26 insertions(+), 7 deletions(-)
diff --git a/criu/seize.c b/criu/seize.c
index f39412f..d6723c1 100644
--- a/criu/seize.c
+++ b/criu/seize.c
@@ -136,11 +136,17 @@ static int seize_cgroup_tree(char *root_path, const char *state)
if (stat(buf, &st) == -1 && errno == ENOENT)
continue;
- /* fails when meets a zombie */
+ /*
+ * fails when meets a zombie, or eixting process:
+ * there is a small race in a kernel -- the process
+ * may start exiting and we are trying to freeze it
+ * before it compete exit procedure. The caller simply
+ * should wait a bit and try freezing again.
+ */
pr_err("zombie %d (comm %s) found while seizing\n",
pid, __task_comm_info(pid));
fclose(f);
- return -1;
+ return -EPERM;
}
}
fclose(f);
@@ -153,6 +159,7 @@ static int seize_cgroup_tree(char *root_path, const char *state)
while ((de = readdir(dir))) {
struct stat st;
+ int ret;
if (dir_dots(de))
continue;
@@ -168,9 +175,10 @@ static int seize_cgroup_tree(char *root_path, const char *state)
if (!S_ISDIR(st.st_mode))
continue;
- if (seize_cgroup_tree(path, state) < 0) {
+ ret = seize_cgroup_tree(path, state);
+ if (ret < 0) {
closedir(dir);
- return -1;
+ return ret;
}
}
closedir(dir);
@@ -328,7 +336,7 @@ static int freeze_processes(void)
static const unsigned long step_ms = 100;
unsigned long nr_attempts = (opts.timeout * 1000000) / step_ms;
- unsigned long i;
+ unsigned long i = 0;
const struct timespec req = {
.tv_nsec = step_ms * 1000000,
@@ -374,7 +382,7 @@ static int freeze_processes(void)
* not read @tasks pids while freezer in
* transition stage.
*/
- for (i = 0; i <= nr_attempts; i++) {
+ for (; i <= nr_attempts; i++) {
state = get_freezer_state(fd);
if (!state) {
close(fd);
@@ -398,7 +406,18 @@ static int freeze_processes(void)
pr_debug("freezing processes: %lu attempts done\n", i);
}
- exit_code = seize_cgroup_tree(opts.freeze_cgroup, state);
+ /*
+ * Pay attention on @i variable -- it's continuation.
+ */
+ for (; i <= nr_attempts; i++) {
+ exit_code = seize_cgroup_tree(opts.freeze_cgroup, state);
+ if (exit_code == -EPERM) {
+ if (alarm_timeouted())
+ goto err;
+ nanosleep(&req, NULL);
+ } else
+ break;
+ }
err:
if (exit_code == 0 || freezer_thawed) {
--
2.7.4
More information about the Devel
mailing list