[CRIU] [PATCH 4/4] restore: Fix hang if root task is waiting on zombie
Andrey Vagin
avagin at gmail.com
Tue Dec 11 21:06:10 MSK 2018
On Tue, Dec 11, 2018 at 12:36:29PM +0300, Cyrill Gorcunov wrote:
> On Mon, Dec 10, 2018 at 11:07:09PM -0800, Andrey Vagin wrote:
> > > A zombie is not reparented to us yet.
> >
> > This means that we are waiting when a process (helper) will die and its
> > children (zombies) will be reparanted to the init process.
> >
> > If nr_in_progress is 1, this means that there is no processes which are
> > going to die, doesn't it? It the answer is yes, what are we waiting
> > here?
>
> If nr_in_progress is 1 it means the zombie processes already decremented
> nr_in_progress but may not be yet reparented to us, because we do decrement
> nr_in_progress first and only then call kill(self) in zombie restore code.
>
> for (i = 0; i < task_args->zombies_n; i++) {
> int ret, nr_in_progress;
>
> nr_in_progress = futex_get(&task_entries_local->nr_in_progress);
>
> ret = sys_waitid(P_PID, task_args->zombies[i], NULL, WNOWAIT | WEXITED, NULL);
> if (ret == -ECHILD) {
> /* A process isn't reparented to this task yet.
> * Let's wait when someone complete this stage
> * and try again.
> */
> --> futex_wait_while_eq(&task_entries_local->nr_in_progress,
> nr_in_progress);
> i--;
> continue;
> }
>
> The nr_in_progress is fetched earlier and equal to 1. Then we call for waitid
> and got -ECHILD which resulted that we're sitting in futex wait forever. That
> is what I've been notified about when investigating a bug. Unfortunately I
> lost the container failing and was unable to recreate a local test case.
> I suspect there is a race window between signal delivery and nr_in_progress
> updating.
This hack isn't about zombies of this process, it is used to wait when
other tasks complete waiting their helpers and zombies. nr_in_progress
is descrimented after wait_helpers and wait_zombies.
if (wait_helpers(args) < 0)
goto core_restore_end;
if (wait_zombies(args) < 0)
goto core_restore_end;
...
restore_finish_stage(task_entries_local, CR_STATE_RESTORE_SIGCHLD);
The problem here is that we don't take into account that zombies can be
reparented from other zombies...
>
> Still since I've no local reproduction maybe it worth simply let this patch
> flying around and if it triggers someday we will back to the question.
Take a look at the attached test
-------------- next part --------------
>From c2e9864c1236aaa35266e7596f57ada9d2cb8421 Mon Sep 17 00:00:00 2001
From: Andrei Vagin <avagin at gmail.com>
Date: Tue, 11 Dec 2018 20:45:01 +0300
Subject: [PATCH] test
Signed-off-by: Andrei Vagin <avagin at gmail.com>
---
test/zdtm/lib/test.c | 8 +++++
test/zdtm/static/Makefile | 1 +
test/zdtm/static/zombie02.c | 69 ++++++++++++++++++++++++++++++++++++++++++
test/zdtm/static/zombie02.desc | 1 +
4 files changed, 79 insertions(+)
create mode 100644 test/zdtm/static/zombie02.c
create mode 100644 test/zdtm/static/zombie02.desc
diff --git a/test/zdtm/lib/test.c b/test/zdtm/lib/test.c
index a1bdfc1b4..839db8092 100644
--- a/test/zdtm/lib/test.c
+++ b/test/zdtm/lib/test.c
@@ -295,6 +295,14 @@ void test_init(int argc, char **argv)
futex_init(&test_shared_state->stage);
futex_set(&test_shared_state->stage, TEST_INIT_STAGE);
+
+ if (getenv("ZDTM_PIDNS"))
+ pr_err("newpid\n");
+ if (getenv("ZDTM_PIDNS") && unshare(CLONE_NEWPID)) {
+ pr_perror("Unable to create pidns");
+ exit(1);
+ }
+
pid = fork();
if (pid < 0) {
pr_perror("Daemonizing failed");
diff --git a/test/zdtm/static/Makefile b/test/zdtm/static/Makefile
index 7b8d66377..c5d934e5a 100644
--- a/test/zdtm/static/Makefile
+++ b/test/zdtm/static/Makefile
@@ -11,6 +11,7 @@ TST_NOFILE := \
wait00 \
zombie00 \
zombie01 \
+ zombie02 \
fpu00 \
fpu01 \
fpu02 \
diff --git a/test/zdtm/static/zombie02.c b/test/zdtm/static/zombie02.c
new file mode 100644
index 000000000..5f53b3d98
--- /dev/null
+++ b/test/zdtm/static/zombie02.c
@@ -0,0 +1,69 @@
+#include <errno.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <signal.h>
+#include <string.h>
+
+#include "zdtmtst.h"
+
+const char *test_doc = "See if we can wait() for a zombified child after migration";
+const char *test_author = "Roman Kagan <rkagan at parallels.com>";
+
+int main(int argc, char ** argv)
+{
+ int status;
+ int pid_pipe[2];
+ pid_t pid, pid_c;
+ siginfo_t siginfo;
+
+ setenv("ZDTM_PIDNS", "1", 1);
+ test_init(argc, argv);
+
+ if (pipe(pid_pipe))
+ return 1;
+
+ pid = fork();
+ if (pid == 0) {
+ setsid();
+ pid_c = fork();
+ if (pid_c == 0)
+ return 0;
+ if (write(pid_pipe[1], &pid_c, sizeof(pid_c)) != sizeof(pid_c))
+ return 1;
+ if (waitid(P_PID, pid_c, &siginfo, WNOWAIT | WEXITED) == -1) {
+ pr_perror("Unable to wait %d", pid_c);
+ exit(1);
+ }
+ return 0;
+ }
+ close(pid_pipe[1]);
+
+ if (read(pid_pipe[0], &pid_c, sizeof(pid_c)) != sizeof(pid_c))
+ return 1;
+
+ if (waitid(P_PID, pid, &siginfo, WNOWAIT | WEXITED) == -1) {
+ pr_perror("Unable to wait %d", pid);
+ exit(1);
+ }
+ if (waitid(P_PID, pid_c, &siginfo, WNOWAIT | WEXITED) == -1) {
+ pr_perror("Unable to wait %d", pid_c);
+ exit(1);
+ }
+
+ test_daemon();
+ test_waitsig();
+
+ if (waitpid(pid, &status, 0) == -1) {
+ pr_perror("Unable to wait %d", pid);
+ exit(1);
+ }
+ if (waitpid(pid_c, &status, 0) == -1) {
+ pr_perror("Unable to wait %d", pid_c);
+ exit(1);
+ }
+
+ pass();
+ return 0;
+}
diff --git a/test/zdtm/static/zombie02.desc b/test/zdtm/static/zombie02.desc
new file mode 100644
index 000000000..fa2c82d08
--- /dev/null
+++ b/test/zdtm/static/zombie02.desc
@@ -0,0 +1 @@
+{'flavor': 'h', 'flags': 'suid'}
--
2.14.5
More information about the CRIU
mailing list