[CRIU] [PATCH REBASED 3/3] ns: Make pid_ns helpers as children of criu main process
Andrei Vagin
avagin at virtuozzo.com
Tue Jul 11 20:34:31 MSK 2017
On Tue, Jul 11, 2017 at 12:02:27PM +0300, Kirill Tkhai wrote:
> On 11.07.2017 03:46, Andrei Vagin wrote:
> > On Thu, Jul 06, 2017 at 02:14:17PM +0300, Kirill Tkhai wrote:
> >> If a task, holding userns_sync_lock unexpectedly exits,
> >> criu will hang on error path in restore_root_task(),
> >> because it can't use usernsd to destroy them.
> >>
> >> Lets remove the intermediary: we'll create pid_ns helpers
> >> as children of criu main task, and criu main task will
> >> be able to use simple kill() to stop them.
> >>
> >> Signed-off-by: Kirill Tkhai <ktkhai at virtuozzo.com>
> >> ---
> >> criu/cr-restore.c | 8 ++++++++
> >> criu/namespaces.c | 35 +++--------------------------------
> >> 2 files changed, 11 insertions(+), 32 deletions(-)
> >>
> >> diff --git a/criu/cr-restore.c b/criu/cr-restore.c
> >> index 965770f48..8d8349f32 100644
> >> --- a/criu/cr-restore.c
> >> +++ b/criu/cr-restore.c
> >> @@ -1470,6 +1470,7 @@ static inline int fork_with_pid(struct pstree_item *item)
> >> static void sigchld_handler(int signal, siginfo_t *siginfo, void *data)
> >> {
> >> int status, pid, exit;
> >> + struct ns_id *ns;
> >>
> >> while (1) {
> >> pid = waitpid(-1, &status, WNOHANG);
> >> @@ -1489,6 +1490,13 @@ static void sigchld_handler(int signal, siginfo_t *siginfo, void *data)
> >> break;
> >> }
> >>
> >> + if (!current) {
> >> + ns = handle_pid_ns_helper_exit(pid);
> >
> > Why is it out of the previous loop? What if two helpers exit together?
>
> As we destroy the helpers with SIGCHLD blocked in do_destroy_pidns_helpers(),
> a seeing of a one helper is already a error, and we should stop restore
> in this case.
>
> We mustn't see a helper in this function as we newer mustn't see any task
> there. The helper handle_pid_ns_helper_exit() is only for printing debug
> message to understand what happened.
Ok, I see. Pls, add something like this ^^^ in a comment before this
code. Thanks!
>
> >> + if (ns)
> >> + pr_err("Unexpected pid ns helper: pid=%d, ns_id=%d\n",
> >> + pid, ns->id);
> >> + }
> >> +
> >> if (exit)
> >> pr_err("%d exited, status=%d\n", pid, status);
> >> else
> > ...
> >
> > futex_abort_and_wake(&task_entries->nr_in_progress);
> >
> > it means that we always abort "restore", doesn't it?
>
> Yes, we break restore in this case.
>
> >> diff --git a/criu/namespaces.c b/criu/namespaces.c
> >> index 674b43820..fca125b6a 100644
> >> --- a/criu/namespaces.c
> >> +++ b/criu/namespaces.c
> >> @@ -1603,26 +1603,6 @@ struct ns_id *handle_pid_ns_helper_exit(pid_t real_pid)
> >> return ns;
> >> }
> >>
> >> -static void usernsd_handler(int signal, siginfo_t *siginfo, void *data)
> >> -{
> >> - pid_t pid = siginfo->si_pid;
> >> - struct ns_id *ns;
> >> - int status;
> >> -
> >> - while (pid) {
> >> - pid = waitpid(-1, &status, WNOHANG);
> >> - if (pid <= 0)
> >> - return;
> >> -
> >> - ns = handle_pid_ns_helper_exit(pid);
> >> - if (!ns)
> >> - pr_err("Spurious pid ns helper: pid=%d\n", pid);
> >> -
> >> - pr_err("%d finished unexpected: status=%d\n", pid, status);
> >> - futex_abort_and_wake(&task_entries->nr_in_progress);
> >> - }
> >> -}
> >> -
> >> static int usernsd_recv_transport(void *arg, int fd, pid_t pid)
> >> {
> >> if (install_service_fd(TRANSPORT_FD_OFF, fd) < 0) {
> >> @@ -1666,11 +1646,6 @@ static int usernsd(int sk)
> >> {
> >> pr_info("uns: Daemon started\n");
> >>
> >> - if (criu_signals_setup(usernsd_handler) < 0) {
> >> - pr_err("Can't setup handler\n");
> >> - return -1;
> >> - }
> >> -
> >> while (1) {
> >> struct unsc_msg um;
> >> static char msg[MAX_UNSFD_MSG_SIZE];
> >> @@ -2725,7 +2700,7 @@ static int do_create_pid_ns_helper(void *arg, int sk, pid_t unused_pid)
> >> unlock_last_pid();
> >> goto restore_ns;
> >> }
> >> - child = fork();
> >> + child = sys_clone_unified(CLONE_PARENT|SIGCHLD, NULL, NULL, NULL, 0);
> >> if (!child)
> >> exit(pid_ns_helper(ns, sk));
> >> saved_errno = errno;
> >> @@ -2787,7 +2762,7 @@ int create_pid_ns_helper(struct ns_id *ns)
> >> return 0;
> >> }
> >>
> >> -static int do_destroy_pid_ns_helpers(void *arg, int fd, pid_t unused)
> >> +static int do_destroy_pid_ns_helpers(void)
> >> {
> >> int status, sig_blocked = true, ret = 0;
> >> sigset_t sig_mask;
> >> @@ -2836,11 +2811,7 @@ int destroy_pid_ns_helpers(void)
> >> if (!(root_ns_mask & CLONE_NEWPID))
> >> return 0;
> >>
> >> - if (userns_call(do_destroy_pid_ns_helpers, 0, NULL, 0, -1) < 0) {
> >> - pr_err("Can't create pid_ns helper\n");
> >> - return -1;
> >> - }
> >> - return 0;
> >> + return do_destroy_pid_ns_helpers();
> >> }
> >>
> >> struct ns_desc pid_ns_desc = NS_DESC_ENTRY(CLONE_NEWPID, "pid", "pid_for_children");
> >>
More information about the CRIU
mailing list