[CRIU] is CR_STATE_RESTORE_CREDS necessary?

Tue Dec 15 01:26:16 PST 2015

Hi Tycho,

Coull you take a look at the attached patch? Does it fix your problem?

On Mon, Dec 14, 2015 at 09:07:43AM -0700, Tycho Andersen wrote:
> On Mon, Dec 14, 2015 at 10:14:49AM +0300, Andrew Vagin wrote:
> > On Fri, Dec 11, 2015 at 08:09:57AM -0700, Tycho Andersen wrote:
> > > On Fri, Dec 11, 2015 at 05:48:42PM +0300, Andrew Vagin wrote:
> > > > On Fri, Dec 11, 2015 at 12:11:10AM +0300, Pavel Emelyanov wrote:
> > > > > Yes, that's clear -- we need global final sync point before total
> > > > > unfreeze and resume.
> > > 
> > > Why doesn't CR_STATE_COMPLETE work for this? Doesn't the restorer wait
> > > once it finishes CR_STATE_RESTORE_CREDS until it gets the signal for
> > > CR_STATE_COMPLETE?
> > 
> > Because CRIU doesn't wait when the CR_STATE_COMPLETE stage will be
> > completed.
> 
> Oh, duh :)
> 
> > > But this is really because of the network, not because of the creds.
> > > What if we s/CR_STATE_RESTORE_CREDS/CR_STATE_NETWORK_UNLOCK, and in
> > > the restorer only do rst_tcp_socks_all() here, and move all the creds
> > > restore after CR_STATE_NETWORK_UNLOCK, when the main task sends
> > > CR_STATE_COMPLETE.
> > 
> > I don't understand this. Do you want to restore creds after
> > CR_STATE_COMPLETE?
> 
> No, I just want to restore creds after restore_seccomp, but that needs
> seccomp suspended in order to work. Here's three commits:
> 
> https://github.com/tych0/criu/commits/seccomp-setuid
> 
> the first one adds a setuid() call to make it fail, the second moves
> the restore_seccomp call to the "right" place causing it to pass
> again. But then if we change the seccomp policy slightly, we can get
> it to fail.
> 
> What I'm after is just a way to solve this problem.
> 
> > > 
> > > The one problem this doesn't solve is someone mentioned in the thread
> > > that there was an issue about resuming some tasks before other tasks
> > > had their creds restored. I didn't understand this point, isn't this
> > > prevented by the wait on __NR_rt_sigreturn (except in the case where
> > > criu is itself ptraced, but I'm ignoring that one because I don't
> > > think it's a use case other than for debugging).
> > 
> > Pls, don't ignore this usecases, it is enought critical to take it
> > into account.
> 
> As a real use case, or just for debugging? For just debugging, I think
> it is ok if some things don't work correctly in this state right?
> 
> Tycho
-------------- next part --------------
>From 1189841f8e78eb93d771dbce468afffa6a4fd879 Mon Sep 17 00:00:00 2001
From: Andrew Vagin <avagin at virtuozzo.com>
Date: Tue, 15 Dec 2015 12:20:17 +0300
Subject: [PATCH] ptrace: start tracing processes before restoring creds

Signed-off-by: Andrew Vagin <avagin at virtuozzo.com>
---
 cr-restore.c | 61 ++++++++++++++++++++++++++++++++++++++++++++++--------------
 1 file changed, 47 insertions(+), 14 deletions(-)

diff --git a/cr-restore.c b/cr-restore.c
index aaa9bf2..882fd5d 100644
--- a/cr-restore.c
+++ b/cr-restore.c
@@ -1645,13 +1645,13 @@ static int restore_switch_stage(int next_stage)
 	return restore_wait_inprogress_tasks();
 }
 
-static int attach_to_tasks(bool root_seized, enum trace_flags *flag)
+static int attach_to_tasks(bool root_seized)
 {
 	struct pstree_item *item;
 
 	for_each_pstree_item(item) {
 		pid_t pid = item->pid.real;
-		int status, i, ret;
+		int status, i;
 
 		if (!task_alive(item))
 			continue;
@@ -1663,21 +1663,16 @@ static int attach_to_tasks(bool root_seized, enum trace_flags *flag)
 			pid = item->threads[i].real;
 
 			if (item != root_item || !root_seized || i != 0) {
-				if (ptrace(PTRACE_ATTACH, pid, 0, 0)) {
+				if (ptrace(PTRACE_SEIZE, pid, 0, 0)) {
 					pr_perror("Can't attach to %d", pid);
 					return -1;
 				}
-			} else {
-				/*
-				 * Root item is SEIZE-d, so we only need
-				 * to stop one (INTERRUPT) to make wait4
-				 * and SYSCALL below work.
-				 */
-				if (ptrace(PTRACE_INTERRUPT, pid, 0, 0)) {
-					pr_perror("Can't interrupt the %d task", pid);
-					return -1;
-				}
 			}
+			if (ptrace(PTRACE_INTERRUPT, pid, 0, 0)) {
+				pr_perror("Can't interrupt the %d task", pid);
+				return -1;
+			}
+
 
 			if (wait4(pid, &status, __WALL, NULL) != pid) {
 				pr_perror("waitpid(%d) failed", pid);
@@ -1694,6 +1689,43 @@ static int attach_to_tasks(bool root_seized, enum trace_flags *flag)
 			if (rsti(item)->has_seccomp && suspend_seccomp(pid) < 0)
 				pr_err("failed to suspend seccomp, restore will probably fail...\n");
 
+			if (ptrace(PTRACE_CONT, pid, NULL, NULL) ) {
+				pr_perror("Unable to resume %d", pid);
+				return -1;
+			}
+		}
+	}
+
+	return 0;
+}
+
+static int catch_tasks(bool root_seized, enum trace_flags *flag)
+{
+	struct pstree_item *item;
+
+	for_each_pstree_item(item) {
+		pid_t pid = item->pid.real;
+		int status, i, ret;
+
+		if (!task_alive(item))
+			continue;
+
+		if (parse_threads(item->pid.real, &item->threads, &item->nr_threads))
+			return -1;
+
+		for (i = 0; i < item->nr_threads; i++) {
+			pid = item->threads[i].real;
+
+			if (ptrace(PTRACE_INTERRUPT, pid, 0, 0)) {
+				pr_perror("Can't interrupt the %d task", pid);
+				return -1;
+			}
+
+			if (wait4(pid, &status, __WALL, NULL) != pid) {
+				pr_perror("waitpid(%d) failed", pid);
+				return -1;
+			}
+
 			ret = ptrace_stop_pie(pid, rsti(item)->breakpoint, flag);
 			if (ret < 0)
 				return -1;
@@ -1942,13 +1974,14 @@ static int restore_root_task(struct pstree_item *init)
 	 * -------------------------------------------------------------
 	 * Below this line nothing should fail, because network is unlocked
 	 */
+	attach_to_tasks(root_as_sibling);
 
 	ret = restore_switch_stage(CR_STATE_RESTORE_CREDS);
 	BUG_ON(ret);
 
 	timing_stop(TIME_RESTORE);
 
-	ret = attach_to_tasks(root_as_sibling, &flag);
+	ret = catch_tasks(root_as_sibling, &flag);
 
 	pr_info("Restore finished successfully. Resuming tasks.\n");
 	futex_set_and_wake(&task_entries->start, CR_STATE_COMPLETE);
-- 
2.4.3