[CRIU] [PATCH 1/2] restore: TASK_HELPERs live through restore stage

Andrew Vagin avagin at parallels.com
Wed Sep 10 08:00:29 PDT 2014


Hi, Tycho

On Wed, Sep 10, 2014 at 09:32:47AM -0500, Tycho Andersen wrote:
> Hi Andrew,
> 
> On Tue, Sep 09, 2014 at 02:21:51PM +0400, Andrew Vagin wrote:
> > Hi Tycho,
> > 
> > I highly recommend running the tests. It's easy: you only need to execute
> > one command, "make test".
> > 
> > ns/static/session00 fails with this patch.
> 
> When I try to run this test, I get:
> 
> criu:/tmp/criu/test sudo ./zdtm.sh -r static/session00

session00 can be executed only in a new set of namespaces, so you should
run it with the ns/ prefix:
bash test/zdtm.sh ns/static/session00

> Execute zdtm/live/static/session00
> cc -g -O2 -Wall -Werror -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=0  -iquote
> ./arch/x86/include  -c -MM -MP -o tcp.d tcp.c
> cc -g -O2 -Wall -Werror -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=0  -iquote
> ./arch/x86/include  -c -MM -MP -o ns.d ns.c
> cc -g -O2 -Wall -Werror -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=0  -iquote
> ./arch/x86/include  -c -MM -MP -o lock.d lock.c
> cc -g -O2 -Wall -Werror -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=0  -iquote
> ./arch/x86/include  -c -MM -MP -o streamutil.d streamutil.c
> cc -g -O2 -Wall -Werror -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=0  -iquote
> ./arch/x86/include  -c -MM -MP -o test.d test.c
> cc -g -O2 -Wall -Werror -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=0  -iquote
> ./arch/x86/include  -c -MM -MP -o parseargs.d parseargs.c
> cc -g -O2 -Wall -Werror -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=0  -iquote
> ./arch/x86/include  -c -MM -MP -o msg.d msg.c
> cc -g -O2 -Wall -Werror -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=0  -iquote
> ./arch/x86/include  -c -MM -MP -o datagen.d datagen.c
> cc -g -O2 -Wall -Werror -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=0  -iquote
> ./arch/x86/include  -c -o datagen.o datagen.c
> cc -g -O2 -Wall -Werror -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=0  -iquote
> ./arch/x86/include  -c -o msg.o msg.c
> cc -g -O2 -Wall -Werror -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=0  -iquote
> ./arch/x86/include  -c -o parseargs.o parseargs.c
> cc -g -O2 -Wall -Werror -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=0  -iquote
> ./arch/x86/include  -c -o test.o test.c
> cc -g -O2 -Wall -Werror -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=0  -iquote
> ./arch/x86/include  -c -o streamutil.o streamutil.c
> cc -g -O2 -Wall -Werror -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=0  -iquote
> ./arch/x86/include  -c -o lock.o lock.c
> cc -g -O2 -Wall -Werror -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=0  -iquote
> ./arch/x86/include  -c -o ns.o ns.c
> cc -g -O2 -Wall -Werror -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=0  -iquote
> ./arch/x86/include  -c -o tcp.o tcp.c
> ar: creating libzdtmtst.a
> a - datagen.o
> a - msg.o
> a - parseargs.o
> a - test.o
> a - streamutil.o
> a - lock.o
> a - ns.o
> a - tcp.o
> cc -g -O2 -Wall -Werror -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=0  -iquote
> ../../lib/arch/x86/include -I../../lib   session00.c
> ../../lib/libzdtmtst.a   -o session00
> ./session00 --pidfile=session00.pid --outfile=session00.out
> Dump 3866
> Restore
> Test: zdtm/live/static/session00, Result: FAIL
> ==================================== ERROR
> ====================================
> Test: zdtm/live/static/session00, Namespace: 
> Dump log   : /tmp/criu/test/dump/static/session00/3866/1/dump.log
> --------------------------------- grep Error
> ---------------------------------
> ------------------------------------- END
> -------------------------------------
> Restore log: /tmp/criu/test/dump/static/session00/3866/1/restore.log
> --------------------------------- grep Error
> ---------------------------------
> (00.000700) Error (cr-restore.c:1393): Pid 3867 do not match expected
> 3866
> (00.001033) Error (cr-restore.c:1152): 3867 exited, status=255
> (00.001058) Error (cr-restore.c:1762): Restoring FAILED.
> ------------------------------------- END
> -------------------------------------
> ================================= ERROR OVER
> =================================
> 
> except that criu check says I'm (mostly) ok:
> 
> criu:/tmp/criu sudo ./criu check
> Error (timerfd.c:56): timerfd: No timerfd support for c/r:
> Inappropriate ioctl for device
> Error (cr-check.c:269): fdinfo doesn't contain the mnt_id field
> 
> Any ideas?
> 
> Tycho
> 
> > [root at avagin-fc19-cr criu]# bash test/zdtm.sh ns/static/session00
> > ================================= CRIU CHECK =================================
> > Looks good.
> > Execute zdtm/live/static/session00
> > ./session00 --pidfile=session00.pid --outfile=session00.out
> > /root/git/criu/test
> > Dump 24400
> > Restore
> > Check results 24439
> > Waiting...
> > 10:18:41.865:     4: FAIL: session00.c:213: The process with pid 16 returns 256
> >  (errno = 10 (Permission denied))
> > 10:18:41.867:     4: ERR: session00.c:221: 10 isn't waited (errno = 0 (Permission denied))
> > Test: zdtm/live/static/session00, Result: FAIL
> > ==================================== ERROR ====================================
> > Test: zdtm/live/static/session00, Namespace: 1
> > Dump log   : /root/git/criu/test/dump/static/session00/24400/1/dump.log
> > --------------------------------- grep Error ---------------------------------
> > ------------------------------------- END -------------------------------------
> > Restore log: /root/git/criu/test/dump/static/session00/24400/1/restore.log
> > --------------------------------- grep Error ---------------------------------
> > ------------------------------------- END -------------------------------------
> > Output file: /root/git/criu/test/zdtm/live/static/session00.out
> > ------------------------------------------------------------------------------
> > 10:18:41.865:     4: FAIL: session00.c:213: The process with pid 16 returns 256
> >  (errno = 10 (Permission denied))
> > 10:18:41.867:     4: ERR: session00.c:221: 10 isn't waited (errno = 0 (Permission denied))
> >   PID   SID COMMAND
> >     1     1 session00
> >     4     4 session00
> >     7     7  \_ session00
> >     8     4  \_ session00
> >    11    11  \_ session00
> >    12     4  |   \_ session00
> >    13    13  \_ session00
> >    14    14      \_ session00
> >    15     4          \_ session00
> >     6     4 session00
> >    10     9 session00
> >    16     1 ps
> > 9 return 256
> > Time to stop and check
> >   PID   SID COMMAND
> >     1     1 session00
> >     4     4 session00
> >    13    13  \_ session00
> >    14    14  |   \_ session00
> >    15     4  |       \_ session00
> >    16     4  \_ criu <defunct>
> >     7     7  \_ session00
> >     8     4  \_ session00
> >    11    11  \_ session00
> >    12     4      \_ session00
> >    10     9 session00
> >     6     4 session00
> >    17     1 ps
> > kill(4, SIGTERM)
> > ------------------------------------- END -------------------------------------
> > ================================= ERROR OVER =================================
> > 
> > On Fri, Sep 05, 2014 at 02:38:04PM -0500, Tycho Andersen wrote:
> > > In order to use TASK_HELPERS to open files from dead processes, they should
> > > persist through the end of the restore phase, since that is when the fds are
> > > set up.
> > > 
> > > Signed-off-by: Tycho Andersen <tycho.andersen at canonical.com>
> > > ---
> > >  cr-restore.c | 37 +++++++++++++++++++++++--------------
> > >  1 file changed, 23 insertions(+), 14 deletions(-)
> > > 
> > > diff --git a/cr-restore.c b/cr-restore.c
> > > index cefa78e..5f481b0 100644
> > > --- a/cr-restore.c
> > > +++ b/cr-restore.c
> > > @@ -92,6 +92,7 @@ static int prepare_restorer_blob(void);
> > >  static int prepare_rlimits(int pid, CoreEntry *core);
> > >  static int prepare_posix_timers(int pid, CoreEntry *core);
> > >  static int prepare_signals(int pid, CoreEntry *core);
> > > +static int restore_switch_stage(int next_stage);
> > >  
> > >  static int root_as_sibling;
> > >  
> > > @@ -765,35 +766,42 @@ err:
> > >  
> > >  static int restore_one_alive_task(int pid, CoreEntry *core)
> > >  {
> > > +	int ret = -1;
> > > +
> > >  	pr_info("Restoring resources\n");
> > >  
> > >  	rst_mem_switch_to_private();
> > >  
> > > -	if (pstree_wait_helpers())
> > > -		return -1;
> > > -
> > >  	if (prepare_fds(current))
> > > -		return -1;
> > > +		goto err;
> > >  
> > >  	if (prepare_file_locks(pid))
> > > -		return -1;
> > > +		goto err;
> > >  
> > >  	if (open_vmas(pid))
> > > -		return -1;
> > > +		goto err;
> > >  
> > >  	if (open_cores(pid, core))
> > > -		return -1;
> > > +		goto err;
> > >  
> > >  	if (prepare_signals(pid, core))
> > > -		return -1;
> > > +		goto err;
> > >  
> > >  	if (prepare_posix_timers(pid, core))
> > > -		return -1;
> > > +		goto err;
> > >  
> > >  	if (prepare_rlimits(pid, core) < 0)
> > > -		return -1;
> > > +		goto err;
> > > +
> > > +	if (sigreturn_restore(pid, core))
> > > +		goto err;
> > >  
> > > -	return sigreturn_restore(pid, core);
> > > +	ret = 0;
> > > +err:
> > > +	if (pstree_wait_helpers())
> > > +		ret = -1;
> > > +
> > > +	return ret;
> > >  }
> > >  
> > >  static void zombie_prepare_signals(void)
> > > @@ -930,9 +938,9 @@ static int restore_one_task(int pid, CoreEntry *core)
> > >  		ret = restore_one_alive_task(pid, core);
> > >  	else if (current->state == TASK_DEAD)
> > >  		ret = restore_one_zombie(pid, core);
> > > -	else if (current->state == TASK_HELPER)
> > > -		ret = 0;
> > > -	else {
> > > +	else if (current->state == TASK_HELPER) {
> > > +		ret = restore_finish_stage(CR_STATE_RESTORE);
> > > +	} else {
> > >  		pr_err("Unknown state in code %d\n", (int)core->tc->task_state);
> > >  		ret = -1;
> > >  	}
> > > @@ -1489,6 +1497,7 @@ static inline int stage_participants(int next_stage)
> > >  	case CR_STATE_FORKING:
> > >  		return task_entries->nr_tasks + task_entries->nr_helpers;
> > >  	case CR_STATE_RESTORE:
> > > +		return task_entries->nr_threads + task_entries->nr_helpers;
> > >  	case CR_STATE_RESTORE_SIGCHLD:
> > >  		return task_entries->nr_threads;
> > >  	case CR_STATE_RESTORE_CREDS:
> > > -- 
> > > 1.9.1
> > > 
> > > _______________________________________________
> > > CRIU mailing list
> > > CRIU at openvz.org
> > > https://lists.openvz.org/mailman/listinfo/criu


More information about the CRIU mailing list