[CRIU] [PATCH 1/2] restore: TASK_HELPERs live through restore stage

Tycho Andersen tycho.andersen at canonical.com
Wed Sep 10 07:32:47 PDT 2014


Hi Andrew,

On Tue, Sep 09, 2014 at 02:21:51PM +0400, Andrew Vagin wrote:
> Hi Tycho,
> 
> I highly recommend running the tests. It's easy: you only need to
> execute one command, "make test".
> 
> ns/static/session00 fails with this patch.

When I try to run this test, I get:

criu:/tmp/criu/test sudo ./zdtm.sh -r static/session00
Execute zdtm/live/static/session00
cc -g -O2 -Wall -Werror -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=0  -iquote ./arch/x86/include  -c -MM -MP -o tcp.d tcp.c
cc -g -O2 -Wall -Werror -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=0  -iquote ./arch/x86/include  -c -MM -MP -o ns.d ns.c
cc -g -O2 -Wall -Werror -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=0  -iquote ./arch/x86/include  -c -MM -MP -o lock.d lock.c
cc -g -O2 -Wall -Werror -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=0  -iquote ./arch/x86/include  -c -MM -MP -o streamutil.d streamutil.c
cc -g -O2 -Wall -Werror -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=0  -iquote ./arch/x86/include  -c -MM -MP -o test.d test.c
cc -g -O2 -Wall -Werror -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=0  -iquote ./arch/x86/include  -c -MM -MP -o parseargs.d parseargs.c
cc -g -O2 -Wall -Werror -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=0  -iquote ./arch/x86/include  -c -MM -MP -o msg.d msg.c
cc -g -O2 -Wall -Werror -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=0  -iquote ./arch/x86/include  -c -MM -MP -o datagen.d datagen.c
cc -g -O2 -Wall -Werror -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=0  -iquote ./arch/x86/include  -c -o datagen.o datagen.c
cc -g -O2 -Wall -Werror -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=0  -iquote ./arch/x86/include  -c -o msg.o msg.c
cc -g -O2 -Wall -Werror -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=0  -iquote ./arch/x86/include  -c -o parseargs.o parseargs.c
cc -g -O2 -Wall -Werror -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=0  -iquote ./arch/x86/include  -c -o test.o test.c
cc -g -O2 -Wall -Werror -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=0  -iquote ./arch/x86/include  -c -o streamutil.o streamutil.c
cc -g -O2 -Wall -Werror -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=0  -iquote ./arch/x86/include  -c -o lock.o lock.c
cc -g -O2 -Wall -Werror -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=0  -iquote ./arch/x86/include  -c -o ns.o ns.c
cc -g -O2 -Wall -Werror -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=0  -iquote ./arch/x86/include  -c -o tcp.o tcp.c
ar: creating libzdtmtst.a
a - datagen.o
a - msg.o
a - parseargs.o
a - test.o
a - streamutil.o
a - lock.o
a - ns.o
a - tcp.o
cc -g -O2 -Wall -Werror -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=0  -iquote ../../lib/arch/x86/include -I../../lib   session00.c ../../lib/libzdtmtst.a   -o session00
./session00 --pidfile=session00.pid --outfile=session00.out
Dump 3866
Restore
Test: zdtm/live/static/session00, Result: FAIL
==================================== ERROR ====================================
Test: zdtm/live/static/session00, Namespace: 
Dump log   : /tmp/criu/test/dump/static/session00/3866/1/dump.log
--------------------------------- grep Error ---------------------------------
------------------------------------- END -------------------------------------
Restore log: /tmp/criu/test/dump/static/session00/3866/1/restore.log
--------------------------------- grep Error ---------------------------------
(00.000700) Error (cr-restore.c:1393): Pid 3867 do not match expected 3866
(00.001033) Error (cr-restore.c:1152): 3867 exited, status=255
(00.001058) Error (cr-restore.c:1762): Restoring FAILED.
------------------------------------- END -------------------------------------
================================= ERROR OVER =================================

This happens even though criu check says I'm (mostly) ok:

criu:/tmp/criu sudo ./criu check
Error (timerfd.c:56): timerfd: No timerfd support for c/r: Inappropriate ioctl for device
Error (cr-check.c:269): fdinfo doesn't contain the mnt_id field

Any ideas?

Tycho

> [root at avagin-fc19-cr criu]# bash test/zdtm.sh ns/static/session00
> ================================= CRIU CHECK =================================
> Looks good.
> Execute zdtm/live/static/session00
> ./session00 --pidfile=session00.pid --outfile=session00.out
> /root/git/criu/test
> Dump 24400
> Restore
> Check results 24439
> Waiting...
> 10:18:41.865:     4: FAIL: session00.c:213: The process with pid 16 returns 256
>  (errno = 10 (Permission denied))
> 10:18:41.867:     4: ERR: session00.c:221: 10 isn't waited (errno = 0 (Permission denied))
> Test: zdtm/live/static/session00, Result: FAIL
> ==================================== ERROR ====================================
> Test: zdtm/live/static/session00, Namespace: 1
> Dump log   : /root/git/criu/test/dump/static/session00/24400/1/dump.log
> --------------------------------- grep Error ---------------------------------
> ------------------------------------- END -------------------------------------
> Restore log: /root/git/criu/test/dump/static/session00/24400/1/restore.log
> --------------------------------- grep Error ---------------------------------
> ------------------------------------- END -------------------------------------
> Output file: /root/git/criu/test/zdtm/live/static/session00.out
> ------------------------------------------------------------------------------
> 10:18:41.865:     4: FAIL: session00.c:213: The process with pid 16 returns 256
>  (errno = 10 (Permission denied))
> 10:18:41.867:     4: ERR: session00.c:221: 10 isn't waited (errno = 0 (Permission denied))
>   PID   SID COMMAND
>     1     1 session00
>     4     4 session00
>     7     7  \_ session00
>     8     4  \_ session00
>    11    11  \_ session00
>    12     4  |   \_ session00
>    13    13  \_ session00
>    14    14      \_ session00
>    15     4          \_ session00
>     6     4 session00
>    10     9 session00
>    16     1 ps
> 9 return 256
> Time to stop and check
>   PID   SID COMMAND
>     1     1 session00
>     4     4 session00
>    13    13  \_ session00
>    14    14  |   \_ session00
>    15     4  |       \_ session00
>    16     4  \_ criu <defunct>
>     7     7  \_ session00
>     8     4  \_ session00
>    11    11  \_ session00
>    12     4      \_ session00
>    10     9 session00
>     6     4 session00
>    17     1 ps
> kill(4, SIGTERM)
> ------------------------------------- END -------------------------------------
> ================================= ERROR OVER =================================
> 
> On Fri, Sep 05, 2014 at 02:38:04PM -0500, Tycho Andersen wrote:
> > In order to use TASK_HELPERS to open files from dead processes, they should
> > persist through the end of the restore phase, since that is when the fds are
> > set up.
> > 
> > Signed-off-by: Tycho Andersen <tycho.andersen at canonical.com>
> > ---
> >  cr-restore.c | 37 +++++++++++++++++++++++--------------
> >  1 file changed, 23 insertions(+), 14 deletions(-)
> > 
> > diff --git a/cr-restore.c b/cr-restore.c
> > index cefa78e..5f481b0 100644
> > --- a/cr-restore.c
> > +++ b/cr-restore.c
> > @@ -92,6 +92,7 @@ static int prepare_restorer_blob(void);
> >  static int prepare_rlimits(int pid, CoreEntry *core);
> >  static int prepare_posix_timers(int pid, CoreEntry *core);
> >  static int prepare_signals(int pid, CoreEntry *core);
> > +static int restore_switch_stage(int next_stage);
> >  
> >  static int root_as_sibling;
> >  
> > @@ -765,35 +766,42 @@ err:
> >  
> >  static int restore_one_alive_task(int pid, CoreEntry *core)
> >  {
> > +	int ret = -1;
> > +
> >  	pr_info("Restoring resources\n");
> >  
> >  	rst_mem_switch_to_private();
> >  
> > -	if (pstree_wait_helpers())
> > -		return -1;
> > -
> >  	if (prepare_fds(current))
> > -		return -1;
> > +		goto err;
> >  
> >  	if (prepare_file_locks(pid))
> > -		return -1;
> > +		goto err;
> >  
> >  	if (open_vmas(pid))
> > -		return -1;
> > +		goto err;
> >  
> >  	if (open_cores(pid, core))
> > -		return -1;
> > +		goto err;
> >  
> >  	if (prepare_signals(pid, core))
> > -		return -1;
> > +		goto err;
> >  
> >  	if (prepare_posix_timers(pid, core))
> > -		return -1;
> > +		goto err;
> >  
> >  	if (prepare_rlimits(pid, core) < 0)
> > -		return -1;
> > +		goto err;
> > +
> > +	if (sigreturn_restore(pid, core))
> > +		goto err;
> >  
> > -	return sigreturn_restore(pid, core);
> > +	ret = 0;
> > +err:
> > +	if (pstree_wait_helpers())
> > +		ret = -1;
> > +
> > +	return ret;
> >  }
> >  
> >  static void zombie_prepare_signals(void)
> > @@ -930,9 +938,9 @@ static int restore_one_task(int pid, CoreEntry *core)
> >  		ret = restore_one_alive_task(pid, core);
> >  	else if (current->state == TASK_DEAD)
> >  		ret = restore_one_zombie(pid, core);
> > -	else if (current->state == TASK_HELPER)
> > -		ret = 0;
> > -	else {
> > +	else if (current->state == TASK_HELPER) {
> > +		ret = restore_finish_stage(CR_STATE_RESTORE);
> > +	} else {
> >  		pr_err("Unknown state in code %d\n", (int)core->tc->task_state);
> >  		ret = -1;
> >  	}
> > @@ -1489,6 +1497,7 @@ static inline int stage_participants(int next_stage)
> >  	case CR_STATE_FORKING:
> >  		return task_entries->nr_tasks + task_entries->nr_helpers;
> >  	case CR_STATE_RESTORE:
> > +		return task_entries->nr_threads + task_entries->nr_helpers;
> >  	case CR_STATE_RESTORE_SIGCHLD:
> >  		return task_entries->nr_threads;
> >  	case CR_STATE_RESTORE_CREDS:
> > -- 
> > 1.9.1
> > 
> > _______________________________________________
> > CRIU mailing list
> > CRIU at openvz.org
> > https://lists.openvz.org/mailman/listinfo/criu


More information about the CRIU mailing list