[CRIU] [PATCH 3/3] test: check exit codes for criu page-server and criu lazy-pages
Pavel Emelyanov
xemul at virtuozzo.com
Wed Jan 18 00:10:46 PST 2017
On 01/11/2017 09:51 AM, Mike Rapoport wrote:
> On Wed, Jan 11, 2017 at 12:21:45AM +0300, Andrei Vagin wrote:
>> From: Andrei Vagin <avagin at virtuozzo.com>
>>
>> Cc: Mike Rapoport <rppt at linux.vnet.ibm.com>
>> Signed-off-by: Andrei Vagin <avagin at virtuozzo.com>
>> ---
>> test/zdtm.py | 60 ++++++++++++++++++++++++++++++++++++++++++++++++------------
>> 1 file changed, 48 insertions(+), 12 deletions(-)
>>
>> diff --git a/test/zdtm.py b/test/zdtm.py
>> index b899e38..b654ddb 100755
>> --- a/test/zdtm.py
>> +++ b/test/zdtm.py
>> @@ -675,12 +675,14 @@ join_ns_file = '/run/netns/zdtm_netns'
>>
>> class criu_cli:
>> @staticmethod
>> - def run(action, args, fault = None, strace = [], preexec = None):
>> + def run(action, args, fault = None, strace = [], preexec = None, nowait = False):
>> env = None
>> if fault:
>> print "Forcing %s fault" % fault
>> env = dict(os.environ, CRIU_FAULT = fault)
>> cr = subprocess.Popen(strace + [criu_bin, action] + args, env = env, preexec_fn = preexec)
>> + if nowait:
>> + return cr
>> return cr.wait()
>>
>>
>> @@ -722,13 +724,15 @@ class criu_rpc:
>> raise test_fail_exc('RPC for %s required' % arg)
>>
>> @staticmethod
>> - def run(action, args, fault = None, strace = [], preexec = None):
>> + def run(action, args, fault = None, strace = [], preexec = None, nowait = False):
>> if fault:
>> raise test_fail_exc('RPC and FAULT not supported')
>> if strace:
>> raise test_fail_exc('RPC and SAT not supported')
>> if preexec:
>> raise test_fail_exc('RPC and PREEXEC not supported')
>> + if nowait:
>> + raise test_fail_exc("RPC and status-fd not supported")
>>
>> ctx = {} # Object used to keep info untill action is done
>> criu = crpc.criu()
>> @@ -781,6 +785,8 @@ class criu:
>> self.__user = (opts['user'] and True or False)
>> self.__leave_stopped = (opts['stop'] and True or False)
>> self.__criu = (opts['rpc'] and criu_rpc or criu_cli)
>> + self.__lazy_pages_p = None
>> + self.__page_server_p = None
>>
>> def logs(self):
>> return self.__dump_path
>> @@ -812,7 +818,7 @@ class criu:
>> os.setresgid(58467, 58467, 58467)
>> os.setresuid(18943, 18943, 18943)
>>
>> - def __criu_act(self, action, opts, log = None):
>> + def __criu_act(self, action, opts = [], log = None, nowait = False):
>> if not log:
>> log = action + ".log"
>>
>> @@ -841,7 +847,20 @@ class criu:
>>
>> __ddir = self.__ddir()
>>
>> - ret = self.__criu.run(action, s_args, self.__fault, strace, preexec)
>> + status_fds = None
>> + if nowait:
>> + status_fds = os.pipe()
>> + s_args += ["--status-fd", str(status_fds[1])]
>> +
>> + ret = self.__criu.run(action, s_args, self.__fault, strace, preexec, nowait)
>> +
>> + if nowait:
>> + os.close(status_fds[1])
>> + if os.read(status_fds[0], 1) != '\0':
>> + ret = ret.wait()
>> + raise test_fail_exc("criu %s exited with %s" % (action, ret))
>> + os.close(status_fds[0])
>> + return ret
>>
>> grep_errors(os.path.join(__ddir, log))
>> if ret != 0:
>> @@ -879,11 +898,11 @@ class criu:
>> if self.__page_server:
>> print "Adding page server"
>>
>> - ps_opts = ["--port", "12345", "--daemon", "--pidfile", "ps.pid"]
>> + ps_opts = ["--port", "12345"]
>> if self.__dedup:
>> ps_opts += ["--auto-dedup"]
>>
>> - self.__criu_act("page-server", opts = ps_opts)
>> + self.__page_server_p = self.__criu_act("page-server", opts = ps_opts, nowait = True)
>> a_opts += ["--page-server", "--address", "127.0.0.1", "--port", "12345"]
>>
>> a_opts += self.__test.getdopts()
>> @@ -911,8 +930,11 @@ class criu:
>> pstree_check_stopped(self.__test.getpid())
>> pstree_signal(self.__test.getpid(), signal.SIGKILL)
>>
>> - if self.__page_server:
>> - wait_pid_die(int(rpidfile(self.__ddir() + "/ps.pid")), "page server")
>> + if self.__page_server_p:
>> + ret = self.__page_server_p.wait()
>> + self.__page_server_p = None
>> + if ret:
>> + raise test_fail_exc("criu page-server exited with %d" % ret)
>>
>> def restore(self):
>> r_opts = []
>> @@ -940,7 +962,7 @@ class criu:
>> ps_opts = ["--daemon", "--pidfile", "ps.pid",
>> "--port", "12345", "--lazy-pages"]
>> self.__criu_act("page-server", opts = ps_opts)
>> - self.__criu_act("lazy-pages", opts = lp_opts)
>> + self.__lazy_pages_p = self.__criu_act("lazy-pages", opts = lp_opts, nowait = True)
>> r_opts += ["--lazy-pages"]
>>
>> if self.__leave_stopped:
>> @@ -948,13 +970,16 @@ class criu:
>>
>> self.__criu_act("restore", opts = r_opts + ["--restore-detached"])
>>
>> + if self.__lazy_pages_p:
>> + ret = self.__lazy_pages_p.wait()
>> + self.__lazy_pages_p = None
>> + if ret:
>> + raise test_fail_exc("criu lazy-pages exited with %s" % ret)
>> +
>
> This hunk conflicts with the change that moves wait() for lazy-pages to after t.stop()
> (https://github.com/xemul/criu/commit/9a473dabda7f74ab0a608531fd16b849dc8a1dd8)
>
> Apparently it should go into the criu.fini() method but, as of now, lazy-pages
> will fail in most cases because it is still unable to detect termination
> of the monitored process :(
>
Andrey, would you handle this?
-- Pavel
More information about the CRIU mailing list