[CRIU] [PATCH 3/3] test: check exit codes for criu page-server and criu lazy-pages
Mike Rapoport
rppt at linux.vnet.ibm.com
Tue Jan 10 22:51:05 PST 2017
On Wed, Jan 11, 2017 at 12:21:45AM +0300, Andrei Vagin wrote:
> From: Andrei Vagin <avagin at virtuozzo.com>
>
> Cc: Mike Rapoport <rppt at linux.vnet.ibm.com>
> Signed-off-by: Andrei Vagin <avagin at virtuozzo.com>
> ---
> test/zdtm.py | 60 ++++++++++++++++++++++++++++++++++++++++++++++++------------
> 1 file changed, 48 insertions(+), 12 deletions(-)
>
> diff --git a/test/zdtm.py b/test/zdtm.py
> index b899e38..b654ddb 100755
> --- a/test/zdtm.py
> +++ b/test/zdtm.py
> @@ -675,12 +675,14 @@ join_ns_file = '/run/netns/zdtm_netns'
>
> class criu_cli:
> @staticmethod
> - def run(action, args, fault = None, strace = [], preexec = None):
> + def run(action, args, fault = None, strace = [], preexec = None, nowait = False):
> env = None
> if fault:
> print "Forcing %s fault" % fault
> env = dict(os.environ, CRIU_FAULT = fault)
> cr = subprocess.Popen(strace + [criu_bin, action] + args, env = env, preexec_fn = preexec)
> + if nowait:
> + return cr
> return cr.wait()
>
>
> @@ -722,13 +724,15 @@ class criu_rpc:
> raise test_fail_exc('RPC for %s required' % arg)
>
> @staticmethod
> - def run(action, args, fault = None, strace = [], preexec = None):
> + def run(action, args, fault = None, strace = [], preexec = None, nowait = False):
> if fault:
> raise test_fail_exc('RPC and FAULT not supported')
> if strace:
> raise test_fail_exc('RPC and SAT not supported')
> if preexec:
> raise test_fail_exc('RPC and PREEXEC not supported')
> + if nowait:
> + raise test_fail_exc("RPC and status-fd not supported")
>
> ctx = {} # Object used to keep info untill action is done
> criu = crpc.criu()
> @@ -781,6 +785,8 @@ class criu:
> self.__user = (opts['user'] and True or False)
> self.__leave_stopped = (opts['stop'] and True or False)
> self.__criu = (opts['rpc'] and criu_rpc or criu_cli)
> + self.__lazy_pages_p = None
> + self.__page_server_p = None
>
> def logs(self):
> return self.__dump_path
> @@ -812,7 +818,7 @@ class criu:
> os.setresgid(58467, 58467, 58467)
> os.setresuid(18943, 18943, 18943)
>
> - def __criu_act(self, action, opts, log = None):
> + def __criu_act(self, action, opts = [], log = None, nowait = False):
> if not log:
> log = action + ".log"
>
> @@ -841,7 +847,20 @@ class criu:
>
> __ddir = self.__ddir()
>
> - ret = self.__criu.run(action, s_args, self.__fault, strace, preexec)
> + status_fds = None
> + if nowait:
> + status_fds = os.pipe()
> + s_args += ["--status-fd", str(status_fds[1])]
> +
> + ret = self.__criu.run(action, s_args, self.__fault, strace, preexec, nowait)
> +
> + if nowait:
> + os.close(status_fds[1])
> + if os.read(status_fds[0], 1) != '\0':
> + ret = ret.wait()
> + raise test_fail_exc("criu %s exited with %s" % (action, ret))
> + os.close(status_fds[0])
> + return ret
>
> grep_errors(os.path.join(__ddir, log))
> if ret != 0:
> @@ -879,11 +898,11 @@ class criu:
> if self.__page_server:
> print "Adding page server"
>
> - ps_opts = ["--port", "12345", "--daemon", "--pidfile", "ps.pid"]
> + ps_opts = ["--port", "12345"]
> if self.__dedup:
> ps_opts += ["--auto-dedup"]
>
> - self.__criu_act("page-server", opts = ps_opts)
> + self.__page_server_p = self.__criu_act("page-server", opts = ps_opts, nowait = True)
> a_opts += ["--page-server", "--address", "127.0.0.1", "--port", "12345"]
>
> a_opts += self.__test.getdopts()
> @@ -911,8 +930,11 @@ class criu:
> pstree_check_stopped(self.__test.getpid())
> pstree_signal(self.__test.getpid(), signal.SIGKILL)
>
> - if self.__page_server:
> - wait_pid_die(int(rpidfile(self.__ddir() + "/ps.pid")), "page server")
> + if self.__page_server_p:
> + ret = self.__page_server_p.wait()
> + self.__page_server_p = None
> + if ret:
> + raise test_fail_exc("criu page-server exited with %d" % ret)
>
> def restore(self):
> r_opts = []
> @@ -940,7 +962,7 @@ class criu:
> ps_opts = ["--daemon", "--pidfile", "ps.pid",
> "--port", "12345", "--lazy-pages"]
> self.__criu_act("page-server", opts = ps_opts)
> - self.__criu_act("lazy-pages", opts = lp_opts)
> + self.__lazy_pages_p = self.__criu_act("lazy-pages", opts = lp_opts, nowait = True)
> r_opts += ["--lazy-pages"]
>
> if self.__leave_stopped:
> @@ -948,13 +970,16 @@ class criu:
>
> self.__criu_act("restore", opts = r_opts + ["--restore-detached"])
>
> + if self.__lazy_pages_p:
> + ret = self.__lazy_pages_p.wait()
> + self.__lazy_pages_p = None
> + if ret:
> + raise test_fail_exc("criu lazy-pages exited with %s" % ret)
> +
This hunk conflicts with the commit that moves the wait() for lazy pages to
after t.stop()
(https://github.com/xemul/criu/commit/9a473dabda7f74ab0a608531fd16b849dc8a1dd8).
Apparently this wait should go into the criu.fini() method, but, as of now,
lazy pages will fail in most cases because the lazy-pages daemon is still
unable to detect termination of the monitored process :(
> if self.__leave_stopped:
> pstree_check_stopped(self.__test.getpid())
> pstree_signal(self.__test.getpid(), signal.SIGCONT)
>
> - if self.__lazy_pages:
> - wait_pid_die(int(rpidfile(self.__ddir() + "/lp.pid")), "lazy pages daemon")
> -
> @staticmethod
> def check(feature):
> return criu_cli.run("check", ["-v0", "--feature", feature]) == 0
> @@ -965,6 +990,16 @@ class criu:
> print "CRIU binary not built"
> sys.exit(1)
>
> + def kill(self):
> + if self.__lazy_pages_p:
> + self.__lazy_pages_p.terminate()
> + print "criu lazy-pages exited with %s" & self.wait()
> + self.__lazy_pages_p = None
> + if self.__page_server_p:
> + self.__page_server_p.terminate()
> + print "criu page-server exited with %s" & self.wait()
> + self.__page_server_p = None
> +
>
> def try_run_hook(test, args):
> hname = test.getname() + '.hook'
> @@ -1305,6 +1340,7 @@ def do_run_test(tname, tdesc, flavs, opts):
> print_sep("Test %s FAIL at %s" % (tname, e.step), '#')
> t.print_output()
> t.kill()
> + cr_api.kill()
> try_run_hook(t, ["--clean"])
> if cr_api.logs():
> add_to_report(cr_api.logs(), tname.replace('/', '_') + "_" + f + "/images")
> --
> 2.7.4
>
More information about the CRIU
mailing list