[CRIU] [PATCH] zdtm: check that a command completes successfully after a fault (v3)

Wed Mar 2 08:23:53 PST 2016

On 03/02/2016 06:52 PM, Andrew Vagin wrote:
> On Wed, Mar 02, 2016 at 05:08:02PM +0300, Pavel Emelyanov wrote:
>> On 03/01/2016 07:16 PM, Andrey Vagin wrote:
>>> From: Andrew Vagin <avagin at virtuozzo.com>
>>>
>>> I suggest injecting a fault and then trying to execute the same command
>>> again without a fault, to check that it completes successfully.
>>>
>>> v2: skip a parasite blob when we are checking vma-s
>>> v3: remove a loop for two iterations
>>> Signed-off-by: Andrew Vagin <avagin at virtuozzo.com>
>>> ---
>>>  test/zdtm.py | 28 ++++++++++++++++++++++------
>>>  1 file changed, 22 insertions(+), 6 deletions(-)
>>>
>>> diff --git a/test/zdtm.py b/test/zdtm.py
>>> index 1ace919..2bf7741 100755
>>> --- a/test/zdtm.py
>>> +++ b/test/zdtm.py
>>> @@ -656,10 +656,26 @@ class criu_cli:
>>>  
>>>  		preexec = self.__user and self.set_user_id or None
>>>  
>>> +		__ddir = self.__ddir()
>>> +
>>>  		ret = self.__criu(action, s_args, self.__fault, strace, preexec)
>>> -		grep_errors(os.path.join(self.__ddir(), log))
>>> +		grep_errors(os.path.join(__ddir, log))
>>>  		if ret != 0:
>>> -			if self.__fault or self.__test.blocking() or (self.__sat and action == 'restore'):
>>> +			if self.__fault:
>>> +				try_run_hook(self.__test, ["--fault", action])
>>
>> The hook would be called 2 times, won't it?
> 
> No, it would not be. We will call criu again.

OK, so do we need the try_run_hook for --fault in its original place then?

>>
>>> +				if action == "dump":
>>> +					# create a clean directory for images
>>> +					__ddir_fail = __ddir + ".fail"
>>> +					os.rename(__ddir, __ddir + ".fail")
>>
>> __ddir_fail is unused, but it was supposed to be used in the os.rename() call.
>>
>>> +					os.mkdir(__ddir)
>>> +					os.chmod(__ddir, 0777)
>>> +				else:
>>
>> Need a comment here describing why "restore" is different.
>>
>>> +					os.rename(os.path.join(__ddir, log), os.path.join(__ddir, log + ".fail"))
>>> +				# try again without faults
>>> +				ret = self.__criu(action, s_args, False, strace, preexec)
> 
> ^^^ here
>>> +				if ret == 0:
>>> +					return
>>> +			if self.__test.blocking() or (self.__sat and action == 'restore'):
>>>  				raise test_fail_expected_exc(action)
>>>  			else:
>>>  				raise test_fail_exc("CRIU %s" % action)
>>> @@ -819,7 +835,7 @@ def get_visible_state(test):
>>>  		mounts[pid] = set(cmounts)
>>>  	return files, maps, mounts
>>>  
>>> -def check_visible_state(test, state):
>>> +def check_visible_state(test, state, opts):
>>>  	new = get_visible_state(test)
>>>  
>>>  	for pid in state[0].keys():
>>> @@ -835,8 +851,8 @@ def check_visible_state(test, state):
>>>  		if old_maps != new_maps:
>>>  			print "%s: Old maps lost: %s" % (pid, old_maps - new_maps)
>>>  			print "%s: New maps appeared: %s" % (pid, new_maps - old_maps)
>>> -
>>> -			raise test_fail_exc("maps compare")
>>> +			if not opts['fault']: # skip parasite blob
>>> +				raise test_fail_exc("maps compare")
>>>  
>>>  		old_mounts = state[2][pid]
>>>  		new_mounts = new[2][pid]
>>> @@ -937,7 +953,7 @@ def do_run_test(tname, tdesc, flavs, opts):
>>>  					t.stop()
>>>  				try_run_hook(t, ["--fault", e.cr_action])
>>>  			else:
>>> -				check_visible_state(t, s)
>>> +				check_visible_state(t, s, opts)
>>>  				t.stop()
>>>  				try_run_hook(t, ["--clean"])
>>>  		except test_fail_exc as e:
>>>
>>
> .
>