[CRIU] [PATCH 2/2] rpc: Report back first error message on failure

Pavel Emelyanov xemul at virtuozzo.com
Tue Sep 6 14:40:00 PDT 2016


On 09/05/2016 06:19 PM, Adrian Reber wrote:
> This patch breaks p.haul:
> 
> 15:18:07.038: 30677: 	Issuing pre-dump command to service
> Error (criu/log.c:117): BUG at criu/log.c:117

:( That's because of service calls fork() on pre-dump, child initializes
log to keep the message, but the responce is sent in parent process...

Hm...

Thanks for catching this, will think how to fix :)

-- Pavel

> 15:18:08.686: 30677: CRIU RPC error (0/4)
> Traceback (most recent call last):
>   File "./p.haul", line 47, in <module>
>     worker.start_migration()
>   File "/share/p.haul/phaul/iters.py", line 159, in start_migration
>     self.__start_live_migration()
>   File "/share/p.haul/phaul/iters.py", line 197, in __start_live_migration
>     self.criu_connection, self.fs)
>   File "/share/p.haul/phaul/criu_cr.py", line 14, in criu_predump
>     resp = criu_connection.send_req(req)
>   File "/share/p.haul/phaul/criu_api.py", line 70, in send_req
>     return self._recv_resp()
>   File "/share/p.haul/phaul/criu_api.py", line 57, in _recv_resp
>     raise Exception("CRIU RPC error (%d/%d)" % (resp.type, self._last_req))
> Exception: CRIU RPC error (0/4)
> 
> Running p.haul with '--no-pre-dump' does not show that error. Reverting
> commit 2157799c also fixes the error.
> 
> 		Adrian
> 
> On Wed, Aug 24, 2016 at 09:12:03PM +0300, Pavel Emelyanov wrote:
>> When running criu in swrk mode the client typically wants to know
>> the reason of failure. Right now criu reports back NOTHING but the
>> fact that dump/restore/etc fails. We've tried to address this by
>> introducing the cr-errno engine, but it doesn't seem to be informative
>> enough and is hard to maintain -- adding new errno-s is boring :(
>>
>> I propose to report back the first message with ERROR level upon
>> failrure as __typically__ the very first error message indicates
>> that proceeding is impossible and criu rolls back (generating more
>> error messages, so it's crucial to know the very first one).
>>
>> If we ever meet the situation that the first pr_err/pr_perror doesn't
>> cause criu to exit, this printing should be fixed to be pr_warn.
>>
>> Signed-off-by: Pavel Emelyanov <xemul at virtuozzo.com>
>> ---
>>  criu/cr-service.c  |  7 +++++++
>>  criu/include/log.h |  2 ++
>>  criu/log.c         | 49 +++++++++++++++++++++++++++++++++++++++++++++++++
>>  images/rpc.proto   |  1 +
>>  4 files changed, 59 insertions(+)
>>
>> diff --git a/criu/cr-service.c b/criu/cr-service.c
>> index a2b4027..c29e6ef 100644
>> --- a/criu/cr-service.c
>> +++ b/criu/cr-service.c
>> @@ -119,6 +119,8 @@ static void set_resp_err(CriuResp *resp)
>>  		resp->has_cr_errno = true;
>>  		resp->cr_errno = errn;
>>  	}
>> +
>> +	resp->cr_errmsg = log_first_err();
>>  }
>>  
>>  static void send_criu_err(int sk, char *msg)
>> @@ -289,6 +291,11 @@ static int setup_opts_from_req(int sk, CriuOpts *req)
>>  		goto err;
>>  	}
>>  
>> +	if (log_keep_err()) {
>> +		pr_perror("Can't tune log");
>> +		goto err;
>> +	}
>> +
>>  	/* checking flags from client */
>>  	if (req->has_leave_running && req->leave_running)
>>  		opts.final_state = TASK_ALIVE;
>> diff --git a/criu/include/log.h b/criu/include/log.h
>> index fe53a7c..d2b25ec 100644
>> --- a/criu/include/log.h
>> +++ b/criu/include/log.h
>> @@ -9,6 +9,8 @@ extern int log_init(const char *output);
>>  extern void log_fini(void);
>>  extern int log_init_by_pid(void);
>>  extern void log_closedir(void);
>> +extern int log_keep_err(void);
>> +extern char *log_first_err(void);
>>  
>>  extern void log_set_fd(int fd);
>>  extern int log_get_fd(void);
>> diff --git a/criu/log.c b/criu/log.c
>> index 6c5d807..64ce8c5 100644
>> --- a/criu/log.c
>> +++ b/criu/log.c
>> @@ -17,6 +17,9 @@
>>  #include "util.h"
>>  #include "cr_options.h"
>>  #include "servicefd.h"
>> +#include "rst-malloc.h"
>> +#include "lock.h"
>> +#include "string.h"
>>  
>>  #define DEFAULT_LOGFD		STDERR_FILENO
>>  /* Enable timestamps if verbosity is increased from default */
>> @@ -72,6 +75,49 @@ static void reset_buf_off(void)
>>  		buf_off = 0;
>>  }
>>  
>> +/*
>> + * Keeping the very first error messsage for RPC to report back.
>> + */
>> +struct str_and_lock {
>> +	mutex_t l;
>> +	char s[1024];
>> +};
>> +
>> +static struct str_and_lock *first_err;
>> +
>> +int log_keep_err(void)
>> +{
>> +	first_err = shmalloc(sizeof(struct str_and_lock));
>> +	if (first_err == NULL)
>> +		return -1;
>> +
>> +	mutex_init(&first_err->l);
>> +	first_err->s[0] = '\0';
>> +	return 0;
>> +}
>> +
>> +static void log_note_err(char *msg)
>> +{
>> +	if (first_err && first_err->s[0] == '\0') {
>> +		/*
>> +		 * In any action other than restore this locking is
>> +		 * actually not required, but ... it's error path
>> +		 * anyway, so it doesn't make much sence to try hard
>> +		 * and optimize this out.
>> +		 */
>> +		mutex_lock(&first_err->l);
>> +		if (first_err->s[0] == '\0')
>> +			strlcpy(first_err->s, msg, sizeof(first_err->s));
>> +		mutex_unlock(&first_err->l);
>> +	}
>> +}
>> +
>> +char *log_first_err(void)
>> +{
>> +	BUG_ON(!first_err);
>> +	return first_err->s[0] == '\0' ? NULL : first_err->s;
>> +}
>> +
>>  int log_init(const char *output)
>>  {
>>  	int new_logfd, fd;
>> @@ -178,6 +224,9 @@ static void __print_on_level(unsigned int loglevel, const char *format, va_list
>>  		off += ret;
>>  	}
>>  	errno =  __errno;
>> +
>> +	if (loglevel == LOG_ERROR)
>> +		log_note_err(buffer + buf_off);
>>  }
>>  
>>  void print_on_level(unsigned int loglevel, const char *format, ...)
>> diff --git a/images/rpc.proto b/images/rpc.proto
>> index 11d169b..9b36178 100644
>> --- a/images/rpc.proto
>> +++ b/images/rpc.proto
>> @@ -187,4 +187,5 @@ message criu_resp {
>>  
>>  	optional int32			cr_errno	= 7;
>>  	optional criu_features		features	= 8;
>> +	optional string			cr_errmsg	= 9;
>>  }
>> -- 
>> 2.5.0
>>
>> _______________________________________________
>> CRIU mailing list
>> CRIU at openvz.org
>> https://lists.openvz.org/mailman/listinfo/criu
> .
> 



More information about the CRIU mailing list