[CRIU] [PATCH 2/2] rpc: Report back first error message on failure

Pavel Emelyanov xemul at virtuozzo.com
Wed Aug 24 11:12:03 PDT 2016


When running criu in swrk mode the client typically wants to know
the reason of failure. Right now criu reports back NOTHING but the
fact that dump/restore/etc fails. We've tried to address this by
introducing the cr-errno engine, but it doesn't seem to be informative
enough and is hard to maintain -- adding new errno-s is boring :(

I propose to report back the first message with ERROR level upon
failrure as __typically__ the very first error message indicates
that proceeding is impossible and criu rolls back (generating more
error messages, so it's crucial to know the very first one).

If we ever meet the situation that the first pr_err/pr_perror doesn't
cause criu to exit, this printing should be fixed to be pr_warn.

Signed-off-by: Pavel Emelyanov <xemul at virtuozzo.com>
---
 criu/cr-service.c  |  7 +++++++
 criu/include/log.h |  2 ++
 criu/log.c         | 49 +++++++++++++++++++++++++++++++++++++++++++++++++
 images/rpc.proto   |  1 +
 4 files changed, 59 insertions(+)

diff --git a/criu/cr-service.c b/criu/cr-service.c
index a2b4027..c29e6ef 100644
--- a/criu/cr-service.c
+++ b/criu/cr-service.c
@@ -119,6 +119,8 @@ static void set_resp_err(CriuResp *resp)
 		resp->has_cr_errno = true;
 		resp->cr_errno = errn;
 	}
+
+	resp->cr_errmsg = log_first_err();
 }
 
 static void send_criu_err(int sk, char *msg)
@@ -289,6 +291,11 @@ static int setup_opts_from_req(int sk, CriuOpts *req)
 		goto err;
 	}
 
+	if (log_keep_err()) {
+		pr_perror("Can't tune log");
+		goto err;
+	}
+
 	/* checking flags from client */
 	if (req->has_leave_running && req->leave_running)
 		opts.final_state = TASK_ALIVE;
diff --git a/criu/include/log.h b/criu/include/log.h
index fe53a7c..d2b25ec 100644
--- a/criu/include/log.h
+++ b/criu/include/log.h
@@ -9,6 +9,8 @@ extern int log_init(const char *output);
 extern void log_fini(void);
 extern int log_init_by_pid(void);
 extern void log_closedir(void);
+extern int log_keep_err(void);
+extern char *log_first_err(void);
 
 extern void log_set_fd(int fd);
 extern int log_get_fd(void);
diff --git a/criu/log.c b/criu/log.c
index 6c5d807..64ce8c5 100644
--- a/criu/log.c
+++ b/criu/log.c
@@ -17,6 +17,9 @@
 #include "util.h"
 #include "cr_options.h"
 #include "servicefd.h"
+#include "rst-malloc.h"
+#include "lock.h"
+#include "string.h"
 
 #define DEFAULT_LOGFD		STDERR_FILENO
 /* Enable timestamps if verbosity is increased from default */
@@ -72,6 +75,49 @@ static void reset_buf_off(void)
 		buf_off = 0;
 }
 
+/*
+ * Keeping the very first error messsage for RPC to report back.
+ */
+struct str_and_lock {
+	mutex_t l;
+	char s[1024];
+};
+
+static struct str_and_lock *first_err;
+
+int log_keep_err(void)
+{
+	first_err = shmalloc(sizeof(struct str_and_lock));
+	if (first_err == NULL)
+		return -1;
+
+	mutex_init(&first_err->l);
+	first_err->s[0] = '\0';
+	return 0;
+}
+
+static void log_note_err(char *msg)
+{
+	if (first_err && first_err->s[0] == '\0') {
+		/*
+		 * In any action other than restore this locking is
+		 * actually not required, but ... it's error path
+		 * anyway, so it doesn't make much sence to try hard
+		 * and optimize this out.
+		 */
+		mutex_lock(&first_err->l);
+		if (first_err->s[0] == '\0')
+			strlcpy(first_err->s, msg, sizeof(first_err->s));
+		mutex_unlock(&first_err->l);
+	}
+}
+
+char *log_first_err(void)
+{
+	BUG_ON(!first_err);
+	return first_err->s[0] == '\0' ? NULL : first_err->s;
+}
+
 int log_init(const char *output)
 {
 	int new_logfd, fd;
@@ -178,6 +224,9 @@ static void __print_on_level(unsigned int loglevel, const char *format, va_list
 		off += ret;
 	}
 	errno =  __errno;
+
+	if (loglevel == LOG_ERROR)
+		log_note_err(buffer + buf_off);
 }
 
 void print_on_level(unsigned int loglevel, const char *format, ...)
diff --git a/images/rpc.proto b/images/rpc.proto
index 11d169b..9b36178 100644
--- a/images/rpc.proto
+++ b/images/rpc.proto
@@ -187,4 +187,5 @@ message criu_resp {
 
 	optional int32			cr_errno	= 7;
 	optional criu_features		features	= 8;
+	optional string			cr_errmsg	= 9;
 }
-- 
2.5.0



More information about the CRIU mailing list