[CRIU] [PATCH] [RFC] criu: test different situations when parasite must cure itself

Andrey Vagin avagin at openvz.org
Wed Oct 15 04:24:18 PDT 2014


Here is a simple fault-injection engine. Each fault has a unique code.
One of these codes can be set in the CRIU_FAULT environment variable.
On the next run the corresponding fault will be triggered.
For each fault we need code which emulates the specified behaviour.

This patch checks the following cases:
* a parasite socket is closed unexpectedly
* how the parasite handles an unsupported command
* something fails while a parasite daemon is running
* criu dies unexpectedly

Fault-injection code is compiled only if make is executed with DEBUG=1.

The following command can be used to check all existing fault cases:
make -C test fault-injection

Signed-off-by: Andrey Vagin <avagin at openvz.org>
---
 Makefile.crtools          |  3 +++
 cr-dump.c                 | 30 ++++++++++++++++++++++++++++++
 crtools.c                 |  6 ++++++
 fault-injection.c         | 28 ++++++++++++++++++++++++++++
 include/fault-injection.h | 25 +++++++++++++++++++++++++
 test/Makefile             | 12 +++++++++---
 test/fault-max.c          |  8 ++++++++
 test/zdtm.sh              | 18 +++++++++++++++++-
 8 files changed, 126 insertions(+), 4 deletions(-)
 create mode 100644 fault-injection.c
 create mode 100644 include/fault-injection.h
 create mode 100644 test/fault-max.c

diff --git a/Makefile.crtools b/Makefile.crtools
index 7f21d25..f4d34ca 100644
--- a/Makefile.crtools
+++ b/Makefile.crtools
@@ -69,6 +69,9 @@ endif
 obj-y	+= cr-service.o
 obj-y	+= sd-daemon.o
 obj-y	+= plugin.o
+ifeq ($(DEBUG),1)
+obj-y	+= fault-injection.o
+endif
 
 ifneq ($(MAKECMDGOALS),clean)
 incdeps := y
diff --git a/cr-dump.c b/cr-dump.c
index 7d33e3d..ad8a415 100644
--- a/cr-dump.c
+++ b/cr-dump.c
@@ -72,6 +72,7 @@
 #include "irmap.h"
 #include "sysfs_parse.h"
 #include "action-scripts.h"
+#include "fault-injection.h"
 
 #include "asm/dump.h"
 
@@ -1465,6 +1466,32 @@ err_cure:
 	goto err_free;
 }
 
+/*
+ * Emulate the fault selected via CRIU_FAULT, if any. At most one of the
+ * checks below can match, so their order does not matter. A non-zero
+ * return makes dump_one_task() bail out; 0 lets the dump continue.
+ */
+static int fault_injected_daemon(struct parasite_ctl *ctl)
+{
+	if (fault_injected(FI_PARASITE_DAEMON))
+		return -1;	/* plain failure while the daemon is running */
+
+	if (fault_injected(FI_PARASITE_DAEMON_KILL)) {
+		exit(1);	/* criu terminates right here with status 1 */
+		BUG();
+	}
+
+	if (fault_injected(FI_PARASITE_DAEMON_SOCK))
+		close(ctl->tsock);	/* close the parasite socket unexpectedly */
+
+	/* PARASITE_CMD_MAX is presumably not a command the parasite accepts. */
+	if (fault_injected(FI_PARASITE_DAEMON_BAD_CMD) &&
+	    parasite_execute_daemon(PARASITE_CMD_MAX, ctl) < 0)
+		return -1;
+
+	return 0;
+}
+
 static int dump_one_task(struct pstree_item *item)
 {
 	pid_t pid = item->pid.real;
@@ -1540,6 +1567,9 @@ static int dump_one_task(struct pstree_item *item)
 		goto err;
 	}
 
+	if (fault_injected_daemon(parasite_ctl))
+		goto err;
+
 	if (root_ns_mask & CLONE_NEWPID && root_item == item) {
 		int pfd;
 
diff --git a/crtools.c b/crtools.c
index a245bbb..d6f3054 100644
--- a/crtools.c
+++ b/crtools.c
@@ -38,6 +38,7 @@
 #include "cgroup.h"
 #include "cpu.h"
 #include "action-scripts.h"
+#include "fault-injection.h"
 
 #include "setproctitle.h"
 
@@ -192,6 +193,11 @@ int main(int argc, char *argv[], char *envp[])
 
 	BUILD_BUG_ON(PAGE_SIZE != PAGE_IMAGE_SIZE);
 
+	if (fault_injection_init()) {
+		/* criu must be killed, because non-zero status is expected in this case */
+		BUG();
+	}
+
 	cr_pb_init();
 	if (restrict_uid(getuid(), getgid()))
 		return 1;
diff --git a/fault-injection.c b/fault-injection.c
new file mode 100644
index 0000000..a26d361
--- /dev/null
+++ b/fault-injection.c
@@ -0,0 +1,28 @@
+#include <stdlib.h>
+
+#include "fault-injection.h"
+
+static unsigned int strategy;
+
+/* Pick the fault to inject from CRIU_FAULT; 0 if unset or valid, else -1. */
+int fault_injection_init(void)
+{
+	char *val = getenv("CRIU_FAULT"), *end;
+	long code;
+
+	if (val == NULL)
+		return 0;
+	/* Unlike atoi(), this rejects trailing junk ("1abc") and overflow. */
+	code = strtol(val, &end, 10);
+	if (end == val || *end != '\0' || code <= FI_NONE || code >= FI_MAX)
+		return -1;
+	strategy = (unsigned int)code;
+	return 0;
+}
+
+/* Tell whether @code is the fault chosen at init time: 1 if so, else 0. */
+int fault_injected(enum faults code)
+{
+	/* A single code is stored, so at most one fault can fire per run. */
+	return strategy == code;
+}
diff --git a/include/fault-injection.h b/include/fault-injection.h
new file mode 100644
index 0000000..d147eb5
--- /dev/null
+++ b/include/fault-injection.h
@@ -0,0 +1,25 @@
+#ifndef __CR_FAULT_INJECTION_H__
+#define __CR_FAULT_INJECTION_H__
+
+enum faults {	/* codes accepted in CRIU_FAULT; each one selects a single fault */
+	FI_NONE = 0,			/* not a fault; fault_injection_init() rejects it */
+	FI_PARASITE_DAEMON,		/* fault_injected_daemon() fails with -1 */
+	FI_PARASITE_DAEMON_KILL,	/* criu calls exit(1) at the injection point */
+	FI_PARASITE_DAEMON_SOCK,	/* ctl->tsock is closed, the dump carries on */
+	FI_PARASITE_DAEMON_BAD_CMD,	/* PARASITE_CMD_MAX is sent to the daemon */
+	FI_MAX,				/* end marker: valid codes are 1 .. FI_MAX - 1 */
+};
+
+#ifdef CR_DEBUG
+/* Defined in fault-injection.c, which Makefile.crtools builds only if DEBUG=1. */
+extern int fault_injection_init(void);
+extern int fault_injected(enum faults code);
+
+#else
+/* Stubs used without CR_DEBUG: init always succeeds, no fault is ever reported. */
+static inline int fault_injection_init(void) { return 0; }
+static inline int fault_injected(enum faults code) { return 0; }
+
+#endif
+
+#endif /* __CR_FAULT_INJECTION_H__ */
diff --git a/test/Makefile b/test/Makefile
index 9ab8f3d..b03c662 100644
--- a/test/Makefile
+++ b/test/Makefile
@@ -35,16 +35,22 @@ zdtm-iter:
 
 .PHONY: zdtm
 
-fault-injection: .FORCE
-	$(MAKE) -C fault-injection
+fault-injection: fault-max .FORCE	# rebuild criu with DEBUG=1, then run every fault code
+	$(MAKE) -C .. clean
+	$(MAKE) -C .. DEBUG=1
+	for i in $(shell seq 1 `./fault-max`); do						\
+		ZDTM_ARGS="--fault $$i -C" $(MAKE) static/sigpending static/socket-tcpbuf || exit 1;	\
+	done
 .PHONY: fault-injection
 
 zdtm_ns:   $(shell echo "$(TST)" | tr ' ' '\n' | awk '/^ns\// && !/tty|pty/ {print}')
 zdtm_nons: $(shell echo "$(TST)" | tr ' ' '\n' | awk '!/^ns\// || /tty|pty/ {print}')
 
-override CFLAGS += -D_GNU_SOURCE
+override CFLAGS += -D_GNU_SOURCE -I../include
 zdtm_ct: zdtm_ct.c
 
+fault-max: fault-max.c ../include/fault-injection.h
+
 clean:
 	$(RM) zdtm_ct
 	$(Q) $(RM) -r ./lib/
diff --git a/test/fault-max.c b/test/fault-max.c
new file mode 100644
index 0000000..6a1a22c
--- /dev/null
+++ b/test/fault-max.c
@@ -0,0 +1,8 @@
+#include "fault-injection.h"
+#include <stdio.h>
+
+int main(void)
+{
+	fprintf(stdout, "%d\n", FI_MAX - 1); /* highest valid fault code */
+	return 0;
+}
diff --git a/test/zdtm.sh b/test/zdtm.sh
index cbd098c..a593c6c 100755
--- a/test/zdtm.sh
+++ b/test/zdtm.sh
@@ -256,6 +256,7 @@ START_ONLY=0
 BATCH_TEST=0
 SPECIFIED_NAME_USED=0
 PERF=""
+CRIU_FAULT=""
 
 zdtm_sep()
 { (
@@ -598,7 +599,8 @@ EOF
 		# Here we may have two cases: either checkpoint is failed
 		# with some error code, or checkpoint is complete but return
 		# code is non-zero because of post dump action.
-		if [ "$retcode" -ne 0 ] && [[ "$retcode" -ne 32 || -z "$dump_only" ]]; then
+
+		if [ "$retcode" -ne 0 ] && [[ "$retcode" -ne 32 || -z "$dump_only" ]] && [ -z "$CRIU_FAULT" ]; then
 			if [ $BATCH_TEST -eq 0 ]; then
 				echo WARNING: $tname returned $retcode and left running for debug needs
 			else
@@ -606,6 +608,15 @@ EOF
 			fi
 			return 1
 		fi
+
+		if [ -n "$CRIU_FAULT" ]; then
+			dump_only=1
+			if [[ $retcode -eq 0 || $retcode -gt 128 ]]; then
+				echo "ERROR: criu returned $retcode"
+				return 1
+			fi
+		fi
+
 		cat $ddump/dump.log* | grep Error
 
 		if [ -n "$SNAPSHOT" ]; then
@@ -937,6 +948,11 @@ while :; do
 		echo $$ > /sys/fs/cgroup/perf_event/zdtm/tasks
 		shift
 		;;
+	  --fault)
+		shift
+		export CRIU_FAULT=$1
+		shift
+		;;
 	  -*)
 		echo "Unrecognized option $1, aborting!" 1>&2
 		usage
-- 
1.9.3



More information about the CRIU mailing list