[CRIU] [PATCH] [RFC] criu: test different situations when parasite must cure itself
Pavel Emelyanov
xemul at parallels.com
Fri Oct 17 06:21:54 PDT 2014
On 10/15/2014 03:24 PM, Andrey Vagin wrote:
> Here is a simple fault-injection engine. Each fault has a unique code.
> One of these codes can be set in the CRIU_FAULT environment variable.
> On the next run the fault with this code will be triggered.
> For each fault we need to have code which emulates the specified behaviour.
>
> This patch checks the following cases:
> * a parasite socket was closed unexpectedly
> * how a parasite handles an unsupported command
> * something failed while a parasite daemon is running.
> * criu dies unexpectedly
>
> Fault-injection code is compiled only if make is executed with DEBUG=1.
>
> The following command can be used to check all existing fault cases:
> make -C test fault-injection
We have a systemtap-based fault injection. Why is this version better?
> Signed-off-by: Andrey Vagin <avagin at openvz.org>
> ---
> Makefile.crtools | 3 +++
> cr-dump.c | 30 ++++++++++++++++++++++++++++++
> crtools.c | 6 ++++++
> fault-injection.c | 28 ++++++++++++++++++++++++++++
> include/fault-injection.h | 25 +++++++++++++++++++++++++
> test/Makefile | 12 +++++++++---
> test/fault-max.c | 8 ++++++++
> test/zdtm.sh | 18 +++++++++++++++++-
> 8 files changed, 126 insertions(+), 4 deletions(-)
> create mode 100644 fault-injection.c
> create mode 100644 include/fault-injection.h
> create mode 100644 test/fault-max.c
>
> diff --git a/Makefile.crtools b/Makefile.crtools
> index 7f21d25..f4d34ca 100644
> --- a/Makefile.crtools
> +++ b/Makefile.crtools
> @@ -69,6 +69,9 @@ endif
> obj-y += cr-service.o
> obj-y += sd-daemon.o
> obj-y += plugin.o
> +ifeq ($(DEBUG),1)
> +obj-y += fault-injection.o
> +endif
>
> ifneq ($(MAKECMDGOALS),clean)
> incdeps := y
> diff --git a/cr-dump.c b/cr-dump.c
> index 7d33e3d..ad8a415 100644
> --- a/cr-dump.c
> +++ b/cr-dump.c
> @@ -72,6 +72,7 @@
> #include "irmap.h"
> #include "sysfs_parse.h"
> #include "action-scripts.h"
> +#include "fault-injection.h"
>
> #include "asm/dump.h"
>
> @@ -1465,6 +1466,32 @@ err_cure:
> goto err_free;
> }
>
> +static int fault_injected_daemon(struct parasite_ctl *ctl)
> +{
> +
> + if (fault_injected(FI_PARASITE_DAEMON))
> + return -1;
> +
> + if (fault_injected(FI_PARASITE_DAEMON_SOCK)) {
> + close(ctl->tsock);
> + return 0;
> + }
> +
> + if (fault_injected(FI_PARASITE_DAEMON_KILL)) {
> + exit(1);
> + BUG();
> + }
> +
> + if (fault_injected(FI_PARASITE_DAEMON_BAD_CMD)) {
> + if (parasite_execute_daemon(PARASITE_CMD_MAX, ctl) < 0)
> + return -1;
> + return 0;
> + }
> +
> + return 0;
> +
> +}
> +
> static int dump_one_task(struct pstree_item *item)
> {
> pid_t pid = item->pid.real;
> @@ -1540,6 +1567,9 @@ static int dump_one_task(struct pstree_item *item)
> goto err;
> }
>
> + if (fault_injected_daemon(parasite_ctl))
> + goto err;
> +
> if (root_ns_mask & CLONE_NEWPID && root_item == item) {
> int pfd;
>
> diff --git a/crtools.c b/crtools.c
> index a245bbb..d6f3054 100644
> --- a/crtools.c
> +++ b/crtools.c
> @@ -38,6 +38,7 @@
> #include "cgroup.h"
> #include "cpu.h"
> #include "action-scripts.h"
> +#include "fault-injection.h"
>
> #include "setproctitle.h"
>
> @@ -192,6 +193,11 @@ int main(int argc, char *argv[], char *envp[])
>
> BUILD_BUG_ON(PAGE_SIZE != PAGE_IMAGE_SIZE);
>
> + if (fault_injection_init()) {
> + /* criu must be killed, because non-zero status is expected in this case */
> + BUG();
> + }
> +
> cr_pb_init();
> if (restrict_uid(getuid(), getgid()))
> return 1;
> diff --git a/fault-injection.c b/fault-injection.c
> new file mode 100644
> index 0000000..a26d361
> --- /dev/null
> +++ b/fault-injection.c
> @@ -0,0 +1,28 @@
> +#include <stdlib.h>
> +
> +#include "fault-injection.h"
> +
> +static unsigned int strategy;
> +
> +int fault_injection_init()
> +{
> + char *val;
> +
> + val = getenv("CRIU_FAULT");
> + if (val == NULL)
> + return 0;
> +
> + strategy = atoi(val);
> +
> + if (strategy ==0 || strategy >= FI_MAX)
> + return -1;
> +
> + return 0;
> +}
> +
> +int fault_injected(enum faults code)
> +{
> + if (strategy == code)
> + return 1;
> + return 0;
> +}
> diff --git a/include/fault-injection.h b/include/fault-injection.h
> new file mode 100644
> index 0000000..d147eb5
> --- /dev/null
> +++ b/include/fault-injection.h
> @@ -0,0 +1,25 @@
> +#ifndef __CR_FAULT_INJECTION_H__
> +#define __CR_FAULT_INJECTION_H__
> +
> +enum faults {
> + FI_NONE = 0,
> + FI_PARASITE_DAEMON,
> + FI_PARASITE_DAEMON_KILL,
> + FI_PARASITE_DAEMON_SOCK,
> + FI_PARASITE_DAEMON_BAD_CMD,
> + FI_MAX,
> +};
> +
> +#ifdef CR_DEBUG
> +
> +extern int fault_injection_init(void);
> +extern int fault_injected(enum faults code);
> +
> +#else
> +
> +static inline int fault_injection_init(void) { return 0; }
> +static inline int fault_injected(enum faults code) { return 0; }
> +
> +#endif
> +
> +#endif /* __CR_FAULT_INJECTION_H__ */
> diff --git a/test/Makefile b/test/Makefile
> index 9ab8f3d..b03c662 100644
> --- a/test/Makefile
> +++ b/test/Makefile
> @@ -35,16 +35,22 @@ zdtm-iter:
>
> .PHONY: zdtm
>
> -fault-injection: .FORCE
> - $(MAKE) -C fault-injection
> +fault-injection: fault-max .FORCE
> + $(MAKE) -C .. clean
> + $(MAKE) -C .. DEBUG=1
> + for i in $(shell seq 1 `./fault-max`); do \
> + ZDTM_ARGS="--fault $$i -C" $(MAKE) static/sigpending static/socket-tcpbuf; \
> + done
> .PHONY: fault-injection
>
> zdtm_ns: $(shell echo "$(TST)" | tr ' ' '\n' | awk '/^ns\// && !/tty|pty/ {print}')
> zdtm_nons: $(shell echo "$(TST)" | tr ' ' '\n' | awk '!/^ns\// || /tty|pty/ {print}')
>
> -override CFLAGS += -D_GNU_SOURCE
> +override CFLAGS += -D_GNU_SOURCE -I../include
> zdtm_ct: zdtm_ct.c
>
> +fault-max: fault-max.c ../include/fault-injection.h
> +
> clean:
> $(RM) zdtm_ct
> $(Q) $(RM) -r ./lib/
> diff --git a/test/fault-max.c b/test/fault-max.c
> new file mode 100644
> index 0000000..6a1a22c
> --- /dev/null
> +++ b/test/fault-max.c
> @@ -0,0 +1,8 @@
> +#include "fault-injection.h"
> +#include <stdio.h>
> +
> +int main()
> +{
> + printf("%d\n", FI_MAX - 1);
> + return 0;
> +}
> diff --git a/test/zdtm.sh b/test/zdtm.sh
> index cbd098c..a593c6c 100755
> --- a/test/zdtm.sh
> +++ b/test/zdtm.sh
> @@ -256,6 +256,7 @@ START_ONLY=0
> BATCH_TEST=0
> SPECIFIED_NAME_USED=0
> PERF=""
> +CRIU_FAULT=""
>
> zdtm_sep()
> { (
> @@ -598,7 +599,8 @@ EOF
> # Here we may have two cases: either checkpoint is failed
> # with some error code, or checkpoint is complete but return
> # code is non-zero because of post dump action.
> - if [ "$retcode" -ne 0 ] && [[ "$retcode" -ne 32 || -z "$dump_only" ]]; then
> +
> + if [ "$retcode" -ne 0 ] && [[ "$retcode" -ne 32 || -z "$dump_only" ]] && [ -z "$CRIU_FAULT" ]; then
> if [ $BATCH_TEST -eq 0 ]; then
> echo WARNING: $tname returned $retcode and left running for debug needs
> else
> @@ -606,6 +608,15 @@ EOF
> fi
> return 1
> fi
> +
> + if [ -n "$CRIU_FAULT" ]; then
> + dump_only=1
> + if [[ $retcode -eq 0 || $retcode -gt 128 ]]; then
> + echo "ERROR: criu returned $retcode"
> + return 1
> + fi
> + fi
> +
> cat $ddump/dump.log* | grep Error
>
> if [ -n "$SNAPSHOT" ]; then
> @@ -937,6 +948,11 @@ while :; do
> echo $$ > /sys/fs/cgroup/perf_event/zdtm/tasks
> shift
> ;;
> + --fault)
> + shift
> + export CRIU_FAULT=$1
> + shift
> + ;;
> -*)
> echo "Unrecognized option $1, aborting!" 1>&2
> usage
>
More information about the CRIU
mailing list