[CRIU] [RFC PATCH 17/21] zdtm: add the multi-test launcher

Thu Feb 25 14:43:27 PST 2016

On Fri, Feb 26, 2016 at 01:30:48AM +0300, Ivan Shapovalov wrote:
> On 2016-02-25 at 13:49 -0800, Andrew Vagin wrote:
> > On Fri, Feb 19, 2016 at 05:50:40PM +0300, Ivan Shapovalov wrote:
> > > 
> > > This program works as a "host" to multiple test binaries, which are
> > > loaded into the same address space and then executed all at once.
> > > 
> > > The launcher tries to avoid using threads (to reduce probability of
> > > affecting the test results) and we do not alter the tests
> > > themselves.
> > > Instead, we hook various functions in libzdtmtst and use
> > > <ucontext.h>
> > > to redirect execution flow between the launcher and tests.
> > > 
> > > That is, first we run each test from start of their main() up to
> > > entry
> > > to test_init(), where each test is interrupted and contexts are
> > > saved.
> > > After all tests have been run to that point, real test_init() is
> > > performed "once and for all". The same method is used repeatedly
> > > for all
> > > phases of tests' execution.
> > > 
> > > The only case when we use threads is when some of tests want to run
> > > a
> > > busy loop during C/R. In this case we spawn a thread (using
> > > clone(), not
> > > pthreads) for each such test. After C/R we join all threads and do
> > > verification in the usual sequential mode.
> > [avagin at laptop criu]$ git grep '\<wait\>(' test/zdtm | wc -l
> > 66
> > 
> > Many tests calls wait() which waits one of children. How do you
> > handle
> > this in the launcher? Actually a test wants to wait its children, it
> > doesn't expect to meet other processes.
> 
> We don't create waitable children. wait() will not and cannot wait for
> other threads (we wait for them via a futex, just like nptl does it).

I am not sure that you understood my question.

Look at this example:
pipe00			socket_aio
-------------------------------------
test_fork();		test_fork();
wait(&status);		wait(&status);

My question is: how does each of these tests wait for its own children only,
given that both run in the same process and wait() returns any child?
> 
> --
> Ivan Shapovalov / intelfx /
> 
> > 
> > > 
> > > 
> > > Signed-off-by: Ivan Shapovalov <intelfx at intelfx.name>
> > > ---
> > >  criu/include/compiler.h          |   1 +
> > >  test/zdtm/.gitignore             |   1 +
> > >  test/zdtm/Makefile               |  14 +-
> > >  test/zdtm/launcher/Makefile      |  32 ++
> > >  test/zdtm/launcher/launcher.c    | 831
> > > +++++++++++++++++++++++++++++++++++++++
> > >  test/zdtm/lib/test.c             |  57 ++-
> > >  test/zdtm/lib/zdtmtst.h          |  24 +-
> > >  test/zdtm/lib/zdtmtst_internal.h |  28 ++
> > >  8 files changed, 973 insertions(+), 15 deletions(-)
> > >  create mode 100644 test/zdtm/launcher/Makefile
> > >  create mode 100644 test/zdtm/launcher/launcher.c
> > > 
> > > diff --git a/criu/include/compiler.h b/criu/include/compiler.h
> > > index 37a6880..56fd87c 100644
> > > --- a/criu/include/compiler.h
> > > +++ b/criu/include/compiler.h
> > > @@ -18,6 +18,7 @@
> > >  #define __used			__attribute__((__used__))
> > >  #define __maybe_unused		__attribute__((unused))
> > >  #define __always_unused		__attribute__((unused))
> > > +#define __weak                  __attribute__((weak))
> > >  
> > >  #define __section(S)		__attribute__
> > > ((__section__(#S)))
> > >  
> > > diff --git a/test/zdtm/.gitignore b/test/zdtm/.gitignore
> > > index ce4e302..471496a 100644
> > > --- a/test/zdtm/.gitignore
> > > +++ b/test/zdtm/.gitignore
> > > @@ -1,3 +1,4 @@
> > > +/launcher/launcher
> > >  /lib/libzdtmtst.so
> > >  /live/static/apparmor
> > >  /live/static/arm-neon00
> > > diff --git a/test/zdtm/Makefile b/test/zdtm/Makefile
> > > index 4d66395..a437b86 100644
> > > --- a/test/zdtm/Makefile
> > > +++ b/test/zdtm/Makefile
> > > @@ -1,15 +1,17 @@
> > > -SUBDIRS	=	lib live
> > > +SUBDIRS	=	lib live launcher
> > >  
> > >  default:	all
> > >  .PHONY: default lib live
> > >  
> > > -lib:
> > > -	$(MAKE) -C lib all
> > > +$(SUBDIRS):
> > > +	$(MAKE) -C $@ all
> > >  
> > > -live: lib
> > > -	$(MAKE) -C live all
> > > +live: lib launcher
> > >  
> > > -all: lib live
> > > +launcher: lib
> > > +
> > > +all: $(SUBDIRS)
> > >  	@true
> > > +
> > >  %:
> > >  	set -e; for d in $(SUBDIRS); do $(MAKE) -C $$d $@; done
> > > diff --git a/test/zdtm/launcher/Makefile
> > > b/test/zdtm/launcher/Makefile
> > > new file mode 100644
> > > index 0000000..7b00dd3
> > > --- /dev/null
> > > +++ b/test/zdtm/launcher/Makefile
> > > @@ -0,0 +1,32 @@
> > > +include ../Makefile.inc
> > > +
> > > +LIBDIR	= ../lib
> > > +LDLIBS	= -lzdtmtst
> > > +override LDFLAGS += -L$(LIBDIR) -Wl,-rpath,\$$ORIGIN/$(LIBDIR)
> > > +
> > > +PROGS   = launcher
> > > +SRC	= $(PROGS:%=%.c)
> > > +OBJ	= $(SRC:%.c=%.o)
> > > +DEP	= $(SRC:%.c=%.d)
> > > +
> > > +DEPEND.c = $(COMPILE.c) -MM -MP
> > > +%.d:	%.c
> > > +	$(DEPEND.c) $(OUTPUT_OPTION) $<
> > > +
> > > +all:	$(PROGS)
> > > +
> > > +launcher: override LDLIBS += -ldl
> > > +
> > > +clean:
> > > +	$(RM) -f $(OBJ) $(PROGS) *~
> > > +
> > > +cleandep:	clean
> > > +	$(RM) -f $(DEP)
> > > +
> > > +realclean:	cleandep
> > > +
> > > +.PHONY:	clean cleandep realclean
> > > +
> > > +ifeq ($(filter-out no-deps-targets, $(MAKECMDGOALS)),)
> > > +-include $(DEP)
> > > +endif
> > > diff --git a/test/zdtm/launcher/launcher.c
> > > b/test/zdtm/launcher/launcher.c
> > > new file mode 100644
> > > index 0000000..82d7b01
> > > --- /dev/null
> > > +++ b/test/zdtm/launcher/launcher.c
> > > @@ -0,0 +1,831 @@
> > > +#define _GNU_SOURCE
> > > +#include <dlfcn.h>
> > > +#include <stdio.h>
> > > +#include <stdlib.h>
> > > +#include <signal.h>
> > > +#include <ucontext.h>
> > > +#include <linux/unistd.h>
> > > +#include <sys/wait.h>
> > > +#include <sys/syscall.h>
> > > +#include <sched.h>
> > > +
> > > +#include "zdtmtst.h"
> > > +#include "zdtmtst_internal.h"
> > > +#include "lock.h"
> > > +
> > > +#define memzero(arg) memset(&arg, 0, sizeof(arg))
> > > +
> > > +/* options defined in libzdtmtst itself */
> > > +static struct long_opt *global_opts_head;
> > > +
> > > +/* global variables for the context switching mechanism */
> > > +static ucontext_t return_ctx;
> > > +static struct test *current_test, *all_tests;
> > > +
> > > +/* real exit() function */
> > > +static NORETURN void(*real_exit)(int);
> > > +
> > > +/* main thread id of the launcher */
> > > +static pid_t master_tid;
> > > +
> > > +/* used to wait until a thread's temporary stack becomes unused */
> > > +static futex_t test_thread_futex;
> > > +
> > > +/* used to enable hooks while we are in test context */
> > > +static int multitest_enabled;
> > > +
> > > +/* used to disable hooks for the main thread while busy-loop
> > > threads are
> > > + * active */
> > > +static int threading_enabled;
> > > +
> > > +/* used to tell whether the last test returned with some signals
> > > pending
> > > + * (which means that we must not run any more tests) */
> > > +static int have_pending_signals;
> > > +
> > > +static long sys_gettid(void)
> > > +{
> > > +        return syscall(__NR_gettid);
> > > +}
> > > +
> > > +static NORETURN void sys_exit(int status)
> > > +{
> > > +	syscall(__NR_exit, status);
> > > +	__builtin_unreachable();
> > > +}
> > > +
> > > +static const char *test_phase_str[] = {
> > > +	[PHASE_START] = "<main not started>",
> > > +	[PHASE_TEST_INIT] = "test_init",
> > > +	[PHASE_TEST_EXT_INIT] = "test_ext_init",
> > > +	[PHASE_TEST_DAEMON] = "test_daemon",
> > > +	[PHASE_TEST_WAITSIG] = "test_waitsig/test_go",
> > > +	[PHASE_RETURNED] = "<main finished>",
> > > +};
> > > +
> > > +/* describes a single test executable which is dlopen()'ed */
> > > +struct test
> > > +{
> > > +	struct test *next;
> > > +
> > > +	void *handle;
> > > +	const char *name;
> > > +	struct long_opt *opts_head;
> > > +	int (*main) (int, char **);
> > > +
> > > +	int argc;
> > > +	char **argv;
> > > +
> > > +	/* for the freer */
> > > +	int is_last;
> > > +
> > > +	/* whether this test needs a separate thread for its busy-
> > > loop */
> > > +	int is_threaded;
> > > +
> > > +	/* for threading efficiency evaluation */
> > > +	int busy_loop_iters;
> > > +
> > > +	/* whether the test is currently executing (used with
> > > getcontext() to return from threads) */
> > > +	int in_progress;
> > > +
> > > +	/* whether this specific test returned with some signals
> > > pending */
> > > +
> > > +	/* whether the test did actually return from main(), as
> > > opposed to doing a swapcontext() */
> > > +	enum test_phase phase;
> > > +	int retcode;
> > > +	enum test_result result;
> > > +
> > > +	/* the context where the test execution left off and
> > > returned to the launcher */
> > > +	ucontext_t ctx;
> > > +
> > > +	/* the tid of the test's busy loop thread (if any) */
> > > +	union {
> > > +		int tid;
> > > +		futex_t tid_futex;
> > > +	};
> > > +
> > > +	/* the alternate stack for the test execution context */
> > > +	char ctx_stack[8*1<<20];
> > > +};
> > > +
> > > +/* deallocates a heap-allocated list of struct test */
> > > +static void tests_free(struct test **arg)
> > > +{
> > > +	if (*arg != NULL) {
> > > +		for (struct test *test = *arg, *next; test !=
> > > NULL; test = next) {
> > > +			next = test->next;
> > > +			dlclose(test->handle);
> > > +			free(test);
> > > +		}
> > > +	}
> > > +}
> > > +
> > > +static void test_check_ctx(struct test *test)
> > > +{
> > > +	if (test->phase >= PHASE_RETURNED) {
> > > +		pr_err("Cannot jump to context of test \"%s\",
> > > context invalid: %s\n", test->name, test_phase_str[test->phase]);
> > > +		abort();
> > > +	}
> > > +}
> > > +
> > > +static struct test *test_current(void)
> > > +{
> > > +	assert(getpid() == master_pid);
> > > +
> > > +	pid_t tid = sys_gettid();
> > > +	if (tid == master_tid) {
> > > +		assert(current_test != NULL);
> > > +		return current_test;
> > > +	} else {
> > > +		assert(current_test == NULL);
> > > +		assert(all_tests != NULL);
> > > +
> > > +		for (struct test *test = all_tests; test != NULL;
> > > test = test->next) {
> > > +			/* test->tid may change behind us due to
> > > +			 * CLONE_CHILD_CLEARTID, but _our_ test-
> > > >tid
> > > +			 * won't change while we are alive. */
> > > +			if (test->tid == tid) {
> > > +				return test;
> > > +			}
> > > +		}
> > > +
> > > +		pr_err("Could not find a test descriptor for tid
> > > %d (master tid is %d)\n", tid, master_tid);
> > > +		abort();
> > > +	}
> > > +}
> > > +
> > > +/* a wrapper for the test's main(), executed in an own context */
> > > +static void test_main(void)
> > > +{
> > > +	struct test *test = test_current();
> > > +
> > > +	futex_set_and_wake(&test_thread_futex, 1);
> > > +
> > > +	int r = test->main(test->argc, test->argv);
> > > +
> > > +	/* if this is a test's ancillary child process, exit right
> > > away */
> > > +	if (getpid() != master_pid) {
> > > +		real_exit(r);
> > > +	}
> > > +
> > > +	test->phase = PHASE_RETURNED;
> > > +	test->retcode = r;
> > > +	memset(&test->ctx, 0, sizeof(test->ctx));
> > > +
> > > +	if (sys_gettid() == master_tid) {
> > > +		/* no-op, we return to uc_link of the test context
> > > */
> > > +	} else {
> > > +		sys_exit(0);
> > > +	}
> > > +}
> > > +
> > > +/* loads a test binary and fills out the struct test instance */
> > > +static int test_make(struct test *test, const ucontext_t *ctx)
> > > +{
> > > +	assert(opt_head == NULL);
> > > +
> > > +	/* dlopen() the test binary; the constructors will execute
> > > at this point and register the test's options */
> > > +	test->handle = dlopen(test->name, RTLD_NOW);
> > > +	if (test->handle == NULL) {
> > > +		pr_err("Failed to dlopen(\"%s\"): %s\n", test-
> > > >name, dlerror());
> > > +		return -1;
> > > +	}
> > > +
> > > +	/* save the per-test options */
> > > +	test->opts_head = opt_head;
> > > +	opt_head = NULL;
> > > +
> > > +	/* get the entry point */
> > > +	test->main = dlsym(test->handle, "main");
> > > +	if (test->main == NULL) {
> > > +		pr_err("Failed to dlsym(\"main\") in \"%s\":
> > > %s\n", test->name, dlerror());
> > > +		return -1;
> > > +	}
> > > +
> > > +	/* prepare the test's execution context */
> > > +	test->ctx = *ctx;
> > > +	test->ctx.uc_stack.ss_sp = test->ctx_stack;
> > > +	test->ctx.uc_stack.ss_size = sizeof(test->ctx_stack);
> > > +	test->ctx.uc_link = &return_ctx;
> > > +
> > > +	makecontext(&test->ctx, &test_main, 0);
> > > +
> > > +	return 0;
> > > +}
> > > +
> > > +/* sets all communication global variables and switches to the
> > > test context */
> > > +static void test_run(struct test *test)
> > > +{
> > > +	int r;
> > > +
> > > +	test_check_ctx(test);
> > > +
> > > +	if (have_pending_signals) {
> > > +		pr_err("Trying to switch to test \"%s\" with
> > > signals pending from previous tests -- such tests must be last in
> > > sequence\n", test->name);
> > > +		abort();
> > > +	}
> > > +
> > > +	opt_head = test->opts_head;
> > > +	current_test = test;
> > > +	multitest_enabled = 1;
> > > +
> > > +	test->in_progress = 1;
> > > +	r = swapcontext(&return_ctx, &test->ctx);
> > > +
> > > +	if (r < 0) {
> > > +		pr_perror("Failed to swapcontext() to switch to
> > > the test context for \"%s\"", test->name);
> > > +		abort();
> > > +	}
> > > +
> > > +	opt_head = NULL;
> > > +	current_test = NULL;
> > > +	multitest_enabled = 0;
> > > +}
> > > +
> > > +static int test_thread_main(void *arg)
> > > +{
> > > +	struct test *test = arg;
> > > +	assert(test != NULL);
> > > +
> > > +	int r = setcontext(&test->ctx);
> > > +
> > > +	pr_perror("Failed to setcontext() in busy loop thread to
> > > switch to the test context");
> > > +	abort();
> > > +}
> > > +
> > > +/* creates an additional thread and switches to the test context
> > > there */
> > > +static void test_run_thread(struct test *test)
> > > +{
> > > +	int r;
> > > +
> > > +	/*
> > > +	 * Note that we do not set any global variables and expect
> > > +	 * multitest_enabled to be set by the caller. This is
> > > because we
> > > +	 * do not use TLS for these variables to avoid increasing
> > > the process
> > > +	 * environment complexity too much.
> > > +	 */
> > > +
> > > +	assert(multitest_enabled == 1);
> > > +	assert(threading_enabled == 1);
> > > +	assert(current_test == NULL);
> > > +
> > > +	test_check_ctx(test);
> > > +
> > > +	if (have_pending_signals) {
> > > +		pr_err("Trying to create a thread for test \"%s\"
> > > with signals pending from previous tests -- such tests must be last
> > > in sequence\n", test->name);
> > > +		abort();
> > > +	}
> > > +
> > > +	/*
> > > +	 * Let's create a temporary stack for the new thread.
> > > +	 * We don't know the direction in which it grows, and
> > > clone(2) API
> > > +	 * doesn't abstract this for us, so we allocate twice what
> > > we need and
> > > +	 * point new thread to the middle of the temporary stack
> > > space.
> > > +	 */
> > > +
> > > +	futex_init(&test_thread_futex);
> > > +
> > > +	char test_thread_stack[2*1<<10];
> > > +
> > > +	test->in_progress = 1;
> > > +	r = clone(&test_thread_main, test_thread_stack +
> > > sizeof(test_thread_stack) / 2,
> > > +	          0 /* no signal */ |
> > > +	          CLONE_FILES |
> > > +	          CLONE_FS |
> > > +	          CLONE_IO |
> > > +	          CLONE_SIGHAND |
> > > +	          CLONE_SYSVSEM |
> > > +	          CLONE_THREAD |
> > > +	          CLONE_VM |
> > > +	          CLONE_CHILD_SETTID |
> > > +	          CLONE_CHILD_CLEARTID,
> > > +	          test /* arg */,
> > > +	          NULL /* ptid */,
> > > +	          NULL /* tls */,
> > > +	          &test->tid /* ctid */);
> > > +
> > > +	if (r < 0) {
> > > +		pr_perror("Failed to clone() to create a busy-loop 
> > > thread for test \"%s\"", test->name);
> > > +		abort();
> > > +	}
> > > +
> > > +	/*
> > > +	 * Wait until the control returns to the test to be able
> > > to destroy the
> > > +	 * temporary stack.
> > > +	 */
> > > +
> > > +	futex_wait_while(&test_thread_futex, 0);
> > > +}
> > > +
> > > +/* says whether the call needs to be hooked */
> > > +int test_multi_hooked(void)
> > > +{
> > > +	if (getpid() != master_pid) {
> > > +		return 0;
> > > +	}
> > > +
> > > +	if (!multitest_enabled) {
> > > +		return 0;
> > > +	}
> > > +
> > > +	if (!threading_enabled == (sys_gettid() != master_tid)) {
> > > +		return 0;
> > > +	}
> > > +
> > > +	return 1;
> > > +}
> > > +
> > > +/* returns from the test context */
> > > +void test_multi_return(enum test_phase phase)
> > > +{
> > > +	int r;
> > > +	struct test *test = test_current();
> > > +
> > > +	sigset_t pending_signals;
> > > +	sigemptyset(&pending_signals);
> > > +	r = sigpending(&pending_signals);
> > > +	if (r < 0) {
> > > +		pr_perror("Failed to sigpending() to check for
> > > stray signals at return to launcher");
> > > +		abort();
> > > +	}
> > > +
> > > +	if (!sigisemptyset(&pending_signals)) {
> > > +		pr_err("Pending signals at exit from test on phase
> > > %s\n", test_phase_str[phase]);
> > > +		for (int sig = 0; sig < SIGRTMAX; ++sig) {
> > > +			if (sigismember(&pending_signals, sig)) {
> > > +				pr_err("Pending signal: %d
> > > (%s)\n", sig, sys_siglist[sig] ?: "<unknown>");
> > > +			}
> > > +		}
> > > +
> > > +		/*
> > > +		 * We shouldn't have let ourselves even launch any
> > > tests after
> > > +		 * returning from one of them with pending
> > > signals.
> > > +		 */
> > > +		assert(!have_pending_signals);
> > > +		have_pending_signals = 1;
> > > +	}
> > > +
> > > +	test->in_progress = 0;
> > > +	test->phase = phase;
> > > +
> > > +	if (!test->tid) {
> > > +		r = swapcontext(&test->ctx, &return_ctx);
> > > +		if (r < 0) {
> > > +			pr_perror("Failed to swapcontext() to jump
> > > to launcher");
> > > +			abort();
> > > +		}
> > > +	} else {
> > > +		r = getcontext(&test->ctx);
> > > +		if (r < 0) {
> > > +			pr_perror("Failed to getcontext() to save
> > > the busy-loop thread context in tid %d", test->tid);
> > > +			abort();
> > > +		}
> > > +
> > > +		if (!test->in_progress) {
> > > +			sys_exit(0);
> > > +		}
> > > +	}
> > > +
> > > +	futex_set_and_wake(&test_thread_futex, 1);
> > > +}
> > > +
> > > +/* ultimately returns from the test context, simulating exit() */
> > > +NORETURN void test_multi_exit(int status)
> > > +{
> > > +	struct test *test = test_current();
> > > +
> > > +	test->phase = PHASE_RETURNED;
> > > +	test->retcode = status;
> > > +
> > > +	if (!test->tid) {
> > > +		setcontext(&return_ctx);
> > > +
> > > +		pr_perror("Failed to setcontext() to exit from
> > > test");
> > > +		abort();
> > > +	} else {
> > > +		sys_exit(0);
> > > +	}
> > > +}
> > > +
> > > +void test_multi_report(enum test_result result)
> > > +{
> > > +	assert(current_test != NULL);
> > > +
> > > +	current_test->result = result;
> > > +}
> > > +
> > > +void test_multi_go(int cr_done)
> > > +{
> > > +	struct test *test = test_current();
> > > +
> > > +	/*
> > > +	 * If test_go() is called between test_daemon() and
> > > test_waitsig(),
> > > +	 * this is an indication of that the test wants to spin in
> > > a busy loop
> > > +	 * during C/R.
> > > +	 * Otherwise, this is uninteresting for us.
> > > +	 */
> > > +
> > > +	if (test->phase < PHASE_TEST_DAEMON ||
> > > +	    test->phase >= PHASE_TEST_WAITSIG) {
> > > +		return;
> > > +	}
> > > +
> > > +	if (test->tid) {
> > > +		++test->busy_loop_iters;
> > > +
> > > +		if (cr_done) {
> > > +			test_multi_return(PHASE_TEST_WAITSIG);
> > > +		}
> > > +	} else {
> > > +		assert(cr_done != 0);
> > > +		pr_err("test_go() called from non-threaded test
> > > before test_waitsig(): \"%s\"\n", current_test->name);
> > > +		abort();
> > > +	}
> > > +}
> > > +
> > > +void test_multi_waitsig(void)
> > > +{
> > > +	struct test *test = test_current();
> > > +
> > > +	if (test->phase >= PHASE_TEST_WAITSIG) {
> > > +		return;
> > > +	}
> > > +
> > > +	if (test->tid) {
> > > +		test_multi_return(PHASE_TEST_WAITSIG);
> > > +	} else {
> > > +		/* even if we don't return to the launcher here,
> > > we still need
> > > +		 * to remember that we're past test_waitsig() */
> > > +		test->phase = PHASE_TEST_WAITSIG;
> > > +	}
> > > +}
> > > +
> > > +static void usage(const char *argv0)
> > > +{
> > > +	fprintf(stderr,
> > > +	        "Usage: %s [<global options...>] --test <path to
> > > test .so> [<test .so options...>] ...\n"
> > > +	        "Global options:\n",
> > > +	        argv0);
> > > +
> > > +	for (struct long_opt *opt = global_opts_head; opt != NULL;
> > > opt = opt->next) {
> > > +		print_opt(opt);
> > > +	}
> > > +}
> > > +
> > > +NORETURN void exit(int status)
> > > +{
> > > +	assert(real_exit != NULL);
> > > +
> > > +	/* if we are outside of any test, or if this is a test's
> > > ancillary
> > > +	 * child process, exit right away */
> > > +	if (!test_multi_hooked()) {
> > > +		real_exit(status);
> > > +	}
> > > +
> > > +	/* otherwise treat this as a return from test's main() */
> > > +	test_multi_exit(status);
> > > +}
> > > +
> > > +int main(int argc, char **argv)
> > > +{
> > > +	if (argc < 2) {
> > > +		pr_err("This program expects at least one
> > > argument.\n");
> > > +		usage(argv[0]);
> > > +		return 1;
> > > +	}
> > > +
> > > +	int r;
> > > +
> > > +	/*
> > > +	 * Find the real exit() function.
> > > +	 */
> > > +
> > > +	real_exit = dlsym(RTLD_NEXT, "exit");
> > > +	assert(real_exit != NULL);
> > > +
> > > +	/*
> > > +	 * Capture a base execution context for all tests before
> > > we modify the
> > > +	 * execution environment.
> > > +	 *
> > > +	 * Note that even at this point we may be running with
> > > signals blocked
> > > +	 * due to fork+exec in test_init() not resetting signal
> > > masks, so
> > > +	 * force-clear the signal mask in the base test execution
> > > context.
> > > +	 *
> > > +	 * I hope that no tests rely on masking some signals
> > > before test_init()
> > > +	 * and expecting that mask to survive fork+exec to the
> > > final child...
> > > +	 */
> > > +
> > > +	ucontext_t test_context;
> > > +	r = getcontext(&test_context);
> > > +	if (r < 0) {
> > > +		pr_perror("Failed to getcontext()");
> > > +		return 1;
> > > +	}
> > > +	sigemptyset(&test_context.uc_sigmask);
> > > +
> > > +	/*
> > > +	 * Process arguments.
> > > +	 * We split the global argv into several sub-arrays per
> > > each loaded test
> > > +	 * and use arguments before the first "--lib*" as globals.
> > > +	 */
> > > +
> > > +	pr_debug("Reading arguments and initializing");
> > > +
> > > +	real_argc = argc;
> > > +	real_argv = argv;
> > > +
> > > +	global_opts_head = opt_head;
> > > +	opt_head = NULL;
> > > +
> > > +	__cleanup(tests_free) struct test *test_head = NULL;
> > > +	struct test *test_tail = NULL;
> > > +
> > > +	/* allocate and load tests */
> > > +	for (ssize_t a = real_argc - 1; a >= 0; --a) {
> > > +		int is_threaded;
> > > +
> > > +		if (!strcmp(argv[a], "--lib")) {
> > > +			is_threaded = 0;
> > > +		} else if (!strcmp(argv[a], "--lib-threaded")) {
> > > +			is_threaded = 1;
> > > +		} else {
> > > +			continue;
> > > +		}
> > > +
> > > +		if (a >= real_argc - 1) {
> > > +			pr_err("Test binary path expected after
> > > last '--lib'\n");
> > > +			return 1;
> > > +		}
> > > +
> > > +		struct test *test = calloc(1, sizeof(struct
> > > test));
> > > +		test->name = argv[a+1];
> > > +		test->argv = &argv[a+1];
> > > +		test->is_threaded = is_threaded;
> > > +
> > > +		test->next = test_head;
> > > +		test_head = test;
> > > +		if (test_tail == NULL) {
> > > +			test_tail = test;
> > > +		}
> > > +	}
> > > +
> > > +	if (test_head == NULL) {
> > > +		pr_err("No tests specified.\n");
> > > +		usage(argv[0]);
> > > +		return 1;
> > > +	}
> > > +
> > > +	all_tests = test_head;
> > > +
> > > +	/* compute argument array boundaries */
> > > +	argc = test_head->argv - argv - 1;
> > > +	for (struct test *test = test_head; test->next != NULL;
> > > test = test->next) {
> > > +		test->argc = test->next->argv - test->argv - 1;
> > > +	}
> > > +	test_tail->argc = argv + real_argc - test_tail->argv;
> > > +
> > > +	/*
> > > +	 * Then load tests binaries one by one, saving option
> > > descriptor lists
> > > +	 * pertaining to each of them, and prepare execution
> > > contexts.
> > > +	 */
> > > +
> > > +	pr_debug("Loading tests");
> > > +
> > > +	for (struct test *test = test_head; test != NULL; test =
> > > test->next) {
> > > +		r = test_make(test, &test_context);
> > > +		if (r < 0) {
> > > +			return 1;
> > > +		}
> > > +	}
> > > +
> > > +	/*
> > > +	 * Then finally run tests. Each of them will return
> > > control (using
> > > +	 * swapcontext()) after reaching key points in their
> > > execution --
> > > +	 * at test_init(), at test_daemon() and after return from
> > > main().
> > > +	 * After letting all tests go up to the same point, we
> > > actually
> > > +	 * perform the corresponding action and then continue all
> > > tests
> > > +	 * up to the next stage.
> > > +	 *
> > > +	 * Now we run the tests from start of main() to
> > > test_init().
> > > +	 * At entry to test_init() the tests take an alternative
> > > path which
> > > +	 * saves the context and passes the control back to the
> > > launcher.
> > > +	 */
> > > +
> > > +	pr_debug("Running tests init phase");
> > > +
> > > +	/* HACK: run parseargs() on global arguments before
> > > everything to
> > > +	 * propagate global arguments to children which are
> > > created before
> > > +	 * test_init() and run test_ext_init() themselves.
> > > +	 * (see zdtm/live/static/socket-ext.c for why this
> > > matters) */
> > > +	opt_head = global_opts_head;
> > > +	parseargs(argc, argv);
> > > +	opt_head = NULL;
> > > +
> > > +	/* Fetch pid and tid for the threading detection machinery
> > > to work */
> > > +	master_pid = getpid();
> > > +	master_tid = sys_gettid();
> > > +
> > > +	enum test_phase test_init_kind;
> > > +
> > > +	for (struct test *test = test_head; test != NULL; test =
> > > test->next) {
> > > +		pr_debug("Initializing test \"%s\"", test->name);
> > > +		test_run(test);
> > > +
> > > +		if (test->phase != PHASE_TEST_INIT &&
> > > +		    test->phase != PHASE_TEST_EXT_INIT) {
> > > +			pr_err("Test \"%s\" returned with
> > > unexpected phase: %s\n", test->name, test_phase_str[test->phase]);
> > > +			return 1;
> > > +		}
> > > +
> > > +		if (test == test_head) {
> > > +			test_init_kind = test->phase;
> > > +		} else if (test->phase != test_init_kind) {
> > > +			pr_err("Test \"%s\" wants %s while
> > > previous tests want %s -- mixing different kinds not supported\n",
> > > test->name, test_phase_str[test->phase],
> > > test_phase_str[test_init_kind]);
> > > +			return 1;
> > > +		}
> > > +
> > > +		if (have_pending_signals) {
> > > +			pr_err("Pending signals in tests in init
> > > phase are not supported\n");
> > > +			return 1;
> > > +		}
> > > +	}
> > > +
> > > +	/*
> > > +	 * Then run the real test_init(), parsing "global"
> > > arguments (which
> > > +	 * appear on the command line before any test
> > > specifications) against
> > > +	 * "global" option descriptors (registered by
> > > libzdtmtst.so's ctors).
> > > +	 */
> > > +
> > > +	switch(test_init_kind) {
> > > +	case PHASE_TEST_INIT:
> > > +		test_init(1, argv);
> > > +		break;
> > > +
> > > +	case PHASE_TEST_EXT_INIT:
> > > +		test_ext_init(1, argv);
> > > +		break;
> > > +
> > > +	default:
> > > +		pr_err("Switch error\n");
> > > +		return 1;
> > > +	}
> > > +
> > > +	/* update tid because we fork in test_init() (pid is
> > > updated there) */
> > > +	assert(master_pid == getpid());
> > > +	master_tid = sys_gettid();
> > > +
> > > +	/*
> > > +	 * After doing global init, finally block all signals
> > > except SIGTERM
> > > +	 * to avoid spoiling tests which call into test_daemon()
> > > with some signals pending.
> > > +	 */
> > > +
> > > +	sigset_t all_but_sigterm;
> > > +	sigfillset(&all_but_sigterm);
> > > +	sigdelset(&all_but_sigterm, SIGTERM);
> > > +	r = sigprocmask(SIG_BLOCK, &all_but_sigterm, NULL);
> > > +	if (r < 0) {
> > > +		pr_perror("Failed to sigprocmask(SIG_BLOCK) all
> > > signals but SIGTERM");
> > > +		return 1;
> > > +	}
> > > +
> > > +	/*
> > > +	 * Run the tests from test_init() to test_daemon(). At
> > > this point,
> > > +	 * after restoring context inside test_init(), we parse
> > > test-specific
> > > +	 * arguments against test-specific option descriptors.
> > > +	 *
> > > +	 * Again, inside test_daemon() tests take a fake code path
> > > which passes
> > > +	 * the control back and immediately returns after
> > > regaining control,
> > > +	 * because the real daemonization happens here "once and
> > > for all".
> > > +	 */
> > > +
> > > +	pr_debug("Running tests prepare phase");
> > > +
> > > +	/*
> > > +	 * Right now we support only one test which returns with
> > > pending signals
> > > +	 * in prepare phase.
> > > +	 */
> > > +	struct test *pending_signals_test = NULL;
> > > +
> > > +	for (struct test *test = test_head; test != NULL; test =
> > > test->next) {
> > > +		pr_debug("Preparing test \"%s\"", test->name);
> > > +		test_run(test);
> > > +
> > > +		if (test->phase != PHASE_TEST_DAEMON) {
> > > +			pr_err("Test \"%s\" returned with
> > > unexpected phase: %s\n", test->name, test_phase_str[test->phase]);
> > > +			return 1;
> > > +		}
> > > +
> > > +		/* If we got some pending signals, remember this
> > > test as the
> > > +		 * "creator" of those. Note that we are not ready
> > > for more
> > > +		 * than one such test (and also for it being non-
> > > last). */
> > > +		if (have_pending_signals) {
> > > +			assert(pending_signals_test == NULL);
> > > +			pending_signals_test = test;
> > > +		}
> > > +	}
> > > +
> > > +	/*
> > > +	 * And here goes the most interesting part. Some tests
> > > want to run busy
> > > +	 * loops while the C/R is in progress. Obviously, if we
> > > want to run
> > > +	 * multiple such tests in one process and preserve their
> > > value, we will need
> > > +	 * to do some threading.
> > > +	 */
> > > +
> > > +	for (struct test *test = test_head; test != NULL; test =
> > > test->next) {
> > > +		if (test->is_threaded) {
> > > +			pr_debug("Running busy loop of test
> > > \"%s\"", test->name);
> > > +
> > > +			multitest_enabled = 1;
> > > +			threading_enabled = 1;
> > > +			test_run_thread(test);
> > > +		}
> > > +	}
> > > +
> > > +	/*
> > > +	 * Now really daemonize and wait for SIGTERM from the
> > > parent.
> > > +	 */
> > > +
> > > +	pr_debug("Tests prepared, signaling parent for C/R");
> > > +
> > > +	test_daemon();
> > > +	test_waitsig();
> > > +
> > > +	/*
> > > +	 * Wait for all threads to exit.
> > > +	 */
> > > +
> > > +	for (struct test *test = test_head; test != NULL; test =
> > > test->next) {
> > > +		pr_debug("Waiting for the busy loop thread of test
> > > \"%s\"", test->name);
> > > +
> > > +		if (test->is_threaded) {
> > > +			futex_wait_until(&test->tid_futex, 0);
> > > +
> > > +			if (test->phase != PHASE_TEST_WAITSIG) {
> > > +				pr_err("Test \"%s\" exited busy-
> > > loop thread with unexpected phase: %s\n", test->name,
> > > test_phase_str[test->phase]);
> > > +			}
> > > +
> > > +			pr_debug("Busy loop thread of test \"%s\"
> > > made %d iterations", test->name, test->busy_loop_iters);
> > > +			if (test->busy_loop_iters < 2) {
> > > +				pr_err("Busy loop thread of test
> > > \"%s\" made less than one full iteration\n", test->name);
> > > +				abort();
> > > +			}
> > > +		}
> > > +	}
> > > +
> > > +	multitest_enabled = 0;
> > > +	threading_enabled = 0;
> > > +
> > > +	/*
> > > +	 * Run the tests from test_daemon() to the end. Note that
> > > test_waitsig()
> > > +	 * is idempotent, so it will not wait for yet another
> > > SIGTERM.
> > > +	 */
> > > +
> > > +	pr_debug("C/R done, continuing tests to verify phase");
> > > +
> > > +	/* first, reverse the test list in-place */
> > > +	for (struct test *test_tail_initial = test_tail; test_head
> > > != test_tail_initial; ) {
> > > +		test_tail->next = test_head;
> > > +		test_head = test_head->next;
> > > +		test_tail->next->next = NULL;
> > > +	}
> > > +
> > > +	for (struct test *test = test_head; test != NULL; test =
> > > test->next) {
> > > +		pr_debug("Verifying test \"%s\"", test->name);
> > > +
> > > +		/* If we (still) have pending signals, check that this is
> > > +		 * _the_ test which created those pending signals. */
> > > +		if (have_pending_signals) {
> > > +			assert(test == pending_signals_test);
> > > +			have_pending_signals = 0;
> > > +		}
> > > +
> > > +		test_run(test);
> > > +
> > > +		if (test->phase != PHASE_RETURNED) {
> > > +			pr_err("Test \"%s\" returned with unexpected phase: %s\n", test->name, test_phase_str[test->phase]);
> > > +			return 1;
> > > +		}
> > > +	}
> > > +
> > > +	/*
> > > +	 * Done. Check results and do final fail() or pass().
> > > +	 */
> > > +
> > > +	pr_debug("Wow, we survived!");
> > > +
> > > +	for (struct test *test = test_head; test != NULL; test = test->next) {
> > > +		if (test->retcode != 0) {
> > > +			fail("Test \"%s\" returned %d", test->name, test->retcode);
> > > +			return 0;
> > > +		}
> > > +
> > > +		if (test->result != RESULT_PASS) {
> > > +			fail("Test \"%s\" reported %s",
> > > +			     test->name,
> > > +			     test->result == RESULT_FAIL ? "failure" : "neither success nor failure");
> > > +			return 0;
> > > +		}
> > > +	}
> > > +
> > > +	pass();
> > > +	return 0;
> > > +}
> > > diff --git a/test/zdtm/lib/test.c b/test/zdtm/lib/test.c
> > > index fcff71c..06fedce 100644
> > > --- a/test/zdtm/lib/test.c
> > > +++ b/test/zdtm/lib/test.c
> > > @@ -17,6 +17,7 @@
> > >  #include <grp.h>
> > >  
> > >  #include "zdtmtst.h"
> > > +#include "zdtmtst_internal.h"
> > >  #include "lock.h"
> > >  #include "ns.h"
> > >  
> > > @@ -34,11 +35,19 @@ TEST_OPTION(pidfile, string, "file to store pid", 1);
> > >  
> > >  pid_t master_pid = 0;
> > >  
> > > +int real_argc = 0;
> > > +char **real_argv = NULL;
> > > +
> > >  int test_fork_id(int id)
> > >  {
> > >  	return fork();
> > >  }
> > >  
> > > +__weak int test_multi_hooked(void)
> > > +{
> > > +	return 0;
> > > +}
> > > +
> > >  #define INPROGRESS ".inprogress"
> > >  static void test_fini(void)
> > >  {
> > > @@ -65,6 +74,7 @@ static void setup_outfile()
> > >  		fprintf(stderr, "Can't register exit function\n");
> > >  		exit(1);
> > >  	}
> > > +
> > >  	if (test_log_init(outfile, INPROGRESS))
> > >  		exit(1);
> > >  }
> > > @@ -86,6 +96,15 @@ static void redir_stdfds()
> > >  
> > >  void test_ext_init(int argc, char **argv)
> > >  {
> > > +	if (test_multi_hooked()) {
> > > +		test_multi_return(PHASE_TEST_EXT_INIT);
> > > +
> > > +		/* we continue from here after letting all tests go up to this
> > > +		 * point and running test_ext_init() once and for all */
> > > +		parseargs(argc, argv);
> > > +		return;
> > > +	}
> > > +
> > >  	parseargs(argc, argv);
> > >  	if (test_log_init(outfile, ".external"))
> > >  		exit(1);
> > > @@ -93,6 +112,15 @@ void test_ext_init(int argc, char **argv)
> > >  
> > >  void test_init(int argc, char **argv)
> > >  {
> > > +	if (test_multi_hooked()) {
> > > +		test_multi_return(PHASE_TEST_INIT);
> > > +
> > > +		/* we continue from here after letting all tests go up to this
> > > +		 * point and running test_init() once and for all */
> > > +		parseargs(argc, argv);
> > > +		return;
> > > +	}
> > > +
> > >  	pid_t pid;
> > >  	static FILE *pidf;
> > >  	char *val;
> > > @@ -107,14 +135,15 @@ void test_init(int argc, char **argv)
> > >  	val = getenv("ZDTM_NEWNS");
> > >  	if (val) {
> > >  		if (!strcmp(val, "1")) {
> > > -			ns_create(argc, argv);
> > > +			ns_create(real_argc ?: argc, real_argv ?: argv);
> > >  			exit(1);
> > >  		}
> > >  
> > >  		if (!strcmp(val, "2")) {
> > >  			test_log_init(outfile, "ns");
> > >  			redir_stdfds();
> > > -			ns_init(argc, argv);
> > > +			ns_init(real_argc ?: argc, real_argv ?: argv);
> > > +
> > >  		}
> > >  	}
> > >  
> > > @@ -224,6 +253,13 @@ void test_init(int argc, char **argv)
> > >  
> > >  void test_daemon()
> > >  {
> > > +	if (test_multi_hooked()) {
> > > +		test_multi_return(PHASE_TEST_DAEMON);
> > > +
> > > +		/* we continue from here after C/R */
> > > +		return;
> > > +	}
> > > +
> > >  	pid_t ppid;
> > >  
> > >  	ppid = getppid();
> > > @@ -243,14 +279,29 @@ out:
> > >  
> > >  int test_go(void)
> > >  {
> > > -	return !futex_get(&sig_received);
> > > +	int cr_done = futex_get(&sig_received);
> > > +	if (test_multi_hooked()) {
> > > +		test_multi_go(cr_done);
> > > +	}
> > > +	return !cr_done;
> > >  }
> > >  
> > >  void test_waitsig(void)
> > >  {
> > > +	/* in multi-test mode, tests invoke test_waitsig() after real C/R
> > > +	 * and waiting for signal in the launcher, but test_waitsig() is
> > > +	 * idempotent (i. e. once a signal has been received, it will never
> > > +	 * wait anymore), so it will work as is (which is very good due to
> > > +	 * the fact that test_waitsig() is (ab)used by tests' own children). */
> > > +	if (test_multi_hooked()) {
> > > +		test_multi_waitsig();
> > > +	}
> > >  	futex_wait_while(&sig_received, 0);
> > >  }
> > >  
> > >  void test_report(int ok)
> > >  {
> > > +	if (test_multi_hooked()) {
> > > +		test_multi_report(ok ? RESULT_PASS : RESULT_FAIL);
> > > +	}
> > >  }
> > > diff --git a/test/zdtm/lib/zdtmtst.h b/test/zdtm/lib/zdtmtst.h
> > > index 1ccc2f2..6837c45 100644
> > > --- a/test/zdtm/lib/zdtmtst.h
> > > +++ b/test/zdtm/lib/zdtmtst.h
> > > @@ -39,7 +39,7 @@ extern int test_go(void);
> > >  /* sleep until SIGTERM is delivered */
> > >  extern void test_waitsig(void);
> > >  /* report the test result (apart from logging PASS or FAIL) */
> > > -extern void test_report(int result);
> > > +extern void test_report(int ok);
> > >  
> > >  #include <stdint.h>
> > >  
> > > @@ -119,12 +119,24 @@ extern int zdtm_seccomp;
> > >  	test_msg("ERR: %s:%d: " format " (errno = %d (%s))\n", \
> > >  		__FILE__, __LINE__, ## arg, errno, strerror(errno))
> > >  #define fail(format, arg...)	\
> > > -	test_msg("FAIL: %s:%d: " format " (errno = %d (%s))\n", \
> > > -		 __FILE__, __LINE__, ## arg, errno, strerror(errno))
> > > +	do { \
> > > +		test_report(0); \
> > > +		test_msg("FAIL: %s:%d: " format " (errno = %d (%s))\n", \
> > > +			 __FILE__, __LINE__, ## arg, errno, strerror(errno)); \
> > > +	} while (0)
> > >  #define skip(format, arg...)	\
> > > -	test_msg("SKIP: %s:%d: " format "\n", \
> > > -		 __FILE__, __LINE__, ## arg)
> > > -#define pass()	test_msg("PASS\n")
> > > +	do { \
> > > +		test_report(1); \
> > > +		test_msg("SKIP: %s:%d: " format "\n", \
> > > +			 __FILE__, __LINE__, ## arg); \
> > > +	} while (0)
> > > +#define pass() \
> > > +	do { \
> > > +		test_report(1); \
> > > +		test_msg("PASS\n"); \
> > > +	} while (0)
> > > +
> > > +#define log(fmt, args...) fprintf(stderr, fmt "\n", ## args)
> > >  
> > >  #ifndef NDEBUG
> > >  #define assert(expr) \
> > > diff --git a/test/zdtm/lib/zdtmtst_internal.h b/test/zdtm/lib/zdtmtst_internal.h
> > > index c0c90fc..355fca0 100644
> > > --- a/test/zdtm/lib/zdtmtst_internal.h
> > > +++ b/test/zdtm/lib/zdtmtst_internal.h
> > > @@ -9,4 +9,32 @@ extern struct long_opt *opt_head;
> > >  /* our main pid, i. e. right after forking in test_init() */
> > >  extern pid_t master_pid;
> > >  
> > > +/* "real" (initial) argc and argv -- may be set by main() if it wants to
> > > + * pass partial parameters to test_init().
> > > + * "real" parameters will be used if test_init() decides to reexec. */
> > > +extern int real_argc;
> > > +extern char **real_argv;
> > > +
> > > +enum test_result {
> > > +	RESULT_INDETERMINATE = 0,
> > > +	RESULT_PASS,
> > > +	RESULT_FAIL
> > > +};
> > > +
> > > +enum test_phase {
> > > +	PHASE_START = 0,
> > > +	PHASE_TEST_INIT,
> > > +	PHASE_TEST_EXT_INIT,
> > > +	PHASE_TEST_DAEMON,
> > > +	PHASE_TEST_WAITSIG,
> > > +	PHASE_RETURNED,
> > > +};
> > > +
> > > +__weak extern int test_multi_hooked(void);
> > > +__weak extern void test_multi_return(enum test_phase phase);
> > > +__weak extern void test_multi_report(enum test_result result);
> > > +__weak NORETURN extern void test_multi_exit(int status);
> > > +__weak extern void test_multi_go(int cr_done);
> > > +__weak extern void test_multi_waitsig(void);
> > > +
> > >  #endif /* _ZDTMTST_INTERNAL_H_ */