[CRIU] [RFC PATCHv2 30/32] zdtm: launcher: initial implementation of fork()+wait() management

Ivan Shapovalov intelfx at intelfx.name
Sat Mar 12 00:42:29 PST 2016


As pointed out in review, if multiple tests fork() off their own sets of child
processes and then attempt to wait() or waitpid(-1) for them, it is very likely
that the tests will end up receiving status information for wrong processes.

This could be fixed by rewriting all tests to only use waitpid() with specific
pids, but, in spirit of this patchset, let's instead intercept fork() and wait()
families of library calls and, by keeping track of child processes' ownership,
make wait() do The Right Thing transparently.

Signed-off-by: Ivan Shapovalov <intelfx at intelfx.name>
---
 test/zdtm/launcher/launcher.c | 432 +++++++++++++++++++++++++++++++++++++++++-
 test/zdtm/lib/lock.h          |   7 +-
 2 files changed, 434 insertions(+), 5 deletions(-)

diff --git a/test/zdtm/launcher/launcher.c b/test/zdtm/launcher/launcher.c
index 06a3f34..bd9ad4c 100644
--- a/test/zdtm/launcher/launcher.c
+++ b/test/zdtm/launcher/launcher.c
@@ -3,6 +3,7 @@
 #include <stdio.h>
 #include <ctype.h>
 #include <stdlib.h>
+#include <stdarg.h>
 #include <signal.h>
 #include <ucontext.h>
 #include <linux/unistd.h>
@@ -16,6 +17,18 @@
 #include "lock.h"
 
 #define memzero(arg) memset(&arg, 0, sizeof(arg))
+#define elementsof(arg) (sizeof(arg) / sizeof(arg[0]))
+
+struct pid_data
+{
+	siginfo_t info;
+	int status;
+};
+
+typedef struct pid_list
+{
+	 struct pid_data pids[1024], *pids_end;
+} pid_list_t;
 
 /* options defined in libzdtmtst itself */
 static struct long_opt *global_opts_head;
@@ -24,8 +37,15 @@ static struct long_opt *global_opts_head;
 static ucontext_t return_ctx;
 static struct test *current_test, *all_tests;
 
-/* real exit() function */
+/* real function pointers for intercepted libc functions */
 static NORETURN void(*real_exit)(int);
+static pid_t(*real_fork)(void);
+static int(*real_clone)(int(*fn)(void*), void *child_stack, int flags,
+                        void *arg, ...);
+static pid_t(*real_wait)(int *status);
+static pid_t(*real_waitpid)(pid_t pid, int *status, int options);
+static int(*real_waitid)(idtype_t idtype, id_t id, siginfo_t *infop,
+                         int options);
 
 /* main thread id of the launcher */
 static pid_t master_tid;
@@ -44,6 +64,14 @@ static int threading_enabled;
  * (which means that we must not run any more tests) */
 static int have_pending_signals;
 
+/* children which have been wait()'ed for but not yet claimed by any of the
+ * tests */
+pid_list_t pending_children;
+
+/* a guard mutex for our hooks of fork(), wait() and friends; this is to
+ * make sure that we don't miss updates of pending_children */
+mutex_t waiting_mutex;
+
 static long sys_gettid(void)
 {
         return syscall(__NR_gettid);
@@ -55,6 +83,13 @@ static NORETURN void sys_exit(int status)
 	__builtin_unreachable();
 }
 
+void* real_func(const char *name)
+{
+	void *ptr = dlsym(RTLD_NEXT, name);
+	assert(ptr != NULL);
+	return ptr;
+}
+
 static const char *test_phase_str[] = {
 	[PHASE_START] = "<main not started>",
 	[PHASE_TEST_INIT] = "test_init",
@@ -64,6 +99,150 @@ static const char *test_phase_str[] = {
 	[PHASE_RETURNED] = "<main finished>",
 };
 
+void pid_list_init(pid_list_t *list)
+{
+	list->pids_end = list->pids;
+}
+
+int pid_list_empty(const pid_list_t *list)
+{
+	for (const struct pid_data *p = list->pids; p != list->pids_end; ++p) {
+		if (p->info.si_pid != 0) {
+			return 0;
+		}
+	}
+
+	return 1;
+}
+
+/* add a pid to a list of pids */
+void pid_list_add(pid_list_t *list, const struct pid_data *pd)
+{
+	assert(pd->info.si_pid != 0);
+
+	if (list->pids_end - list->pids >= elementsof(list->pids)) {
+		pr_err("Trying to store too many pids\n");
+		abort();
+	}
+
+	*list->pids_end++ = *pd;
+}
+
+void pid_list_add_no_data(pid_list_t *list, pid_t pid)
+{
+	struct pid_data pd = {
+		.info = {
+			.si_pid = pid
+		}
+	};
+
+	pid_list_add(list, &pd);
+}
+
+/* check if this pid list entry matches given pid and given waitid()-style
+ * options */
+int pid_list_match(const struct pid_data *pd, pid_t pid, int options)
+{
+	/* first check pid */
+	if (pid > 0 ? pd->info.si_pid != pid
+	            : pd->info.si_pid <= 0) {
+		return 0;
+	}
+
+	/* then check type */
+	if (options != 0) {
+		switch (pd->info.si_code) {
+		case CLD_EXITED:
+		case CLD_KILLED:
+		case CLD_DUMPED:
+			return (options & WEXITED);
+
+		case CLD_STOPPED:
+		case CLD_TRAPPED:
+			return (options & WSTOPPED);
+
+		case CLD_CONTINUED:
+			return (options & WCONTINUED);
+
+		default:
+			pr_err("Switch error\n");
+			abort();
+		}
+	}
+
+	/* affirm by default */
+	return 1;
+}
+
+/* find a pid in a list of pids */
+struct pid_data *pid_list_find(pid_list_t *list, pid_t pid, int options)
+{
+	for (struct pid_data *p = list->pids; p != list->pids_end; ++p) {
+		if (pid_list_match(p, pid, options)) {
+			return p;
+		}
+	}
+
+	return NULL;
+}
+
+void pid_list_apply_event(struct pid_data *ch, const struct pid_data *ev, int options)
+{
+	if (!(options & WNOWAIT) &&
+	    (ev->info.si_code == CLD_EXITED ||
+	     ev->info.si_code == CLD_KILLED ||
+	     ev->info.si_code == CLD_DUMPED)) {
+		ch->info.si_pid = 0;
+	}
+}
+
+/* find given pid (non-positive means "any") in both lists. if found:
+ * 1) remove from "events" if not NOWAIT;
+ * 2) remove from "children" if not NOWAIT and if the found event means death.
+ * return found pid on success and 0 otherwise. */
+pid_t pid_list_take_event(pid_list_t *events, pid_list_t *children, pid_t pid,
+                          struct pid_data *out, int options)
+{
+	struct pid_data *ev, *ch;
+
+	if (pid > 0) {
+		if ((ev = pid_list_find(events, pid, options)) != NULL &&
+		    (ch = pid_list_find(children, pid, 0)) != NULL) {
+			goto pid_found;
+		}
+	} else {
+		for (ev = events->pids; ev != events->pids_end; ++ev) {
+			if (!pid_list_match(ev, -1, options)) {
+				continue;
+			}
+
+			for (ch = children->pids; ch != children->pids_end; ++ch) {
+				if (ev->info.si_pid == ch->info.si_pid) {
+					pid = ev->info.si_pid;
+					goto pid_found;
+				}
+			}
+		}
+	}
+
+	return 0;
+
+pid_found:
+	if (out != NULL) {
+		*out = *ev;
+	}
+
+	if (!(options & WNOWAIT)) {
+		/* "remove" the pid from events list */
+		ev->info.si_pid = 0;
+	}
+
+	/* "remove" the pid from children list */
+	pid_list_apply_event(ch, ev, options);
+
+	return pid;
+}
+
 /* describes a single test executable which is dlopen()'ed */
 struct test
 {
@@ -105,6 +284,9 @@ struct test
 		futex_t tid_futex;
 	};
 
+	/* children created by this test */
+	pid_list_t children;
+
 	/* the alternate stack for the test execution context */
 	char ctx_stack[8*1<<20];
 };
@@ -238,6 +420,8 @@ static int test_make(struct test *test, const ucontext_t *ctx)
 
 	makecontext(&test->ctx, &test_main, 0);
 
+	pid_list_init(&test->children);
+
 	return 0;
 }
 
@@ -519,6 +703,240 @@ NORETURN void exit(int status)
 	test_multi_exit(status);
 }
 
+/* fork() wrapper to remember per-test ownership of created children
+ * which is later used to allow tests to wait() for their children only */
+pid_t fork(void)
+{
+	if (!test_multi_hooked()) {
+		return real_fork();
+	}
+
+	int r = real_fork();
+	if (r > 0) {
+		struct test *test = test_current();
+		__cleanup_unlock__ mutex_t *m = mutex_lock(&waiting_mutex);
+
+		pr_debug("Test \"%s\" forked a child %d -- registering",
+			 test->name, r);
+
+		pid_list_add_no_data(&test->children, r);
+	}
+
+	return r;
+}
+
+/* clone() wrapper to remember per-test ownership of created children
+ * which is later used to allow tests to wait() for their children only */
+int clone(int(*fn)(void*), void *child_stack, int flags, void *arg, ...)
+{
+	va_list ap;
+	va_start(ap, arg);
+
+	pid_t *ptid = va_arg(ap, pid_t *);
+	struct user_desc *tls = va_arg(ap, struct user_desc *);
+	pid_t *ctid = va_arg(ap, pid_t *);
+
+	va_end(ap);
+
+	if (!test_multi_hooked()) {
+		return real_clone(fn, child_stack, flags, arg, ptid, tls, ctid);
+	}
+
+	int r = real_clone(fn, child_stack, flags, arg, ptid, tls, ctid);
+	if (r > 0) {
+		struct test *test = test_current();
+		__cleanup_unlock__ mutex_t *m = mutex_lock(&waiting_mutex);
+
+		pr_debug("Test \"%s\" cloned a child %d -- registering",
+			 test->name, r);
+
+		pid_list_add_no_data(&test->children, r);
+	}
+
+	return r;
+}
+
+/* main wait()-wrap function which implements selective waiting */
+int waitid_internal(idtype_t idtype, id_t id, int *wait_pid, int *wait_status,
+                    siginfo_t *waitid_info, int options)
+{
+	/*
+	 * Violence is not an aberration, it's a rule...
+	 *
+	 * So what are we doing here?
+	 *
+	 * At fork time we remember "ownership" of children by storing their
+	 * pids into current_test()->children.
+	 * Consequently, at wait time we first do an actual waitid() and then
+	 * check if the pid is in current_test()->children. If it is, then we
+	 * remove it from that list and return it. Otherwise, we put that pid
+	 * into pending_children and wait for another one.
+	 * The next waitid() will first check pending_children if there are any
+	 * children belonging to the current test and claim one, if found.
+	 *
+	 * On top of this we, of course, need filtering (to be able to query
+	 * pending_children according to waitid options) and conversion of
+	 * waitid-style results (.si_code + .si_status) into wait/waitpid-style
+	 * results (a single int which is read using W*() macros).
+	 *
+	 * All this still leaves one gap: we cannot implement waitid(P_PGID)
+	 * because waitid does not report PGID of waited-for children. But this
+	 * doesn't seem to be used in tests, which is very good for us.
+	 */
+
+	if (idtype == P_PGID) {
+		pr_err("waitid(P_PGID) is not implemented\n");
+		abort();
+	}
+
+	assert((idtype == P_ALL) || (idtype == P_PID));
+
+	int r;
+	pid_t pid = idtype == P_PID ? id : -1;
+	struct test *test = test_current();
+	__cleanup_unlock__ mutex_t *m = mutex_lock(&waiting_mutex);
+	struct pid_data ev = {0}, *ch = NULL;
+
+	if (!(pid > 0) && pid_list_empty(&test->children)) {
+		pr_err("Test \"%s\" called wait() with an empty child list "
+		       "-- missed a child at creation time?\n",
+		       test->name);
+		errno = ECHILD;
+		return -1;
+	}
+
+	if (pid > 0 && pid_list_find(&test->children, pid, 0) == NULL) {
+		pr_err("Test \"%s\" called waitpid() with an unknown pid %d "
+		       "-- missed a child at creation time?\n",
+		       test->name, pid);
+		errno = ECHILD;
+		return -1;
+	}
+
+	/* first, try to take a child from pending_children & test->children */
+	if ((r = pid_list_take_event(&pending_children, &test->children,
+	                             pid, &ev, options)) != 0) {
+		pr_debug("Test \"%s\" claimed pending child %d",
+		         test->name, r);
+		goto assign_and_ret;
+	}
+
+	for (;;) {
+		/* if there are no pending children for this test,
+		 * wait() for someone */
+		/* FIXME: we are wait()'ing under a lock... */
+		r = real_waitid(idtype, id, &ev.info, options);
+		if (r < 0 || ev.info.si_pid == 0) {
+			return r;
+		}
+
+		/* convert siginfo_t fields to wait()-style status */
+		/* FIXME: isn't there a standards-compliant way to do this? */
+		ev.status = 0;
+		switch(ev.info.si_code) {
+		case CLD_EXITED:
+			ev.status |= W_EXITCODE(ev.info.si_status & 0xff, 0);
+			break;
+
+		case CLD_DUMPED:
+			ev.status |= WCOREFLAG;
+			/* fall-through */
+
+		case CLD_KILLED:
+			ev.status |= W_EXITCODE(0, ev.info.si_status & 0x7f);
+			break;
+
+		case CLD_TRAPPED:
+		case CLD_STOPPED:
+			ev.status |= W_STOPCODE(ev.info.si_status & 0xff);
+			break;
+
+		case CLD_CONTINUED:
+			ev.status |= __W_CONTINUED;
+			break;
+
+		default:
+			pr_err("Switch error\n");
+			abort();
+		}
+
+		/* see if that newly-waited-for child is appropriate for us */
+		if ((ch = pid_list_find(&test->children, ev.info.si_pid, 0)) != NULL) {
+			pr_debug("Test \"%s\" claimed newly-waited-for child %d",
+			         test->name, ev.info.si_pid);
+			pid_list_apply_event(ch, &ev, options);
+			goto assign_and_ret;
+		}
+
+		/* otherwise put it into pending children and wait
+		 * for the next */
+		pr_debug("We don't want newly-waited-for child %d, waiting again",
+		         test->name, ev.info.si_pid);
+		pid_list_add(&pending_children, &ev);
+	}
+
+assign_and_ret:
+	if (wait_pid != NULL) {
+		*wait_pid = ev.info.si_pid;
+	}
+	if (wait_status != NULL) {
+		*wait_status = ev.status;
+	}
+	if (waitid_info != NULL) {
+		*waitid_info = ev.info;
+	}
+	return 0;
+}
+
+int waitid(idtype_t idtype, id_t id, siginfo_t *info, int options)
+{
+	if (!test_multi_hooked()) {
+		return real_waitid(idtype, id, info, options);
+	}
+
+	return waitid_internal(idtype, id, NULL, NULL, info, options);
+}
+
+pid_t waitpid(pid_t pid, int *status, int options)
+{
+	if (!test_multi_hooked()) {
+		return real_waitpid(pid, status, options);
+	}
+
+	idtype_t idtype;
+	id_t id;
+
+	if (pid > 0) {
+		idtype = P_PID;
+		id = pid;
+	} else if (pid == -1) {
+		idtype = P_ALL;
+		id = 0;
+	} else {
+		pr_err("waitpid(%d, ...) is not implemented\n", pid);
+		abort();
+	}
+
+	pid_t r_pid = 0;
+	int r = waitid_internal(idtype, id, &r_pid, status, NULL, WEXITED | options);
+
+	return r < 0 ? r
+		     : r_pid;
+}
+
+pid_t wait(int *status)
+{
+	if (!test_multi_hooked()) {
+		return real_wait(status);
+	}
+
+	pid_t r_pid = 0;
+	int r = waitid_internal(P_ALL, 0, &r_pid, status, NULL, WEXITED);
+
+	return r < 0 ? r
+		     : r_pid;
+}
+
 int main(int argc, char **argv)
 {
 	if (argc < 2) {
@@ -530,11 +948,17 @@ int main(int argc, char **argv)
 	int r;
 
 	/*
-	 * Find the real exit() function.
+	 * Find the real exit(), fork(), wait() and friends.
 	 */
 
-	real_exit = dlsym(RTLD_NEXT, "exit");
-	assert(real_exit != NULL);
+	real_exit = real_func("exit");
+	real_fork = real_func("fork");
+	real_clone = real_func("clone");
+	real_wait = real_func("wait");
+	real_waitpid = real_func("waitpid");
+	real_waitid = real_func("waitid");
+
+	pid_list_init(&pending_children);
 
 	/*
 	 * Capture a base execution context for all tests before we modify the
diff --git a/test/zdtm/lib/lock.h b/test/zdtm/lib/lock.h
index 362de49..d038b54 100644
--- a/test/zdtm/lib/lock.h
+++ b/test/zdtm/lib/lock.h
@@ -137,7 +137,7 @@ static void inline mutex_init(mutex_t *m)
 	atomic_set(&m->raw, c);
 }
 
-static void inline mutex_lock(mutex_t *m)
+static mutex_t inline *mutex_lock(mutex_t *m)
 {
 	uint32_t c;
 	int ret;
@@ -146,6 +146,8 @@ static void inline mutex_lock(mutex_t *m)
 		ret = sys_futex(&m->raw, FUTEX_WAIT, c + 1, NULL, NULL, 0);
 		BUG_ON(ret < 0 && ret != -EWOULDBLOCK);
 	}
+
+	return m;
 }
 
 static void inline mutex_unlock(mutex_t *m)
@@ -155,4 +157,7 @@ static void inline mutex_unlock(mutex_t *m)
 	BUG_ON(sys_futex(&m->raw, FUTEX_WAKE, 1, NULL, NULL, 0) < 0);
 }
 
+DEFINE_TRIVIAL_CLEANUP_FUNC(mutex_t *, mutex_unlock)
+#define __cleanup_unlock__ __cleanup(mutex_unlockp)
+
 #endif /* CR_LOCK_H_ */
-- 
2.7.2



More information about the CRIU mailing list