[Devel] [PATCH 5/9] user-cr: add nsexeccwp to test eclone

Nathan Lynch ntl at pobox.com
Tue Nov 17 16:55:42 PST 2009


From: Serge E. Hallyn <serue at us.ibm.com>

One of the concerns with eclone is whether the stack handling is all
correct and robust enough to withstand real usage.  Little testcases
playing with pid values are also necessary, but can't replace really
using clone-with-pids to start a shell from which to keep working.

This patch tweaks the old ns_exec.c namespace manipulation program to
add a -z option to specify a pid.  So you can:

    nsexeccwp -cmp /bin/bash # start a shell in a new pidns+mntns
    mount -t proc proc /proc # mount private /proc
    echo $$
                1
    nsexeccwp -z 999 /bin/bash   # start a shell with pid 999
    echo $$
            999

[ ntl - minor updates to original version to use clone_args/eclone() ]

Signed-off-by: Serge E. Hallyn <serue at us.ibm.com>
Signed-off-by: Nathan Lynch <ntl at pobox.com>
---
 Makefile    |    5 +-
 clone.h     |   54 ++++++++++
 nsexeccwp.c |  329 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 387 insertions(+), 1 deletions(-)
 create mode 100644 clone.h
 create mode 100644 nsexeccwp.c

diff --git a/Makefile b/Makefile
index 181cc1c..32a6893 100644
--- a/Makefile
+++ b/Makefile
@@ -20,7 +20,7 @@ CFLAGS += -g $(WARNS) $(CKPT_INCLUDE) $(DEBUG)
 # install dir
 INSTALL_DIR = /bin
 
-PROGS =	checkpoint restart ckptinfo
+PROGS =	checkpoint restart ckptinfo nsexeccwp
 
 # other cleanup
 OTHER = ckptinfo_types.c
@@ -39,11 +39,14 @@ restart: CFLAGS += -D__REENTRANT -pthread
 ifneq ($(SUBARCH),)
 restart: clone_$(SUBARCH).o
 restart: CFLAGS += -DARCH_HAS_CLONE_WITH_PID
+nsexeccwp: clone_$(SUBARCH).o
+nsexeccwp: CFLAGS += -DARCH_HAS_CLONE_WITH_PID
 endif
 
 # on powerpc, need also assembly file
 ifeq ($(SUBARCH),ppc)
 restart: clone_$(SUBARCH)_.o
+nsexeccwp: clone_$(SUBARCH)_.o
 endif
 
 # ckptinfo dependencies
diff --git a/clone.h b/clone.h
new file mode 100644
index 0000000..3569a45
--- /dev/null
+++ b/clone.h
@@ -0,0 +1,54 @@
+#ifndef CLONE_H
+#define CLONE_H
+/*
+ *  Copyright (C) 2007 IBM Corporation
+ *
+ *  Author: Cedric Le Goater <clg at fr.ibm.com>
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License as
+ *  published by the Free Software Foundation, version 2 of the
+ *  License.
+ *
+ */
+#include <sys/syscall.h>
+
+#ifndef HAVE_UNSHARE
+
+#if __i386__
+#    define __NR_unshare 310
+#elif __x86_64__
+#    define __NR_unshare 272
+#elif __ia64__
+#    define __NR_unshare 1296
+#elif __s390x__
+#    define __NR_unshare 303
+#elif __powerpc__
+#    define __NR_unshare 282
+#else
+#    error "Architecture not supported"
+#endif
+
+#endif /* HAVE_UNSHARE */
+
+#ifndef CLONE_NEWUTS
+#define CLONE_NEWUTS		0x04000000
+#endif
+
+#ifndef CLONE_NEWIPC
+#define CLONE_NEWIPC		0x08000000
+#endif
+
+#ifndef CLONE_NEWUSER
+#define CLONE_NEWUSER		0x10000000
+#endif
+
+#ifndef CLONE_NEWPID
+#define CLONE_NEWPID		0x20000000
+#endif
+
+#ifndef CLONE_NEWNET
+#define CLONE_NEWNET		0x40000000
+#endif
+
+#endif /* CLONE_H */
diff --git a/nsexeccwp.c b/nsexeccwp.c
new file mode 100644
index 0000000..b02c48c
--- /dev/null
+++ b/nsexeccwp.c
@@ -0,0 +1,329 @@
+/*
+ * Copyright 2008,2009 IBM Corp.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <sched.h>
+#include <sys/syscall.h>
+#include <unistd.h>
+#include <signal.h>
+#include <string.h>
+#include <errno.h>
+#include <libgen.h>
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+
+#include "clone.h"
+#include "eclone.h"
+
+extern pid_t getpgid(pid_t pid);
+extern pid_t getsid(pid_t pid);
+
+static const char* procname;
+
+/* Print the option summary on stdout, then exit with status 1. */
+static void usage(const char *name)
+{
+	printf("usage: %s [-h] [-c] [-g] [-mnuUip] [-z <pid>] [-f <flag>] "
+			"[-P <pid-file>] [command [arg ..]]\n", name);
+	printf("\n  -h		this message\n");
+	printf("\n");
+	printf("  -z <pid>	use clone_with_pids and specify chosen pid\n");
+	printf("  		Note that -z and -p are not compatible\n");
+	printf("  -c		use 'clone' rather than 'unshare' system call\n");
+	printf("  -g		launch in new cgroup\n");
+	printf("  -m		mount namespace\n");
+	printf("  -n		network namespace\n");
+	printf("  -u		utsname namespace\n");
+	printf("  -U		userid namespace\n");
+	printf("  -i		ipc namespace\n");
+	printf("  -P <pid-file>	File in which to write global pid of cinit\n");
+	printf("  -p		pid namespace\n");
+	printf("  -f <flag>	extra clone flags (hexadecimal)\n");
+	printf("\n");
+	printf("(C) Copyright IBM Corp. 2006\n");
+	printf("\n");
+	exit(1);
+}
+
+/* Parse str as a hexadecimal unsigned long into *res; 0 on success, -1 on error. */
+static int string_to_ul(const char *str, unsigned long int *res)
+{
+	char *tail;
+	unsigned long int r;
+
+	/* strtoul() would silently negate "-1"; reject empty and signed input */
+	if (!*str || *str == '-')
+		return -1;
+
+	errno = 0;
+	r = strtoul(str, &tail, 16);
+	/*
+	 * According to strtoul(3), if errno is set or tail does not point
+	 * to the ending '\0', the conversion failed.
+	 */
+	if (errno || *tail)
+		return -1;
+
+	*res = r;
+	return 0;
+}
+
+/*
+ * Make tty the stdin, stdout and stderr of this process, closing every
+ * descriptor below the newly opened one.  Exits with status 1 if tty can't
+ * be opened.  Copied from Fedora's util-linux rpm, with syslog() -> printf().
+ */
+static void
+opentty(const char * tty) {
+        int i, fd, flags;
+
+        fd = open(tty, O_RDWR | O_NONBLOCK);
+        if (fd == -1) {
+		printf("FATAL: can't reopen tty: %s\n", strerror(errno));
+                sleep(1);
+                exit(1);
+        }
+
+        flags = fcntl(fd, F_GETFL);
+        if (flags != -1)        /* leave the mode alone if it can't be read */
+                fcntl(fd, F_SETFL, flags & ~O_NONBLOCK);
+
+        for (i = 0; i < fd; i++)
+                close(i);
+        for (i = 0; i < 3; i++)
+                if (fd != i)
+                        dup2(fd, i);
+        if (fd >= 3)
+                close(fd);
+}
+// Code copy end
+
+int do_newcgrp = 0;
+
+/* Copy the mount point of a cgroup filesystem mounted with the freezer option into dest (len bytes); 1 if found, else 0. */
+int load_cgroup_dir(char *dest, int len)
+{
+	FILE *f = fopen("/proc/mounts", "r");
+	char buf[200];
+	char *path, *fsname, *options, *p1, *p2, *s;
+	if (!f)
+		return 0;
+	while (fgets(buf, sizeof(buf), f)) {
+		strtok_r(buf, " ", &p1);	/* device name, unused */
+		path = strtok_r(NULL, " ", &p1);
+		fsname = strtok_r(NULL, " ", &p1);
+		options = strtok_r(NULL, " ", &p1);
+		if (!options || strcmp(fsname, "cgroup") != 0)	/* NULL: short line */
+			continue;
+		/* make sure the freezer is composed */
+		s = strtok_r(options, ",", &p2);
+		while (s && strcmp(s, "freezer") != 0)
+			s = strtok_r(NULL, ",", &p2);
+		if (!s)
+			continue;
+		snprintf(dest, len, "%s", path);	/* always NUL-terminates */
+		fclose(f);
+		return 1;
+	}
+	fclose(f);
+	printf("Freezer not mounted\n");
+	return 0;
+}
+
+/* Create a cgroup named newcgroup under the freezer mount and move the calling
+ * process into it.  Returns 1 on success, 0 on any failure. */
+int move_to_new_cgroup(int newcgroup)
+{
+	char cgroupname[150], cgroupbase[100], tasksfname[200];
+	FILE *fout;
+
+	if (!load_cgroup_dir(cgroupbase, sizeof(cgroupbase)))
+		return 0;
+
+	snprintf(cgroupname, sizeof(cgroupname), "%s/%d", cgroupbase, newcgroup);
+	if (mkdir(cgroupname, 0755))
+		return 0;
+	snprintf(tasksfname, sizeof(tasksfname), "%s/tasks", cgroupname);
+	fout = fopen(tasksfname, "w");
+	if (!fout)
+		return 0;
+	fprintf(fout, "%d\n", getpid());
+	/* a rejected write to "tasks" is only reported when the stream is flushed */
+	return fclose(fout) == 0;
+}
+
+int pipefd[2];
+
+/* Child side of -g: read the number the parent wrote to pipefd and join the
+ * cgroup of that name.  Must run before opentty(), which closes pipefd.
+ * Returns 0 on success or when -g was not given, 1 on failure. */
+int check_newcgrp(void)
+{
+	int ret;
+	char buf[20];
+	if (!do_newcgrp)
+		return 0;
+	close(pipefd[1]);
+	ret = read(pipefd[0], buf, sizeof(buf) - 1);
+	close(pipefd[0]);
+	if (ret == -1)
+		perror("read");
+	if (ret <= 0)		/* error, or the parent never wrote */
+		return 1;
+	buf[ret] = '\0';	/* atoi() needs a terminated string */
+	if (!move_to_new_cgroup(atoi(buf)))
+		return 1;
+	do_newcgrp = 0;
+	return 0;
+}
+
+/* Child entry point: join the new cgroup if one was requested, then exec the command in vargv (a char *argv[]). */
+int do_child(void *vargv)
+{
+	char **cmd = vargv;
+
+	if (!check_newcgrp()) {
+		execve(cmd[0], cmd, __environ);
+		perror("execve");	/* only reached when execve() fails */
+	}
+	return 1;
+}
+
+/* Write pid in decimal, without a newline, to pid_file; a NULL pid_file is a no-op.  Exits with status 1 if the file can't be opened. */
+void write_pid(char *pid_file, int pid)
+{
+	FILE *out;
+
+	if (pid_file == NULL)
+		return;
+	out = fopen(pid_file, "w");
+	if (out == NULL) {
+		perror("fopen, pid_file");
+		exit(1);
+	}
+	fprintf(out, "%d", pid);
+	/* fclose() flushes the stream, so no separate fflush() is needed */
+	fclose(out);
+}
+
+/*
+ * Parse the options, start the command in the requested namespaces (through
+ * eclone(), or fork() followed by unshare()), record its pid and wait for it.
+ */
+int main(int argc, char *argv[])
+{
+	unsigned long flags = 0, eflags = 0;
+	char ttyname[256];
+	int c, status, ret, pid, use_clone = 0;
+	char *pid_file = NULL;
+	pid_t chosen_pid = 0;
+
+	procname = basename(argv[0]);
+	/* readlink() does not NUL-terminate: keep the last zeroed byte */
+	memset(ttyname, '\0', sizeof(ttyname));
+	readlink("/proc/self/fd/0", ttyname, sizeof(ttyname) - 1);
+
+	while ((c = getopt(argc, argv, "+mguUiphz:cnf:P:")) != EOF) {
+		switch (c) {
+		case 'g': do_newcgrp = getpid();		break;
+		case 'm': flags |= CLONE_NEWNS;			break;
+		case 'c': use_clone = 1;			break;
+		case 'P': pid_file = optarg; 			break;
+		case 'u': flags |= CLONE_NEWUTS;		break;
+		case 'i': flags |= CLONE_NEWIPC;		break;
+		case 'U': flags |= CLONE_NEWUSER;		break;
+		case 'n': flags |= CLONE_NEWNET;		break;
+		case 'p': flags |= CLONE_NEWNS|CLONE_NEWPID;	break;
+		case 'z': chosen_pid = atoi(optarg);		break;
+		case 'f': if (!string_to_ul(optarg, &eflags)) {
+				flags |= eflags;
+				break;
+			}	/* bad value: fall through to usage */
+		case 'h':
+		default:
+			usage(procname);
+		}
+	}
+
+	if (chosen_pid) {
+		use_clone = 1;
+		if (flags & CLONE_NEWPID) {
+			printf("Error: can't use CLONE_NEWPID and pick a pid\n");
+			exit(1);
+		}
+	}
+	argv = &argv[optind];
+	argc = argc - optind;
+	if (argc < 1)		/* no command: execve() could only fail */
+		usage(procname);
+
+	if (do_newcgrp) {
+		ret = pipe(pipefd);
+		if (ret) {
+			perror("pipe");
+			return -1;
+		}
+		do_newcgrp = pipefd[0];
+	}
+
+	if (use_clone) {
+		struct clone_args clone_args;
+		int stacksize = 4*getpagesize();
+		void *stack = malloc(stacksize);
+
+		if (!stack) {
+			perror("malloc");
+			return -1;
+		}
+		memset(&clone_args, 0, sizeof(clone_args));
+		clone_args.child_stack = (unsigned long)stack;
+		clone_args.child_stack_size = stacksize;
+		clone_args.nr_pids = 1;		/* just chosen_pid */
+
+		printf("about to clone with %lx\n", flags);
+		if (chosen_pid)
+			printf("Will choose pid %d\n", chosen_pid);
+		flags |= SIGCHLD;
+		pid = eclone(do_child, argv, flags, &clone_args, &chosen_pid);
+		if (pid == -1) {
+			perror("clone");
+			return -1;
+		}
+	} else {
+		pid = fork();
+		if (pid == -1) {
+			perror("fork");
+			return -1;
+		}
+		if (pid == 0) {
+			/* child: join the cgroup before opentty() closes pipefd */
+			if (check_newcgrp())
+				return 1;
+			opentty(ttyname);
+			printf("about to unshare with %lx\n", flags);
+			ret = unshare(flags);
+			if (ret < 0) {
+				perror("unshare");
+				return 1;
+			}
+			return do_child((void*)argv);
+		}
+	}
+	if (do_newcgrp) {	/* the child is waiting for its cgroup number */
+		char buf[20];
+		snprintf(buf, 20, "%d", pid);
+		close(pipefd[0]);
+		write(pipefd[1], buf, strlen(buf)+1);
+		close(pipefd[1]);
+	}
+
+	write_pid(pid_file, pid);
+
+	if ((ret = waitpid(pid, &status, __WALL)) < 0)
+		printf("waitpid() returns %d, errno %d\n", ret, errno);
+	exit(0);
+}
-- 
1.6.2.5

_______________________________________________
Containers mailing list
Containers at lists.linux-foundation.org
https://lists.linux-foundation.org/mailman/listinfo/containers




More information about the Devel mailing list