[Devel] namespace and nsproxy syscalls

Cedric Le Goater clg at fr.ibm.com
Tue Sep 26 02:42:10 PDT 2006


Hello all,

A while ago, we expressed the need to have a new syscall specific to
namespaces. The clone and unshare syscalls are good candidates, but we are
reaching the limit of the clone flags and clone has been hijacked enough.

So, I came up with unshare_ns. The patch for the core feature follows
this email. There is not much difference from unshare() for the moment, but
it gives us the freedom to diverge when new namespaces come in. I have faith
also! If you feel it's useful, I'll send the full patchset for review on the
list.

I'd also like to discuss another syscall which would allow a process to
bind to a set of namespaces ( == nsproxy == container) :

	bind_ns(ns_id_t id, int flags) 

bind_ns binds the current nsproxy to an id. You can only bind once and 
you can use this id to bind another process to the same nsproxy.

a few comments :

* ns_id_t could be an int, a const char*, or a struct ns_addr*. This is
  to be defined.
* bind_ns applies to the nsproxy and not to individual namespaces. We could
  bind a specific namespace to an id using flags, but I don't see the need.
  (But why not, with flags == 0 meaning ALL namespaces.)
* The semantics are close to shmat, but I don't think we need a shmdt because
  nsproxies are not resilient objects.

Thanks,

C.


From: Cedric Le Goater <clg at fr.ibm.com>
Subject: add unshare_ns syscall core routine

This patch adds the unshare_ns syscall core routine.

This syscall is an unshare dedicated to namespaces.

sample user program : 

/*
 * unshare_ns.c
 *
 * author: Cedric Le Goater <clg at fr.ibm.com>
 *
 * (C) Copyright IBM Corp. 2005, 2006
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 * 02110-1301, USA.
 */

#include <stdio.h>
#include <stdlib.h>
#include <sched.h>
#include <unistd.h>
#include <string.h>
#include <errno.h>

#include <linux/unistd.h>

/*
 * Syscall number of the proposed unshare_ns syscall, for C libraries whose
 * headers do not provide it.  The guard has to test __NR_unshare_ns itself:
 * testing __NR_unshare (the already existing unshare syscall) would skip
 * these definitions whenever the headers know about unshare().
 *
 * NOTE(review): the numbers are presumably the ones assigned by the rest of
 * the patchset -- confirm against the kernel under test.
 */
#ifndef __NR_unshare_ns
#if defined(__i386__)
#    define __NR_unshare_ns 319
#elif defined(__x86_64__)
#    define __NR_unshare_ns 280
#elif defined(__ia64__)
#    define __NR_unshare_ns 1303
#elif defined(__s390x__)
#    define __NR_unshare_ns 310
#elif defined(__powerpc__)
#    define __NR_unshare_ns 301
#else
#    error "Architecture not supported"
#endif
#endif

/*
 * unshare_ns - invoke the unshare_ns syscall.
 * @flags: mask of UNSHARE_NS_* bits.
 *
 * Goes through syscall(2) instead of the _syscall1() macro, which current
 * kernel/libc headers no longer export to user space.
 *
 * Returns the syscall result; on failure syscall(2) returns -1 and sets errno.
 */
static inline int unshare_ns(int flags)
{
	return (int)syscall(__NR_unshare_ns, flags);
}

/*
 * unshare_ns() flag bits, one per namespace type.  These are the same
 * values the kernel patch adds to include/linux/sched.h.
 */
#define UNSHARE_NS_MNT		(1 << 0)	/* mount namespace */
#define UNSHARE_NS_UTS		(1 << 1)	/* utsname namespace */
#define UNSHARE_NS_IPC		(1 << 2)	/* ipc namespace */
#define UNSHARE_NS_USER		(1 << 3)	/* user namespace */
#define UNSHARE_NS_NET		(1 << 4)	/* net namespace */
#define UNSHARE_NS_PID		(1 << 5)	/* pid namespace */

/*
 * Print the option summary for program @name on stdout, then terminate
 * the process with exit status 1.  Never returns.
 */
static void usage(const char *name)
{
	/* One call; the adjacent literals are concatenated by the compiler. */
	printf("usage: %s [-Hiunmpeh]\n"
	       "\t-H : unshare utsname namespace.\n"
	       "\t-i : unshare ipc namespace.\n"
	       "\t-u : unshare user namespace.\n"
	       "\t-n : unshare net namespace.\n"
	       "\t-m : unshare mount namespace.\n"
	       "\t-p : unshare pid namespace.\n"
	       "\t-e : exec command.\n"
	       "\n"
	       "(C) Copyright IBM Corp. 2005, 2006\n"
	       "\n",
	       name);

	exit(1);
}

/*
 * Parse the namespace options, unshare the selected namespaces and, when
 * -e was given, exec the command that follows the options.
 *
 * Returns 0 on success and 1 when unshare_ns() or execve() fails.  Unknown
 * options, -h, and -e without a command go through usage(), which exits
 * with status 1.
 */
int main(int argc, char *argv[])
{
	extern char **environ;	/* POSIX leaves this declaration to the program */
	const char *progname = argv[0];	/* saved: argv is shifted below */
	int flags = 0;		/* int, to match unshare_ns(int flags) */
	int exec = 0;
	int c;

	/*
	 * glibc getopt: the leading '+' stops parsing at the first non-option
	 * argument, so the options of the command to exec are left alone.
	 * getopt() reports the end of the options with -1.
	 */
	while ((c = getopt(argc, argv, "+Hiunmpeh")) != -1) {
		switch (c) {
		case 'i': flags |= UNSHARE_NS_IPC; break;
		case 'u': flags |= UNSHARE_NS_USER; break;
		case 'H': flags |= UNSHARE_NS_UTS; break;
		case 'n': flags |= UNSHARE_NS_NET; break;
		case 'm': flags |= UNSHARE_NS_MNT; break;
		case 'p': flags |= UNSHARE_NS_PID; break;
		case 'e': exec = 1; break;
		case 'h':
		default:
			usage(progname);
		}
	}

	/* Skip the parsed options; argv[0] is now the command, if any. */
	argv += optind;
	argc -= optind;

	/*
	 * -e needs a command.  Without this check argv[0] is NULL here and
	 * both execve(NULL, ...) and printing it through %s are invalid.
	 * Checked before unsharing so that nothing is changed on bad usage.
	 */
	if (exec && argc < 1)
		usage(progname);

	if (unshare_ns(flags) == -1) {
		perror("unshare_ns");
		return 1;
	}

	if (exec) {
		execve(argv[0], argv, environ);
		/* Only reached when execve() failed. */
		fprintf(stderr, "execve(%s) : %s\n", argv[0], strerror(errno));
		return 1;
	}

	return 0;
}


Signed-off-by: Cedric Le Goater <clg at fr.ibm.com>

---
 include/linux/sched.h    |   10 ++++
 include/linux/syscalls.h |    1 
 kernel/fork.c            |  106 +++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 117 insertions(+)

Index: 2.6.18-mm1/include/linux/sched.h
===================================================================
--- 2.6.18-mm1.orig/include/linux/sched.h
+++ 2.6.18-mm1/include/linux/sched.h
@@ -28,6 +28,16 @@
 #define CLONE_NEWIPC		0x08000000	/* New ipcs */
 
 /*
+ * unshare_ns flags:
+ */
+#define UNSHARE_NS_MNT		0x00000001
+#define UNSHARE_NS_UTS		0x00000002
+#define UNSHARE_NS_IPC		0x00000004
+#define UNSHARE_NS_USER		0x00000008
+#define UNSHARE_NS_NET		0x00000010
+#define UNSHARE_NS_PID		0x00000020
+
+/*
  * Scheduling policies
  */
 #define SCHED_NORMAL		0
Index: 2.6.18-mm1/include/linux/syscalls.h
===================================================================
--- 2.6.18-mm1.orig/include/linux/syscalls.h
+++ 2.6.18-mm1/include/linux/syscalls.h
@@ -580,6 +580,7 @@ asmlinkage long compat_sys_newfstatat(un
 asmlinkage long compat_sys_openat(unsigned int dfd, const char __user *filename,
 				   int flags, int mode);
 asmlinkage long sys_unshare(unsigned long unshare_flags);
+asmlinkage long sys_unshare_ns(unsigned long unshare_flags);
 
 asmlinkage long sys_splice(int fd_in, loff_t __user *off_in,
 			   int fd_out, loff_t __user *off_out,
Index: 2.6.18-mm1/kernel/fork.c
===================================================================
--- 2.6.18-mm1.orig/kernel/fork.c
+++ 2.6.18-mm1/kernel/fork.c
@@ -1761,3 +1761,109 @@ bad_unshare_cleanup_thread:
 bad_unshare_out:
 	return err;
 }
+
+/*
+ * unshare_ns unshares the mnt, uts and ipc namespaces of the current task
+ * selected by unshare_ns_flags.  USER, NET and PID are accepted but unused.
+ */
+asmlinkage long sys_unshare_ns(unsigned long unshare_ns_flags)
+{
+	int err = 0;
+	struct nsproxy *new_nsproxy = NULL, *old_nsproxy = NULL;
+	struct fs_struct *fs, *new_fs = NULL;
+	struct mnt_namespace *mnt, *new_mnt = NULL;
+	struct uts_namespace *uts, *new_uts = NULL;
+	struct ipc_namespace *ipc, *new_ipc = NULL;
+	unsigned long unshare_flags = 0;
+
+	/* Reject any bit outside the UNSHARE_NS_* set with -EINVAL */
+	err = -EINVAL;
+	if (unshare_ns_flags & ~(UNSHARE_NS_MNT|UNSHARE_NS_UTS|UNSHARE_NS_IPC|
+				 UNSHARE_NS_USER|UNSHARE_NS_NET|
+				 UNSHARE_NS_PID))
+		goto bad_unshare_ns_out;
+
+	/* Map to clone flags; MNT also requests CLONE_FS (the fs_struct) */
+	if (unshare_ns_flags & UNSHARE_NS_MNT)
+		unshare_flags |= CLONE_NEWNS|CLONE_FS;
+	if (unshare_ns_flags & UNSHARE_NS_UTS)
+		unshare_flags |= CLONE_NEWUTS;
+	if (unshare_ns_flags & UNSHARE_NS_IPC)
+		unshare_flags |= CLONE_NEWIPC;
+
+	if ((err = unshare_fs(unshare_flags, &new_fs)))
+		goto bad_unshare_ns_out;
+	if ((err = unshare_mnt_namespace(unshare_flags, &new_mnt, new_fs)))
+		goto bad_unshare_ns_cleanup_fs;
+	if ((err = unshare_utsname(unshare_flags, &new_uts)))
+		goto bad_unshare_ns_cleanup_mnt;
+	if ((err = unshare_ipcs(unshare_flags, &new_ipc)))
+		goto bad_unshare_ns_cleanup_uts;
+
+	if (new_mnt || new_uts || new_ipc) {
+		old_nsproxy = current->nsproxy;
+		new_nsproxy = dup_namespaces(old_nsproxy);
+		if (!new_nsproxy) {
+			err = -ENOMEM;
+			goto bad_unshare_ns_cleanup_ipc;
+		}
+	}
+
+	if (new_fs || new_mnt || new_uts || new_ipc) {
+
+		task_lock(current);
+
+		if (new_nsproxy) {
+			current->nsproxy = new_nsproxy;
+			new_nsproxy = old_nsproxy;
+		}
+
+		if (new_fs) {
+			fs = current->fs;
+			current->fs = new_fs;
+			new_fs = fs;
+		}
+
+		if (new_mnt) {
+			mnt = current->nsproxy->mnt_ns;
+			current->nsproxy->mnt_ns = new_mnt;
+			new_mnt = mnt;
+		}
+
+		if (new_uts) {
+			uts = current->nsproxy->uts_ns;
+			current->nsproxy->uts_ns = new_uts;
+			new_uts = uts;
+		}
+
+		if (new_ipc) {
+			ipc = current->nsproxy->ipc_ns;
+			current->nsproxy->ipc_ns = new_ipc;
+			new_ipc = ipc;
+		}
+
+		task_unlock(current);
+	}
+
+	if (new_nsproxy)
+		put_nsproxy(new_nsproxy);
+
+bad_unshare_ns_cleanup_ipc:
+	if (new_ipc)
+		put_ipc_ns(new_ipc);
+
+bad_unshare_ns_cleanup_uts:
+	if (new_uts)
+		put_uts_ns(new_uts);
+
+bad_unshare_ns_cleanup_mnt:
+	if (new_mnt)
+		put_mnt_ns(new_mnt);
+
+bad_unshare_ns_cleanup_fs:
+	if (new_fs)
+		put_fs_struct(new_fs);
+
+bad_unshare_ns_out:
+	return err;
+}
_______________________________________________
Containers mailing list
Containers at lists.osdl.org
https://lists.osdl.org/mailman/listinfo/containers




More information about the Devel mailing list