[Devel] [patch -mm 10/17] nsproxy: add unshare_ns and bind_ns syscalls
Dmitry Mishin
dim at openvz.org
Wed Dec 6 05:06:08 PST 2006
On Tuesday 05 December 2006 13:28, clg at fr.ibm.com wrote:
> From: Cedric Le Goater <clg at fr.ibm.com>
[skip]
> +static int switch_ns(int id, unsigned long flags)
> +{
> + int err = 0;
> + struct nsproxy *ns = NULL, *old_ns = NULL, *new_ns = NULL;
> +
> + if (flags & ~NS_ALL)
> + return -EINVAL;
> +
> + /* Let 0 be a default value ? */
> + if (!flags)
> + flags = NS_ALL;
> +
> + if (id < 0) {
> + struct task_struct *p;
> +
> + err = -ESRCH;
> + read_lock(&tasklist_lock);
> + p = find_task_by_pid(-id);
> + if (p) {
> + task_lock(p);
> + get_nsproxy(p->nsproxy);
> + ns = p->nsproxy;
> + task_unlock(p);
> + }
> + read_unlock(&tasklist_lock);
> + } else {
> + err = -ENOENT;
> + spin_lock_irq(&ns_hash_lock);
> + ns = ns_hash_find(id);
> + spin_unlock_irq(&ns_hash_lock);
> + }
> +
> + if (!ns)
> + goto out;
> +
> + new_ns = ns;
> +
> + /*
> + * clone current nsproxy and populate it with the namespaces
> + * chosen by flags.
> + */
> + if (flags != NS_ALL) {
> + new_ns = dup_namespaces(current->nsproxy);
> + if (!new_ns) {
> + err = -ENOMEM;
> + goto out_ns;
> + }
> +
> + if (flags & NS_MNT) {
> + put_mnt_ns(new_ns->mnt_ns);
> + get_mnt_ns(ns->mnt_ns);
> + new_ns->mnt_ns = ns->mnt_ns;
> + }
> +
> + if (flags & NS_UTS) {
> + put_uts_ns(new_ns->uts_ns);
> + get_uts_ns(ns->uts_ns);
> + new_ns->uts_ns = ns->uts_ns;
> + }
> +
> + if (flags & NS_IPC) {
> + put_ipc_ns(new_ns->ipc_ns);
> + new_ns->ipc_ns = get_ipc_ns(ns->ipc_ns);
> + }
<<<< This code looks useless to me, since at this point new_ns->any_ptr ==
ns->any_ptr.
> + out_ns:
> + put_nsproxy(ns);
> + }
> +
> + task_lock(current);
> + if (new_ns) {
> + old_ns = current->nsproxy;
> + current->nsproxy = new_ns;
> + }
> + task_unlock(current);
> +
> + if (old_ns)
> + put_nsproxy(old_ns);
> +
> + err = 0;
> +out:
> + return err;
> +}
> +
> +
> +/*
> + * bind_ns - bind the nsproxy of a task to an id or bind a task to a
> + * identified nsproxy
> + *
> + * @id: nsproxy identifier if positive or pid if negative
> + * @flags: identifies the namespaces to bind to
> + *
> + * bind_ns serves 2 purposes.
> + *
> + * The first is to bind the nsproxy of the current task to the
> + * identifier @id. If the identifier is already used, -EBUSY is
> + * returned. If the nsproxy is already bound, -EACCES is returned.
> + * flags is not used in that case.
> + *
> + * The second use is to bind the current task to a subset of
> + * namespaces of an identified nsproxy. If positive, @id is considered
> + * being an nsproxy identifier previously used to bind the nsproxy to
> + * @id. If negative, @id is the pid of a task which is another way to
> + * identify a nsproxy. Switching nsproxy is restricted to tasks within
> + * nsproxy 0, the default nsproxy. If unknown, -ENOENT is returned.
> + * @flags is used to bind the task to the selected namespaces.
> + *
> + * Both uses may return -EINVAL for invalid arguments and -EPERM for
> + * insufficient privileges.
> + *
> + * Returns 0 on success.
> + */
> +asmlinkage long sys_bind_ns(int id, unsigned long flags)
> +{
> + struct nsproxy *ns = current->nsproxy;
> + int ret = 0;
> +
> + /*
> + * ns is being changed by switch_ns(), protect it
> + */
> + get_nsproxy(ns);
> +
> + /*
> + * protect ns->id
> + */
> + spin_lock(&ns->nslock);
> + switch (ns->id) {
> + case -1:
> + /*
> + * only an unbound nsproxy can be bound to an id.
> + */
> + ret = bind_ns(id, ns);
> + break;
> +
> + case 0:
> + if (!capable(CAP_SYS_ADMIN)) {
> + ret = -EPERM;
> + goto unlock;
> + }
> +
> + /*
> + * only nsproxy 0 can switch nsproxy. if target id is
> + * 0, this is a nop.
> + */
> + if (id)
> + ret = switch_ns(id, flags);
> + break;
> +
> + default:
> + /*
> + * current nsproxy is already bound. forbid any
> + * switch.
> + */
> + ret = -EACCES;
> + }
> +unlock:
> + spin_unlock(&ns->nslock);
> + put_nsproxy(ns);
> + return ret;
> +}
> +
> +/*
> + * sys_unshare_ns - unshare one or more of namespaces which were
> + * originally shared using clone.
> + *
> + * @unshare_ns_flags: identifies the namespaces to unshare
> + *
> + * this is for the moment a pale copy of unshare but we expect to
> + * diverge when the number of namespaces increases. the number of
> + * available clone flags is also very low. This is one way to get
> + * some air.
> + *
> + */
> +asmlinkage long sys_unshare_ns(unsigned long unshare_ns_flags)
> +{
> + int err = 0;
> + struct nsproxy *new_nsproxy = NULL, *old_nsproxy = NULL;
> + struct fs_struct *fs, *new_fs = NULL;
> + struct mnt_namespace *mnt, *new_mnt = NULL;
> + struct uts_namespace *uts, *new_uts = NULL;
> + struct ipc_namespace *ipc, *new_ipc = NULL;
> + unsigned long unshare_flags = 0;
> +
> + /* Return -EINVAL for all unsupported flags */
> + err = -EINVAL;
> + if (unshare_ns_flags & ~NS_ALL)
> + goto bad_unshare_ns_out;
> +
> + /*
> + * compatibility with unshare()/clone() : convert ns flags to
> + * clone flags
> + */
> + if (unshare_ns_flags & NS_MNT)
> + unshare_flags |= CLONE_NEWNS|CLONE_FS;
> + if (unshare_ns_flags & NS_UTS)
> + unshare_flags |= CLONE_NEWUTS;
> + if (unshare_ns_flags & NS_IPC)
> + unshare_flags |= CLONE_NEWIPC;
> +
> + if ((err = unshare_fs(unshare_flags, &new_fs)))
> + goto bad_unshare_ns_out;
> + if ((err = unshare_mnt_ns(unshare_flags, &new_mnt, new_fs)))
> + goto bad_unshare_ns_cleanup_fs;
> + if ((err = unshare_utsname(unshare_flags, &new_uts)))
> + goto bad_unshare_ns_cleanup_mnt;
> + if ((err = unshare_ipcs(unshare_flags, &new_ipc)))
> + goto bad_unshare_ns_cleanup_uts;
> +
> + if (new_mnt || new_uts || new_ipc) {
> + old_nsproxy = current->nsproxy;
> + new_nsproxy = dup_namespaces(old_nsproxy);
> + if (!new_nsproxy) {
> + err = -ENOMEM;
> + goto bad_unshare_ns_cleanup_ipc;
> + }
> + }
> +
> + if (new_fs || new_mnt || new_uts || new_ipc) {
> +
> + task_lock(current);
> +
> + if (new_nsproxy) {
> + current->nsproxy = new_nsproxy;
> + new_nsproxy = old_nsproxy;
> + }
> +
> + if (new_fs) {
> + fs = current->fs;
> + current->fs = new_fs;
> + new_fs = fs;
> + }
> +
> + if (new_mnt) {
> + mnt = current->nsproxy->mnt_ns;
> + current->nsproxy->mnt_ns = new_mnt;
> + new_mnt = mnt;
> + }
> +
> + if (new_uts) {
> + uts = current->nsproxy->uts_ns;
> + current->nsproxy->uts_ns = new_uts;
> + new_uts = uts;
> + }
> +
> + if (new_ipc) {
> + ipc = current->nsproxy->ipc_ns;
> + current->nsproxy->ipc_ns = new_ipc;
> + new_ipc = ipc;
> + }
> +
> + task_unlock(current);
> + }
> +
> + if (new_nsproxy)
> + put_nsproxy(new_nsproxy);
> +
> +bad_unshare_ns_cleanup_ipc:
> + if (new_ipc)
> + put_ipc_ns(new_ipc);
> +
> +bad_unshare_ns_cleanup_uts:
> + if (new_uts)
> + put_uts_ns(new_uts);
> +
> +bad_unshare_ns_cleanup_mnt:
> + if (new_mnt)
> + put_mnt_ns(new_mnt);
> +
> +bad_unshare_ns_cleanup_fs:
> + if (new_fs)
> + put_fs_struct(new_fs);
> +
> +bad_unshare_ns_out:
> + return err;
> +}
> +
> static int __init nshash_init(void)
> {
> int i;
> Index: 2.6.19-rc6-mm2/kernel/sys_ni.c
> ===================================================================
> --- 2.6.19-rc6-mm2.orig/kernel/sys_ni.c
> +++ 2.6.19-rc6-mm2/kernel/sys_ni.c
> @@ -146,3 +146,7 @@ cond_syscall(compat_sys_migrate_pages);
> cond_syscall(sys_bdflush);
> cond_syscall(sys_ioprio_set);
> cond_syscall(sys_ioprio_get);
> +
> +/* user resources syscalls */
> +cond_syscall(sys_unshare_ns);
> +cond_syscall(sys_bind_ns);
> Index: 2.6.19-rc6-mm2/arch/i386/kernel/syscall_table.S
> ===================================================================
> --- 2.6.19-rc6-mm2.orig/arch/i386/kernel/syscall_table.S
> +++ 2.6.19-rc6-mm2/arch/i386/kernel/syscall_table.S
> @@ -323,3 +323,5 @@ ENTRY(sys_call_table)
> .long sys_kevent_ctl
> .long sys_kevent_wait
> .long sys_kevent_ring_init
> + .long sys_unshare_ns
> + .long sys_bind_ns /* 325 */
> Index: 2.6.19-rc6-mm2/arch/ia64/kernel/entry.S
> ===================================================================
> --- 2.6.19-rc6-mm2.orig/arch/ia64/kernel/entry.S
> +++ 2.6.19-rc6-mm2/arch/ia64/kernel/entry.S
> @@ -1610,5 +1610,7 @@ sys_call_table:
> data8 sys_sync_file_range // 1300
> data8 sys_tee
> data8 sys_vmsplice
> + data8 sys_unshare_ns
> + data8 sys_bind_ns
>
> .org sys_call_table + 8*NR_syscalls // guard against failures to increase NR_syscalls
> Index: 2.6.19-rc6-mm2/arch/s390/kernel/compat_wrapper.S
> ===================================================================
> --- 2.6.19-rc6-mm2.orig/arch/s390/kernel/compat_wrapper.S
> +++ 2.6.19-rc6-mm2/arch/s390/kernel/compat_wrapper.S
> @@ -1665,3 +1665,14 @@ sys_getcpu_wrapper:
> llgtr %r3,%r3 # unsigned *
> llgtr %r4,%r4 # struct getcpu_cache *
> jg sys_getcpu
> +
> + .globl sys_unshare_ns_wrapper
> +sys_unshare_ns_wrapper:
> + llgfr %r2,%r2 # unsigned long
> + jg sys_unshare_ns
> +
> + .globl sys_bind_ns_wrapper
> +sys_bind_ns_wrapper:
> + lgfr %r2,%r2 # int
> + llgfr %r3,%r3 # unsigned long
> + jg sys_bind_ns
> Index: 2.6.19-rc6-mm2/arch/s390/kernel/syscalls.S
> ===================================================================
> --- 2.6.19-rc6-mm2.orig/arch/s390/kernel/syscalls.S
> +++ 2.6.19-rc6-mm2/arch/s390/kernel/syscalls.S
> @@ -321,3 +321,5 @@ SYSCALL(sys_vmsplice,sys_vmsplice,compat
> NI_SYSCALL /* 310 sys_move_pages */
> SYSCALL(sys_getcpu,sys_getcpu,sys_getcpu_wrapper)
> SYSCALL(sys_epoll_pwait,sys_epoll_pwait,sys_ni_syscall)
> +SYSCALL(sys_unshare_ns,sys_unshare_ns,sys_unshare_ns_wrapper)
> +SYSCALL(sys_bind_ns,sys_bind_ns,sys_bind_ns_wrapper)
> Index: 2.6.19-rc6-mm2/arch/x86_64/ia32/ia32entry.S
> ===================================================================
> --- 2.6.19-rc6-mm2.orig/arch/x86_64/ia32/ia32entry.S
> +++ 2.6.19-rc6-mm2/arch/x86_64/ia32/ia32entry.S
> @@ -722,4 +722,6 @@ ia32_sys_call_table:
> .quad sys_kevent_ctl /* 320 */
> .quad sys_kevent_wait
> .quad sys_kevent_ring_init
> + .quad sys_unshare_ns
> + .quad sys_bind_ns
> ia32_syscall_end:
> Index: 2.6.19-rc6-mm2/include/asm-i386/unistd.h
> ===================================================================
> --- 2.6.19-rc6-mm2.orig/include/asm-i386/unistd.h
> +++ 2.6.19-rc6-mm2/include/asm-i386/unistd.h
> @@ -329,10 +329,12 @@
> #define __NR_kevent_ctl 321
> #define __NR_kevent_wait 322
> #define __NR_kevent_ring_init 323
> +#define __NR_unshare_ns 324
> +#define __NR_bind_ns 325
>
> #ifdef __KERNEL__
>
> -#define NR_syscalls 324
> +#define NR_syscalls 326
>
> #define __ARCH_WANT_IPC_PARSE_VERSION
> #define __ARCH_WANT_OLD_READDIR
> Index: 2.6.19-rc6-mm2/include/asm-ia64/unistd.h
> ===================================================================
> --- 2.6.19-rc6-mm2.orig/include/asm-ia64/unistd.h
> +++ 2.6.19-rc6-mm2/include/asm-ia64/unistd.h
> @@ -291,11 +291,13 @@
> #define __NR_sync_file_range 1300
> #define __NR_tee 1301
> #define __NR_vmsplice 1302
> +#define __NR_unshare_ns 1303
> +#define __NR_bind_ns 1304
>
> #ifdef __KERNEL__
>
>
> -#define NR_syscalls 279 /* length of syscall table */
> +#define NR_syscalls 281 /* length of syscall table */
>
> #define __ARCH_WANT_SYS_RT_SIGACTION
>
> Index: 2.6.19-rc6-mm2/include/asm-powerpc/systbl.h
> ===================================================================
> --- 2.6.19-rc6-mm2.orig/include/asm-powerpc/systbl.h
> +++ 2.6.19-rc6-mm2/include/asm-powerpc/systbl.h
> @@ -305,3 +305,5 @@ SYSCALL_SPU(faccessat)
> COMPAT_SYS_SPU(get_robust_list)
> COMPAT_SYS_SPU(set_robust_list)
> COMPAT_SYS(move_pages)
> +SYSCALL_SPU(unshare_ns)
> +SYSCALL_SPU(bind_ns)
> Index: 2.6.19-rc6-mm2/include/asm-powerpc/unistd.h
> ===================================================================
> --- 2.6.19-rc6-mm2.orig/include/asm-powerpc/unistd.h
> +++ 2.6.19-rc6-mm2/include/asm-powerpc/unistd.h
> @@ -324,10 +324,12 @@
> #define __NR_get_robust_list 299
> #define __NR_set_robust_list 300
> #define __NR_move_pages 301
> +#define __NR_unshare_ns 302
> +#define __NR_bind_ns 303
>
> #ifdef __KERNEL__
>
> -#define __NR_syscalls 302
> +#define __NR_syscalls 304
>
> #define __NR__exit __NR_exit
> #define NR_syscalls __NR_syscalls
> Index: 2.6.19-rc6-mm2/include/asm-s390/unistd.h
> ===================================================================
> --- 2.6.19-rc6-mm2.orig/include/asm-s390/unistd.h
> +++ 2.6.19-rc6-mm2/include/asm-s390/unistd.h
> @@ -250,8 +250,10 @@
> /* Number 310 is reserved for new sys_move_pages */
> #define __NR_getcpu 311
> #define __NR_epoll_pwait 312
> +#define __NR_unshare_ns 313
> +#define __NR_bind_ns 314
>
> -#define NR_syscalls 313
> +#define NR_syscalls 315
>
> /*
> * There are some system calls that are not present on 64 bit, some
> Index: 2.6.19-rc6-mm2/include/asm-x86_64/unistd.h
> ===================================================================
> --- 2.6.19-rc6-mm2.orig/include/asm-x86_64/unistd.h
> +++ 2.6.19-rc6-mm2/include/asm-x86_64/unistd.h
> @@ -627,8 +627,12 @@ __SYSCALL(__NR_kevent_ctl, sys_kevent_ct
> __SYSCALL(__NR_kevent_wait, sys_kevent_wait)
> #define __NR_kevent_ring_init 283
> __SYSCALL(__NR_kevent_ring_init, sys_kevent_ring_init)
> +#define __NR_unshare_ns 284
> +__SYSCALL(__NR_unshare_ns, sys_unshare_ns)
> +#define __NR_bind_ns 285
> +__SYSCALL(__NR_bind_ns, sys_bind_ns)
>
> -#define __NR_syscall_max __NR_kevent_ring_init
> +#define __NR_syscall_max __NR_bind_ns
>
> #ifndef __NO_STUBS
> #define __ARCH_WANT_OLD_READDIR
--
Thanks,
Dmitry.
More information about the Devel mailing list