[Devel] [RFC][PATCH 1/2] add user namespace [try #2]
Cedric Le Goater
clg at fr.ibm.com
Mon Aug 28 07:56:44 PDT 2006
This patch adds the user namespace.
Basically, it allows a process to unshare its user_struct table,
resetting at the same time its own user_struct and all the associated
accounting.
A new root user (uid == 0) is added to the user namespace upon
creation. Such root users have full privileges and it seems that
these privileges should be controlled through some means (process
capabilities?)
Changes [try #2]
- removed struct user_namespace* argument from find_user()
- added a root_user per user namespace
Signed-off-by: Cedric Le Goater <clg at fr.ibm.com>
Cc: Andrew Morton <akpm at osdl.org>
Cc: Kirill Korotaev <dev at openvz.org>
Cc: Eric W. Biederman <ebiederm at xmission.com>
Cc: Herbert Poetzl <herbert at 13thfloor.at>
Cc: Serge E. Hallyn <serue at us.ibm.com>
Cc: Dave Hansen <haveblue at us.ibm.com>
---
include/linux/init_task.h | 2
include/linux/nsproxy.h | 2
include/linux/sched.h | 4 +
include/linux/user.h | 46 +++++++++++++++
init/Kconfig | 8 ++
kernel/fork.c | 2
kernel/nsproxy.c | 15 ++++-
kernel/sys.c | 5 +
kernel/user.c | 133 ++++++++++++++++++++++++++++++++++++++++++----
9 files changed, 203 insertions(+), 14 deletions(-)
Index: 2.6.18-rc4-mm3/kernel/user.c
===================================================================
--- 2.6.18-rc4-mm3.orig/kernel/user.c
+++ 2.6.18-rc4-mm3/kernel/user.c
@@ -14,20 +14,29 @@
#include <linux/bitops.h>
#include <linux/key.h>
#include <linux/interrupt.h>
+#include <linux/user.h>
+#include <linux/module.h>
+#include <linux/nsproxy.h>
/*
* UID task count cache, to get fast user lookup in "alloc_uid"
* when changing user ID's (ie setuid() and friends).
*/
-#define UIDHASH_BITS (CONFIG_BASE_SMALL ? 3 : 8)
-#define UIDHASH_SZ (1 << UIDHASH_BITS)
#define UIDHASH_MASK (UIDHASH_SZ - 1)
#define __uidhashfn(uid) (((uid >> UIDHASH_BITS) + uid) & UIDHASH_MASK)
-#define uidhashentry(uid) (uidhash_table + __uidhashfn((uid)))
+#define uidhashentry(ns, uid) ((ns)->uidhash_table + __uidhashfn((uid)))
static kmem_cache_t *uid_cachep;
-static struct list_head uidhash_table[UIDHASH_SZ];
+
+struct user_namespace init_user_ns = {
+ .kref = {
+ .refcount = ATOMIC_INIT(2),
+ },
+ .root_user = &root_user,
+};
+
+EXPORT_SYMBOL_GPL(init_user_ns);
/*
* The uidhash_lock is mostly taken from process context, but it is
@@ -84,6 +93,111 @@ static inline struct user_struct *uid_ha
return NULL;
}
+
+#ifdef CONFIG_USER_NS
+
+/*
+ * Create a new user ns (refcount 1) with its own root user and switch
+ * current->user into it; @old_ns is unused. Return NULL on failure.
+ */
+static struct user_namespace *clone_user_ns(struct user_namespace *old_ns)
+{
+	struct user_namespace *ns;
+	struct user_struct *new_user;
+	int n;
+
+	ns = kmalloc(sizeof(*ns), GFP_KERNEL);
+	if (!ns)
+		return NULL;
+
+	kref_init(&ns->kref);
+	for (n = 0; n < UIDHASH_SZ; ++n)
+		INIT_LIST_HEAD(ns->uidhash_table + n);
+
+	/* Insert new root user. */
+	ns->root_user = alloc_uid(ns, 0);
+	if (!ns->root_user)
+		goto out_free_ns;
+
+	/* Reset current->user with a new one */
+	new_user = alloc_uid(ns, current->uid);
+	if (!new_user)
+		goto out_free_root;
+
+	switch_uid(new_user);
+	return ns;
+
+out_free_root:
+	free_uid(ns->root_user);	/* release the new root user */
+out_free_ns:
+	kfree(ns);
+	return NULL;
+}
+
+/*
+ * unshare the current process' user namespace.
+ */
+int unshare_user_ns(unsigned long unshare_flags,
+ struct user_namespace **new_user)
+{
+ if (unshare_flags & CLONE_NEWUSER) {
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ *new_user = clone_user_ns(current->nsproxy->user_ns);
+ if (!*new_user)
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+/*
+ * Copy task tsk's user namespace, or clone it if flags specifies
+ * CLONE_NEWUSER. In latter case, changes to the user namespace of
+ * this process won't be seen by parent, and vice versa.
+ */
+int copy_user_ns(int flags, struct task_struct *tsk)
+{
+ struct user_namespace *old_ns = tsk->nsproxy->user_ns;
+ struct user_namespace *new_ns;
+ int err = 0;
+
+ if (!old_ns)
+ return 0;
+
+ get_user_ns(old_ns);
+
+ if (!(flags & CLONE_NEWUSER))
+ return 0;
+
+ if (!capable(CAP_SYS_ADMIN)) {
+ err = -EPERM;
+ goto out;
+ }
+
+ new_ns = clone_user_ns(old_ns);
+ if (!new_ns) {
+ err = -ENOMEM;
+ goto out;
+ }
+ tsk->nsproxy->user_ns = new_ns;
+
+out:
+ put_user_ns(old_ns);
+ return err;
+}
+
+/* kref release callback: drop the ns root user, then free the ns. */
+void free_user_ns(struct kref *kref)
+{
+	struct user_namespace *ns =
+		container_of(kref, struct user_namespace, kref);
+	free_uid(ns->root_user);
+	kfree(ns);
+}
+#endif /* CONFIG_USER_NS */
+
/*
* Locate the user_struct for the passed UID. If found, take a ref on it. The
* caller must undo that ref with free_uid().
@@ -94,9 +208,10 @@ struct user_struct *find_user(uid_t uid)
{
struct user_struct *ret;
unsigned long flags;
+ struct user_namespace *ns = current->nsproxy->user_ns;
spin_lock_irqsave(&uidhash_lock, flags);
- ret = uid_hash_find(uid, uidhashentry(uid));
+ ret = uid_hash_find(uid, uidhashentry(ns, uid));
spin_unlock_irqrestore(&uidhash_lock, flags);
return ret;
}
@@ -120,9 +235,9 @@ void free_uid(struct user_struct *up)
}
}
-struct user_struct * alloc_uid(uid_t uid)
+struct user_struct * alloc_uid(struct user_namespace *ns, uid_t uid)
{
- struct list_head *hashent = uidhashentry(uid);
+ struct list_head *hashent = uidhashentry(ns, uid);
struct user_struct *up;
spin_lock_irq(&uidhash_lock);
@@ -200,11 +315,11 @@ static int __init uid_cache_init(void)
0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
for(n = 0; n < UIDHASH_SZ; ++n)
- INIT_LIST_HEAD(uidhash_table + n);
+ INIT_LIST_HEAD(init_user_ns.uidhash_table + n);
/* Insert the root user immediately (init already runs as root) */
spin_lock_irq(&uidhash_lock);
- uid_hash_insert(&root_user, uidhashentry(0));
+ uid_hash_insert(&root_user, uidhashentry(&init_user_ns, 0));
spin_unlock_irq(&uidhash_lock);
return 0;
Index: 2.6.18-rc4-mm3/include/linux/nsproxy.h
===================================================================
--- 2.6.18-rc4-mm3.orig/include/linux/nsproxy.h
+++ 2.6.18-rc4-mm3/include/linux/nsproxy.h
@@ -7,6 +7,7 @@
struct namespace;
struct uts_namespace;
struct ipc_namespace;
+struct user_namespace;
/*
* A structure to contain pointers to all per-process
@@ -25,6 +26,7 @@ struct nsproxy {
spinlock_t nslock;
struct uts_namespace *uts_ns;
struct ipc_namespace *ipc_ns;
+ struct user_namespace *user_ns;
struct namespace *namespace;
};
extern struct nsproxy init_nsproxy;
Index: 2.6.18-rc4-mm3/include/linux/user.h
===================================================================
--- 2.6.18-rc4-mm3.orig/include/linux/user.h
+++ 2.6.18-rc4-mm3/include/linux/user.h
@@ -1 +1,47 @@
+#ifndef _LINUX_USER_H
+#define _LINUX_USER_H
+
#include <asm/user.h>
+
+#define UIDHASH_BITS (CONFIG_BASE_SMALL ? 3 : 8)
+#define UIDHASH_SZ (1 << UIDHASH_BITS)
+
+struct user_namespace {
+ struct kref kref;
+ struct list_head uidhash_table[UIDHASH_SZ];
+ struct user_struct *root_user;
+};
+
+extern struct user_namespace init_user_ns;
+
+static inline void get_user_ns(struct user_namespace *ns)
+{
+ kref_get(&ns->kref);
+}
+
+#ifdef CONFIG_USER_NS
+extern int unshare_user_ns(unsigned long unshare_flags,
+ struct user_namespace **new_user);
+extern int copy_user_ns(int flags, struct task_struct *tsk);
+extern void free_user_ns(struct kref *kref);
+
+static inline void put_user_ns(struct user_namespace *ns)
+{
+ kref_put(&ns->kref, free_user_ns);
+}
+#else
+static inline int unshare_user_ns(unsigned long unshare_flags,
+ struct user_namespace **new_user)
+{
+ return -EINVAL;
+}
+static inline int copy_user_ns(int flags, struct task_struct *tsk)
+{
+ return 0;
+}
+static inline void put_user_ns(struct user_namespace *ns)
+{
+}
+#endif /* CONFIG_USER_NS */
+
+#endif /* _LINUX_USER_H */
Index: 2.6.18-rc4-mm3/kernel/nsproxy.c
===================================================================
--- 2.6.18-rc4-mm3.orig/kernel/nsproxy.c
+++ 2.6.18-rc4-mm3/kernel/nsproxy.c
@@ -19,6 +19,7 @@
#include <linux/init_task.h>
#include <linux/namespace.h>
#include <linux/utsname.h>
+#include <linux/user.h>
struct nsproxy init_nsproxy = INIT_NSPROXY(init_nsproxy);
@@ -68,6 +69,8 @@ struct nsproxy *dup_namespaces(struct ns
get_uts_ns(ns->uts_ns);
if (ns->ipc_ns)
get_ipc_ns(ns->ipc_ns);
+ if (ns->user_ns)
+ get_user_ns(ns->user_ns);
}
return ns;
@@ -88,7 +91,8 @@ int copy_namespaces(int flags, struct ta
get_nsproxy(old_ns);
- if (!(flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC)))
+ if (!(flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC |
+ CLONE_NEWUSER)))
return 0;
new_ns = clone_namespaces(old_ns);
@@ -111,10 +115,17 @@ int copy_namespaces(int flags, struct ta
if (err)
goto out_ipc;
+ err = copy_user_ns(flags, tsk);
+ if (err)
+ goto out_user;
+
out:
put_nsproxy(old_ns);
return err;
+out_user:
+ if (new_ns->ipc_ns)
+ put_ipc_ns(new_ns->ipc_ns);
out_ipc:
if (new_ns->uts_ns)
put_uts_ns(new_ns->uts_ns);
@@ -135,5 +146,7 @@ void free_nsproxy(struct nsproxy *ns)
put_uts_ns(ns->uts_ns);
if (ns->ipc_ns)
put_ipc_ns(ns->ipc_ns);
+ if (ns->user_ns)
+ put_user_ns(ns->user_ns);
kfree(ns);
}
Index: 2.6.18-rc4-mm3/include/linux/sched.h
===================================================================
--- 2.6.18-rc4-mm3.orig/include/linux/sched.h
+++ 2.6.18-rc4-mm3/include/linux/sched.h
@@ -26,6 +26,7 @@
#define CLONE_STOPPED 0x02000000 /* Start in stopped state */
#define CLONE_NEWUTS 0x04000000 /* New utsname group? */
#define CLONE_NEWIPC 0x08000000 /* New ipcs */
+#define CLONE_NEWUSER 0x10000000 /* New user */
/*
* Scheduling policies
@@ -242,6 +243,7 @@ extern signed long schedule_timeout_unin
asmlinkage void schedule(void);
struct nsproxy;
+struct user_namespace;
/* Maximum number of active map areas.. This is a random (large) number */
#define DEFAULT_MAX_MAP_COUNT 65536
@@ -1249,7 +1251,7 @@ extern void set_special_pids(pid_t sessi
extern void __set_special_pids(pid_t session, pid_t pgrp);
/* per-UID process charging. */
-extern struct user_struct * alloc_uid(uid_t);
+extern struct user_struct * alloc_uid(struct user_namespace *, uid_t);
static inline struct user_struct *get_uid(struct user_struct *u)
{
atomic_inc(&u->__count);
Index: 2.6.18-rc4-mm3/init/Kconfig
===================================================================
--- 2.6.18-rc4-mm3.orig/init/Kconfig
+++ 2.6.18-rc4-mm3/init/Kconfig
@@ -250,6 +250,14 @@ config UTS_NS
vservers, to use uts namespaces to provide different
uts info for different servers. If unsure, say N.
+config USER_NS
+ bool "User Namespaces"
+ default n
+ help
+ Support user namespaces. This allows containers, i.e.
+ vservers, to use user namespaces to provide different
+ user info for different servers. If unsure, say N.
+
config AUDIT
bool "Auditing support"
depends on NET
Index: 2.6.18-rc4-mm3/include/linux/init_task.h
===================================================================
--- 2.6.18-rc4-mm3.orig/include/linux/init_task.h
+++ 2.6.18-rc4-mm3/include/linux/init_task.h
@@ -7,6 +7,7 @@
#include <linux/utsname.h>
#include <linux/lockdep.h>
#include <linux/ipc.h>
+#include <linux/user.h>
#define INIT_FDTABLE \
{ \
@@ -77,6 +78,7 @@ extern struct nsproxy init_nsproxy;
.uts_ns = &init_uts_ns, \
.namespace = NULL, \
INIT_IPC_NS(ipc_ns) \
+ .user_ns = &init_user_ns, \
}
#define INIT_SIGHAND(sighand) { \
Index: 2.6.18-rc4-mm3/kernel/sys.c
===================================================================
--- 2.6.18-rc4-mm3.orig/kernel/sys.c
+++ 2.6.18-rc4-mm3/kernel/sys.c
@@ -33,6 +33,7 @@
#include <linux/compat.h>
#include <linux/syscalls.h>
#include <linux/kprobes.h>
+#include <linux/user.h>
#include <asm/uaccess.h>
#include <asm/io.h>
@@ -1010,13 +1011,13 @@ static int set_user(uid_t new_ruid, int
{
struct user_struct *new_user;
- new_user = alloc_uid(new_ruid);
+ new_user = alloc_uid(current->nsproxy->user_ns, new_ruid);
if (!new_user)
return -EAGAIN;
if (atomic_read(&new_user->processes) >=
current->signal->rlim[RLIMIT_NPROC].rlim_cur &&
- new_user != &root_user) {
+ new_user != current->nsproxy->user_ns->root_user) {
free_uid(new_user);
return -EAGAIN;
}
Index: 2.6.18-rc4-mm3/kernel/fork.c
===================================================================
--- 2.6.18-rc4-mm3.orig/kernel/fork.c
+++ 2.6.18-rc4-mm3/kernel/fork.c
@@ -991,7 +991,7 @@ static struct task_struct *copy_process(
if (atomic_read(&p->user->processes) >=
p->signal->rlim[RLIMIT_NPROC].rlim_cur) {
if (!capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_RESOURCE) &&
- p->user != &root_user)
+ p->user != current->nsproxy->user_ns->root_user)
goto bad_fork_free;
}
_______________________________________________
Containers mailing list
Containers at lists.osdl.org
https://lists.osdl.org/mailman/listinfo/containers
More information about the Devel
mailing list