[Devel] [PATCH RFC] User namespaces: general cleanups
Serge E. Hallyn
serue at us.ibm.com
Thu Oct 9 18:19:17 PDT 2008
David, here is a new version taking into account your feedback from
yesterday.
Eric, it has changed quite a bit since you last looked at it, so if you
have a chance to take another look that'd be great. It's still just the
general cleanup, no handling of cross-userns capabilities or file
accesses :)
thanks,
-serge
>From da249dbe92c1b218f8194a7fd386e5fba91049c1 Mon Sep 17 00:00:00 2001
From: Serge Hallyn <serue at us.ibm.com>
Date: Thu, 9 Oct 2008 13:21:07 -0500
Subject: [PATCH 1/1] User namespaces: general cleanups
Move the user namespace from the nsproxy to the struct user_struct, because
1. otherwise a struct cred or struct user_struct is insufficient to
unambiguously determine uid equivalence
2. this nicely fits the hierarchical user namespace+struct user_struct
model.
When a user_namespace is created, the user which created it is
marked as its 'creator'. The user_namespace pins the creator.
Each userid in a user_ns pins the user_ns. This keeps refcounting
nice and simple. Refcounting rules are as follows:
The task pins the user struct.
The user struct pins its user namespace.
The user namespace pins the struct user which created it.
User namespaces are cloned during copy_creds(). Unsharing a new user_ns
is no longer possible. (We could re-add that, but it'll cause code
duplication and doesn't seem useful if PAM doesn't need to clone user
namespaces).
Open question: do we need to sched_switch_user(task) for the new
user namespace? My inclination is to say that ideally we want
scheduling to be based on the userid in the top level user_ns
anyway, so ignore the question for now.
Todo: remove debug printks
Changelog
Oct 9: Move create_new_userns from create_new_namespaces
to copy_creds().
Signed-off-by: Serge Hallyn <serue at us.ibm.com>
---
include/linux/init_task.h | 1 -
include/linux/nsproxy.h | 1 -
include/linux/sched.h | 1 +
include/linux/user_namespace.h | 13 ++------
kernel/cred.c | 28 ++++++++++++++++++-
kernel/fork.c | 5 +--
kernel/nsproxy.c | 15 +--------
kernel/sys.c | 4 +-
kernel/user.c | 52 ++++++++++++----------------------
kernel/user_namespace.c | 60 +++++++++++----------------------------
10 files changed, 74 insertions(+), 106 deletions(-)
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index edf0285..126c3c4 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -57,7 +57,6 @@ extern struct nsproxy init_nsproxy;
.mnt_ns = NULL, \
INIT_NET_NS(net_ns) \
INIT_IPC_NS(ipc_ns) \
- .user_ns = &init_user_ns, \
}
#define INIT_SIGHAND(sighand) { \
diff --git a/include/linux/nsproxy.h b/include/linux/nsproxy.h
index c8a768e..afad7de 100644
--- a/include/linux/nsproxy.h
+++ b/include/linux/nsproxy.h
@@ -27,7 +27,6 @@ struct nsproxy {
struct ipc_namespace *ipc_ns;
struct mnt_namespace *mnt_ns;
struct pid_namespace *pid_ns;
- struct user_namespace *user_ns;
struct net *net_ns;
};
extern struct nsproxy init_nsproxy;
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 86ba19c..86acc5f 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -595,6 +595,7 @@ struct user_struct {
/* Hash table maintenance information */
struct hlist_node uidhash_node;
uid_t uid;
+ struct user_namespace *user_ns;
#ifdef CONFIG_USER_SCHED
struct task_group *tg;
diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
index b5f41d4..d6e61a2 100644
--- a/include/linux/user_namespace.h
+++ b/include/linux/user_namespace.h
@@ -12,7 +12,7 @@
struct user_namespace {
struct kref kref;
struct hlist_head uidhash_table[UIDHASH_SZ];
- struct user_struct *root_user;
+ struct user_struct *creator;
};
extern struct user_namespace init_user_ns;
@@ -26,8 +26,7 @@ static inline struct user_namespace *get_user_ns(struct user_namespace *ns)
return ns;
}
-extern struct user_namespace *copy_user_ns(int flags,
- struct user_namespace *old_ns);
+extern struct user_struct *create_new_userns(struct task_struct *tsk);
extern void free_user_ns(struct kref *kref);
static inline void put_user_ns(struct user_namespace *ns)
@@ -43,13 +42,9 @@ static inline struct user_namespace *get_user_ns(struct user_namespace *ns)
return &init_user_ns;
}
-static inline struct user_namespace *copy_user_ns(int flags,
- struct user_namespace *old_ns)
+static inline struct user_struct *create_new_userns(struct task_struct *tsk)
{
- if (flags & CLONE_NEWUSER)
- return ERR_PTR(-EINVAL);
-
- return old_ns;
+ return ERR_PTR(-EINVAL);
}
static inline void put_user_ns(struct user_namespace *ns)
diff --git a/kernel/cred.c b/kernel/cred.c
index 13697ca..abaf318 100644
--- a/kernel/cred.c
+++ b/kernel/cred.c
@@ -274,6 +274,7 @@ int copy_creds(struct task_struct *p, unsigned long clone_flags)
struct thread_group_cred *tgcred;
#endif
struct cred *new;
+ struct user_struct *new_root = NULL;
mutex_init(&p->cred_exec_mutex);
@@ -289,9 +290,34 @@ int copy_creds(struct task_struct *p, unsigned long clone_flags)
return 0;
}
+ if (clone_flags & CLONE_NEWUSER) {
+ /*
+ * hopefully the capability check goes away when userns support
+ * is complete
+ */
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+ if (clone_flags & CLONE_THREAD)
+ return -EINVAL;
+ new_root = create_new_userns(p);
+ if (IS_ERR(new_root))
+ return PTR_ERR(new_root);
+ }
+
new = prepare_creds();
- if (!new)
+ if (!new) {
+ free_uid(new_root);
return -ENOMEM;
+ }
+
+ /* If we created a new user_ns, make its root user
+ * our user */
+ if (new_root) {
+ new->uid = new->euid = new->suid = new->fsuid = 0;
+ new->gid = new->egid = new->sgid = new->fsgid = 0;
+ free_uid(new->user);
+ new->user = new_root;
+ }
#ifdef CONFIG_KEYS
/* new threads get their own thread keyrings if their parent already
diff --git a/kernel/fork.c b/kernel/fork.c
index e65f424..b533278 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -933,7 +933,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
if (atomic_read(&p->real_cred->user->processes) >=
p->signal->rlim[RLIMIT_NPROC].rlim_cur) {
if (!capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_RESOURCE) &&
- p->real_cred->user != current->nsproxy->user_ns->root_user)
+ p->real_cred->user != INIT_USER)
goto bad_fork_free;
}
@@ -1553,8 +1553,7 @@ asmlinkage long sys_unshare(unsigned long unshare_flags)
err = -EINVAL;
if (unshare_flags & ~(CLONE_THREAD|CLONE_FS|CLONE_NEWNS|CLONE_SIGHAND|
CLONE_VM|CLONE_FILES|CLONE_SYSVSEM|
- CLONE_NEWUTS|CLONE_NEWIPC|CLONE_NEWUSER|
- CLONE_NEWNET))
+ CLONE_NEWUTS|CLONE_NEWIPC|CLONE_NEWNET))
goto bad_unshare_out;
/*
diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c
index 1d3ef29..63598dc 100644
--- a/kernel/nsproxy.c
+++ b/kernel/nsproxy.c
@@ -80,12 +80,6 @@ static struct nsproxy *create_new_namespaces(unsigned long flags,
goto out_pid;
}
- new_nsp->user_ns = copy_user_ns(flags, tsk->nsproxy->user_ns);
- if (IS_ERR(new_nsp->user_ns)) {
- err = PTR_ERR(new_nsp->user_ns);
- goto out_user;
- }
-
new_nsp->net_ns = copy_net_ns(flags, tsk->nsproxy->net_ns);
if (IS_ERR(new_nsp->net_ns)) {
err = PTR_ERR(new_nsp->net_ns);
@@ -95,9 +89,6 @@ static struct nsproxy *create_new_namespaces(unsigned long flags,
return new_nsp;
out_net:
- if (new_nsp->user_ns)
- put_user_ns(new_nsp->user_ns);
-out_user:
if (new_nsp->pid_ns)
put_pid_ns(new_nsp->pid_ns);
out_pid:
@@ -130,7 +121,7 @@ int copy_namespaces(unsigned long flags, struct task_struct *tsk)
get_nsproxy(old_ns);
if (!(flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC |
- CLONE_NEWUSER | CLONE_NEWPID | CLONE_NEWNET)))
+ CLONE_NEWPID | CLONE_NEWNET)))
return 0;
if (!capable(CAP_SYS_ADMIN)) {
@@ -173,8 +164,6 @@ void free_nsproxy(struct nsproxy *ns)
put_ipc_ns(ns->ipc_ns);
if (ns->pid_ns)
put_pid_ns(ns->pid_ns);
- if (ns->user_ns)
- put_user_ns(ns->user_ns);
put_net(ns->net_ns);
kmem_cache_free(nsproxy_cachep, ns);
}
@@ -189,7 +178,7 @@ int unshare_nsproxy_namespaces(unsigned long unshare_flags,
int err = 0;
if (!(unshare_flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC |
- CLONE_NEWUSER | CLONE_NEWNET)))
+ CLONE_NEWNET)))
return 0;
if (!capable(CAP_SYS_ADMIN))
diff --git a/kernel/sys.c b/kernel/sys.c
index 68ce096..d2bed52 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -565,13 +565,13 @@ static int set_user(struct cred *new)
{
struct user_struct *new_user;
- new_user = alloc_uid(current->nsproxy->user_ns, new->uid);
+ new_user = alloc_uid(current_user()->user_ns, new->uid);
if (!new_user)
return -EAGAIN;
if (atomic_read(&new_user->processes) >=
current->signal->rlim[RLIMIT_NPROC].rlim_cur &&
- new_user != current->nsproxy->user_ns->root_user) {
+ new_user != INIT_USER) {
free_uid(new_user);
return -EAGAIN;
}
diff --git a/kernel/user.c b/kernel/user.c
index ed4dc57..1390387 100644
--- a/kernel/user.c
+++ b/kernel/user.c
@@ -20,9 +20,9 @@
struct user_namespace init_user_ns = {
.kref = {
- .refcount = ATOMIC_INIT(2),
+ .refcount = ATOMIC_INIT(1),
},
- .root_user = &root_user,
+ .creator = &root_user,
};
EXPORT_SYMBOL_GPL(init_user_ns);
@@ -48,12 +48,14 @@ static struct kmem_cache *uid_cachep;
*/
static DEFINE_SPINLOCK(uidhash_lock);
+/* root_user.__count is 2, 1 for init task cred, 1 for init_user_ns->creator */
struct user_struct root_user = {
- .__count = ATOMIC_INIT(1),
+ .__count = ATOMIC_INIT(2),
.processes = ATOMIC_INIT(1),
.files = ATOMIC_INIT(0),
.sigpending = ATOMIC_INIT(0),
.locked_shm = 0,
+ .user_ns = &init_user_ns,
#ifdef CONFIG_USER_SCHED
.tg = &init_task_group,
#endif
@@ -314,12 +316,15 @@ done:
* IRQ state (as stored in flags) is restored and uidhash_lock released
* upon function exit.
*/
-static inline void free_user(struct user_struct *up, unsigned long flags)
+static void free_user(struct user_struct *up, unsigned long flags)
{
+ printk(KERN_NOTICE "%s: freeing uid (%d) in ns (%p)\n",
+ __func__, up->uid, up->user_ns);
/* restore back the count */
atomic_inc(&up->__count);
spin_unlock_irqrestore(&uidhash_lock, flags);
+ put_user_ns(up->user_ns);
INIT_WORK(&up->work, remove_user_sysfs_dir);
schedule_work(&up->work);
}
@@ -335,13 +340,16 @@ static inline void uids_mutex_unlock(void) { }
* IRQ state (as stored in flags) is restored and uidhash_lock released
* upon function exit.
*/
-static inline void free_user(struct user_struct *up, unsigned long flags)
+static void free_user(struct user_struct *up, unsigned long flags)
{
+ printk(KERN_NOTICE "%s: freeing uid (%d) in ns (%p)\n",
+ __func__, up->uid, up->user_ns);
uid_hash_remove(up);
spin_unlock_irqrestore(&uidhash_lock, flags);
sched_destroy_user(up);
key_put(up->uid_keyring);
key_put(up->session_keyring);
+ put_user_ns(up->user_ns);
kmem_cache_free(uid_cachep, up);
}
@@ -357,7 +365,7 @@ struct user_struct *find_user(uid_t uid)
{
struct user_struct *ret;
unsigned long flags;
- struct user_namespace *ns = current->nsproxy->user_ns;
+ struct user_namespace *ns = current_user()->user_ns;
spin_lock_irqsave(&uidhash_lock, flags);
ret = uid_hash_find(uid, uidhashentry(ns, uid));
@@ -404,6 +412,8 @@ struct user_struct *alloc_uid(struct user_namespace *ns, uid_t uid)
if (sched_create_user(new) < 0)
goto out_free_user;
+ new->user_ns = get_user_ns(ns);
+
if (uids_user_create(new))
goto out_destoy_sched;
@@ -432,10 +442,13 @@ struct user_struct *alloc_uid(struct user_namespace *ns, uid_t uid)
uids_mutex_unlock();
+ printk(KERN_NOTICE "%s: allocated a uid (%d) in user_ns (%p)\n",
+ __func__, uid, ns);
return up;
out_destoy_sched:
sched_destroy_user(new);
+ put_user_ns(new->user_ns);
out_free_user:
kmem_cache_free(uid_cachep, new);
out_unlock:
@@ -443,33 +456,6 @@ out_unlock:
return NULL;
}
-#ifdef CONFIG_USER_NS
-void release_uids(struct user_namespace *ns)
-{
- int i;
- unsigned long flags;
- struct hlist_head *head;
- struct hlist_node *nd;
-
- spin_lock_irqsave(&uidhash_lock, flags);
- /*
- * collapse the chains so that the user_struct-s will
- * be still alive, but not in hashes. subsequent free_uid()
- * will free them.
- */
- for (i = 0; i < UIDHASH_SZ; i++) {
- head = ns->uidhash_table + i;
- while (!hlist_empty(head)) {
- nd = head->first;
- hlist_del_init(nd);
- }
- }
- spin_unlock_irqrestore(&uidhash_lock, flags);
-
- free_uid(ns->root_user);
-}
-#endif
-
static int __init uid_cache_init(void)
{
int n;
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index 0d9c51d..86200b1 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -9,17 +9,16 @@
#include <linux/nsproxy.h>
#include <linux/slab.h>
#include <linux/user_namespace.h>
+#include <linux/cred.h>
/*
- * Clone a new ns copying an original user ns, setting refcount to 1
- * @old_ns: namespace to clone
- * Return NULL on error (failure to kmalloc), new ns otherwise
+ * Return a new user_struct which is root in a new user_ns. This is called by
+ * copy_creds(), which will finish setting the target task's credentials.
*/
-static struct user_namespace *clone_user_ns(struct user_namespace *old_ns)
+struct user_struct *create_new_userns(struct task_struct *tsk)
{
struct user_namespace *ns;
- struct user_struct *new_user;
- struct cred *new;
+ struct user_struct *root_user;
int n;
ns = kmalloc(sizeof(struct user_namespace), GFP_KERNEL);
@@ -31,48 +30,22 @@ static struct user_namespace *clone_user_ns(struct user_namespace *old_ns)
for (n = 0; n < UIDHASH_SZ; ++n)
INIT_HLIST_HEAD(ns->uidhash_table + n);
- /* Insert new root user. */
- ns->root_user = alloc_uid(ns, 0);
- if (!ns->root_user) {
+ /* Alloc new root user. */
+ root_user = alloc_uid(ns, 0);
+ if (!root_user) {
kfree(ns);
return ERR_PTR(-ENOMEM);
}
- /* Reset current->user with a new one */
- new_user = alloc_uid(ns, current_uid());
- if (!new_user) {
- free_uid(ns->root_user);
- kfree(ns);
- return ERR_PTR(-ENOMEM);
- }
-
- /* Install the new user */
- new = prepare_creds();
- if (!new) {
- free_uid(new_user);
- free_uid(ns->root_user);
- kfree(ns);
- }
- free_uid(new->user);
- new->user = new_user;
- commit_creds(new);
- return ns;
-}
-
-struct user_namespace * copy_user_ns(int flags, struct user_namespace *old_ns)
-{
- struct user_namespace *new_ns;
-
- BUG_ON(!old_ns);
- get_user_ns(old_ns);
-
- if (!(flags & CLONE_NEWUSER))
- return old_ns;
+ /* save away and pin the creating user */
+ ns->creator = tsk->cred->user; /* tsk is still being created */
+ get_uid(ns->creator);
- new_ns = clone_user_ns(old_ns);
+ /* alloc_uid() incremented the userns refcount. Just set it to 1 */
+ kref_set(&ns->kref, 1);
- put_user_ns(old_ns);
- return new_ns;
+ printk(KERN_NOTICE "allocated a user_ns (%p)\n", ns);
+ return root_user;
}
void free_user_ns(struct kref *kref)
@@ -80,7 +53,8 @@ void free_user_ns(struct kref *kref)
struct user_namespace *ns;
ns = container_of(kref, struct user_namespace, kref);
- release_uids(ns);
+ free_uid(ns->creator);
+ printk(KERN_NOTICE "freeing a user_ns (%p)\n", ns);
kfree(ns);
}
EXPORT_SYMBOL(free_user_ns);
--
1.5.4.3
_______________________________________________
Containers mailing list
Containers at lists.linux-foundation.org
https://lists.linux-foundation.org/mailman/listinfo/containers
More information about the Devel
mailing list