[Devel] [PATCH vz9] Revert "userns: associate user_struct with the user_namespace"
nb
nikolay.borisov at virtuozzo.com
Tue Mar 7 10:25:13 MSK 2023
On 7.03.23 г. 5:23 ч., Pavel Tikhomirov wrote:
> After this revert user_struct->pipe_bufs (and obviousely other fields =)
> ) becomes shared between root user of container and root user on host,
> which is likely not what we want, as one ct can make all node starve for
> pipe buffers now, see PIPE_MIN_DEF_BUFFERS case in alloc_pipe_info().
Unfortunately you are right, in this case I wonder if it makes more
sense to also make pipe_buf part of the userns/ucount infrastructure.
Alternatively, I can look into fixing just the release_task() problem,
but I worry the fix might not be complete as the whole kernel needs to
be audited due to the per-userns uidhash being rather core to the whole
kernel.
>
> I don't feel that this revert is right.
>
> On 06.03.2023 21:16, Nikolay Borisov wrote:
>> In current vz9 kernel ucounts are already "virtualized" since they are
>> tracked
>> per-userns/per-uid. Let's remove the virtuozzo code which adds yet
>> another level
>> and breaks code which is using setuid. This fixes a kernel warning
>> which was
>> generated when running user08 test from ltp. The reason for the
>> warning was
>> that on clone with CLONE_NEWUSER and setuid the NPROC rlimit would be
>> erroneously
>> tracked across setuid() since the per-ns struct_user means
>> release_task() invocation
>> on process exit would erroneously account nproc counts.
>>
>> This reverts commit ff716deacf0c5c86ca53cee6793ff9382ad5aa02.
>>
>> https://jira.sw.ru/browse/PSBM-145641
>>
>> Signed-off-by: Nikolay Borisov <nikolay.borisov at virtuozzo.com>
>> ---
>> include/linux/sched/user.h | 1 -
>> include/linux/user_namespace.h | 4 ----
>> kernel/user.c | 22 +++++++++-------------
>> kernel/user_namespace.c | 13 -------------
>> 4 files changed, 9 insertions(+), 31 deletions(-)
>>
>> diff --git a/include/linux/sched/user.h b/include/linux/sched/user.h
>> index 5945f42179fc..65e447bcb7a4 100644
>> --- a/include/linux/sched/user.h
>> +++ b/include/linux/sched/user.h
>> @@ -46,7 +46,6 @@ extern struct user_struct *find_user(kuid_t);
>> extern struct user_struct root_user;
>> #define INIT_USER (&root_user)
>>
>> -extern struct user_struct * alloc_uid_ns(struct user_namespace *ns,
>> kuid_t);
>>
>> /* per-UID process charging. */
>> extern struct user_struct * alloc_uid(kuid_t);
>> diff --git a/include/linux/user_namespace.h
>> b/include/linux/user_namespace.h
>> index e5f4c5e9e587..33a4240e6a6f 100644
>> --- a/include/linux/user_namespace.h
>> +++ b/include/linux/user_namespace.h
>> @@ -14,9 +14,6 @@
>> #define UID_GID_MAP_MAX_BASE_EXTENTS 5
>> #define UID_GID_MAP_MAX_EXTENTS 340
>>
>> -#define UIDHASH_BITS (CONFIG_BASE_SMALL ? 3 : 7)
>> -#define UIDHASH_SZ (1 << UIDHASH_BITS)
>> -
>> struct uid_gid_extent {
>> u32 first;
>> u32 lower_first;
>> @@ -70,7 +67,6 @@ struct user_namespace {
>> struct uid_gid_map uid_map;
>> struct uid_gid_map gid_map;
>> struct uid_gid_map projid_map;
>> - struct hlist_head uidhash_table[UIDHASH_SZ];
>> struct user_namespace *parent;
>> int level;
>> kuid_t owner;
>> diff --git a/kernel/user.c b/kernel/user.c
>> index ca0f7c78a045..c82399c1618a 100644
>> --- a/kernel/user.c
>> +++ b/kernel/user.c
>> @@ -9,7 +9,6 @@
>> * able to have per-user limits for system resources.
>> */
>>
>> -#include <linux/cred.h>
>> #include <linux/init.h>
>> #include <linux/sched.h>
>> #include <linux/slab.h>
>> @@ -76,11 +75,14 @@ EXPORT_SYMBOL_GPL(init_user_ns);
>> * when changing user ID's (ie setuid() and friends).
>> */
>>
>> +#define UIDHASH_BITS (CONFIG_BASE_SMALL ? 3 : 7)
>> +#define UIDHASH_SZ (1 << UIDHASH_BITS)
>> #define UIDHASH_MASK (UIDHASH_SZ - 1)
>> #define __uidhashfn(uid) (((uid >> UIDHASH_BITS) + uid) &
>> UIDHASH_MASK)
>> -#define uidhashentry(ns, uid) ((ns)->uidhash_table +
>> __uidhashfn((__kuid_val(uid))))
>> +#define uidhashentry(uid) (uidhash_table +
>> __uidhashfn((__kuid_val(uid))))
>>
>> static struct kmem_cache *uid_cachep;
>> +static struct hlist_head uidhash_table[UIDHASH_SZ];
>>
>> /*
>> * The uidhash_lock is mostly taken from process context, but it is
>> @@ -149,10 +151,9 @@ struct user_struct *find_user(kuid_t uid)
>> {
>> struct user_struct *ret;
>> unsigned long flags;
>> - struct user_namespace *ns = current_user_ns();
>>
>> spin_lock_irqsave(&uidhash_lock, flags);
>> - ret = uid_hash_find(uid, uidhashentry(ns, uid));
>> + ret = uid_hash_find(uid, uidhashentry(uid));
>> spin_unlock_irqrestore(&uidhash_lock, flags);
>> return ret;
>> }
>> @@ -168,9 +169,9 @@ void free_uid(struct user_struct *up)
>> free_user(up, flags);
>> }
>>
>> -struct user_struct *alloc_uid_ns(struct user_namespace *ns, kuid_t uid)
>> +struct user_struct *alloc_uid(kuid_t uid)
>> {
>> - struct hlist_head *hashent = uidhashentry(ns, uid);
>> + struct hlist_head *hashent = uidhashentry(uid);
>> struct user_struct *up, *new;
>>
>> spin_lock_irq(&uidhash_lock);
>> @@ -205,11 +206,6 @@ struct user_struct *alloc_uid_ns(struct
>> user_namespace *ns, kuid_t uid)
>> return up;
>> }
>>
>> -struct user_struct *alloc_uid(kuid_t uid)
>> -{
>> - return alloc_uid_ns(current_user_ns(), uid);
>> -}
>> -
>> static int __init uid_cache_init(void)
>> {
>> int n;
>> @@ -218,11 +214,11 @@ static int __init uid_cache_init(void)
>> 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
>>
>> for(n = 0; n < UIDHASH_SZ; ++n)
>> - INIT_HLIST_HEAD(init_user_ns.uidhash_table + n);
>> + INIT_HLIST_HEAD(uidhash_table + n);
>>
>> /* Insert the root user immediately (init already runs as root) */
>> spin_lock_irq(&uidhash_lock);
>> - uid_hash_insert(&root_user, uidhashentry(&init_user_ns,
>> GLOBAL_ROOT_UID));
>> + uid_hash_insert(&root_user, uidhashentry(GLOBAL_ROOT_UID));
>> spin_unlock_irq(&uidhash_lock);
>>
>> return 0;
>> diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
>> index e175452253d3..5481ba44a8d6 100644
>> --- a/kernel/user_namespace.c
>> +++ b/kernel/user_namespace.c
>> @@ -81,7 +81,6 @@ static unsigned long enforced_nproc_rlimit(void)
>> int create_user_ns(struct cred *new)
>> {
>> struct user_namespace *ns, *parent_ns = new->user_ns;
>> - struct user_struct *new_user;
>> kuid_t owner = new->euid;
>> kgid_t group = new->egid;
>> struct ucounts *ucounts;
>> @@ -125,17 +124,6 @@ int create_user_ns(struct cred *new)
>> goto fail_free;
>> ns->ns.ops = &userns_operations;
>>
>> - for (i = 0; i < UIDHASH_SZ; ++i)
>> - INIT_HLIST_HEAD(ns->uidhash_table + i);
>> -
>> - ret = -ENOMEM;
>> - new_user = alloc_uid_ns(ns, owner);
>> - if (!new_user)
>> - goto fail_uid;
>> -
>> - free_uid(new->user);
>> - new->user = new_user;
>> -
>> refcount_set(&ns->ns.count, 1);
>> /* Leave the new->user_ns reference with the new user namespace. */
>> ns->parent = parent_ns;
>> @@ -171,7 +159,6 @@ int create_user_ns(struct cred *new)
>> #ifdef CONFIG_PERSISTENT_KEYRINGS
>> key_put(ns->persistent_keyring_register);
>> #endif
>> -fail_uid:
>> ns_free_inum(&ns->ns);
>> fail_free:
>> kmem_cache_free(user_ns_cachep, ns);
>> --
>> 2.31.1
>>
>> _______________________________________________
>> Devel mailing list
>> Devel at openvz.org
>> https://lists.openvz.org/mailman/listinfo/devel
>
More information about the Devel
mailing list