[Devel] [PATCH vz9] Revert "userns: associate user_struct with the user_namespace"

nb nikolay.borisov at virtuozzo.com
Tue Mar 7 10:25:13 MSK 2023



On 7.03.23 г. 5:23 ч., Pavel Tikhomirov wrote:
> After this revert user_struct->pipe_bufs (and obviousely other fields =) 
> ) becomes shared between root user of container and root user on host, 
> which is likely not what we want, as one ct can make all node starve for 
> pipe buffers now, see PIPE_MIN_DEF_BUFFERS case in alloc_pipe_info().

Unfortunately you are right, in this case I wonder if it makes more 
sense to also make pipe_buf part of the userns/ucount infrastructure. 
Alternatively, I can look into fixing just the release_task() problem, 
but I worry the fix might not be complete as the whole kernel needs to 
be audited due to the per-userns uidhash being rather core to the whole 
kernel.



> 
> I don't feel that this revert is right.
> 
> On 06.03.2023 21:16, Nikolay Borisov wrote:
>> In current vz9 kernel ucounts are already "virtualized" since they are 
>> tracked
>> per-userns/per-uid. Let's remove the virtuozzo code which adds yet 
>> another level
>> and breaks code which is using setuid. This fixes a kernel warning 
>> which was
>> generated when running user08 test from ltp. The reason for the 
>> warning was
>> that on clone with CLONE_NEWUSER and setuid the NPROC rlimit would be 
>> erroneously
>> tracked across setuid() since the per-ns struct_user means 
>> release_task() invocation
>> on process exit would erroneously account nproc counts.
>>
>> This reverts commit ff716deacf0c5c86ca53cee6793ff9382ad5aa02.
>>
>> https://jira.sw.ru/browse/PSBM-145641
>>
>> Signed-off-by: Nikolay Borisov <nikolay.borisov at virtuozzo.com>
>> ---
>>   include/linux/sched/user.h     |  1 -
>>   include/linux/user_namespace.h |  4 ----
>>   kernel/user.c                  | 22 +++++++++-------------
>>   kernel/user_namespace.c        | 13 -------------
>>   4 files changed, 9 insertions(+), 31 deletions(-)
>>
>> diff --git a/include/linux/sched/user.h b/include/linux/sched/user.h
>> index 5945f42179fc..65e447bcb7a4 100644
>> --- a/include/linux/sched/user.h
>> +++ b/include/linux/sched/user.h
>> @@ -46,7 +46,6 @@ extern struct user_struct *find_user(kuid_t);
>>   extern struct user_struct root_user;
>>   #define INIT_USER (&root_user)
>>
>> -extern struct user_struct * alloc_uid_ns(struct user_namespace *ns, 
>> kuid_t);
>>
>>   /* per-UID process charging. */
>>   extern struct user_struct * alloc_uid(kuid_t);
>> diff --git a/include/linux/user_namespace.h 
>> b/include/linux/user_namespace.h
>> index e5f4c5e9e587..33a4240e6a6f 100644
>> --- a/include/linux/user_namespace.h
>> +++ b/include/linux/user_namespace.h
>> @@ -14,9 +14,6 @@
>>   #define UID_GID_MAP_MAX_BASE_EXTENTS 5
>>   #define UID_GID_MAP_MAX_EXTENTS 340
>>
>> -#define UIDHASH_BITS   (CONFIG_BASE_SMALL ? 3 : 7)
>> -#define UIDHASH_SZ     (1 << UIDHASH_BITS)
>> -
>>   struct uid_gid_extent {
>>       u32 first;
>>       u32 lower_first;
>> @@ -70,7 +67,6 @@ struct user_namespace {
>>       struct uid_gid_map    uid_map;
>>       struct uid_gid_map    gid_map;
>>       struct uid_gid_map    projid_map;
>> -    struct hlist_head       uidhash_table[UIDHASH_SZ];
>>       struct user_namespace    *parent;
>>       int            level;
>>       kuid_t            owner;
>> diff --git a/kernel/user.c b/kernel/user.c
>> index ca0f7c78a045..c82399c1618a 100644
>> --- a/kernel/user.c
>> +++ b/kernel/user.c
>> @@ -9,7 +9,6 @@
>>    * able to have per-user limits for system resources.
>>    */
>>
>> -#include <linux/cred.h>
>>   #include <linux/init.h>
>>   #include <linux/sched.h>
>>   #include <linux/slab.h>
>> @@ -76,11 +75,14 @@ EXPORT_SYMBOL_GPL(init_user_ns);
>>    * when changing user ID's (ie setuid() and friends).
>>    */
>>
>> +#define UIDHASH_BITS    (CONFIG_BASE_SMALL ? 3 : 7)
>> +#define UIDHASH_SZ    (1 << UIDHASH_BITS)
>>   #define UIDHASH_MASK        (UIDHASH_SZ - 1)
>>   #define __uidhashfn(uid)    (((uid >> UIDHASH_BITS) + uid) & 
>> UIDHASH_MASK)
>> -#define uidhashentry(ns, uid)  ((ns)->uidhash_table + 
>> __uidhashfn((__kuid_val(uid))))
>> +#define uidhashentry(uid)    (uidhash_table + 
>> __uidhashfn((__kuid_val(uid))))
>>
>>   static struct kmem_cache *uid_cachep;
>> +static struct hlist_head uidhash_table[UIDHASH_SZ];
>>
>>   /*
>>    * The uidhash_lock is mostly taken from process context, but it is
>> @@ -149,10 +151,9 @@ struct user_struct *find_user(kuid_t uid)
>>   {
>>       struct user_struct *ret;
>>       unsigned long flags;
>> -    struct user_namespace *ns = current_user_ns();
>>
>>       spin_lock_irqsave(&uidhash_lock, flags);
>> -    ret = uid_hash_find(uid, uidhashentry(ns, uid));
>> +    ret = uid_hash_find(uid, uidhashentry(uid));
>>       spin_unlock_irqrestore(&uidhash_lock, flags);
>>       return ret;
>>   }
>> @@ -168,9 +169,9 @@ void free_uid(struct user_struct *up)
>>           free_user(up, flags);
>>   }
>>
>> -struct user_struct *alloc_uid_ns(struct user_namespace *ns, kuid_t uid)
>> +struct user_struct *alloc_uid(kuid_t uid)
>>   {
>> -    struct hlist_head *hashent = uidhashentry(ns, uid);
>> +    struct hlist_head *hashent = uidhashentry(uid);
>>       struct user_struct *up, *new;
>>
>>       spin_lock_irq(&uidhash_lock);
>> @@ -205,11 +206,6 @@ struct user_struct *alloc_uid_ns(struct 
>> user_namespace *ns, kuid_t uid)
>>       return up;
>>   }
>>
>> -struct user_struct *alloc_uid(kuid_t uid)
>> -{
>> -    return alloc_uid_ns(current_user_ns(), uid);
>> -}
>> -
>>   static int __init uid_cache_init(void)
>>   {
>>       int n;
>> @@ -218,11 +214,11 @@ static int __init uid_cache_init(void)
>>               0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
>>
>>       for(n = 0; n < UIDHASH_SZ; ++n)
>> -        INIT_HLIST_HEAD(init_user_ns.uidhash_table + n);
>> +        INIT_HLIST_HEAD(uidhash_table + n);
>>
>>       /* Insert the root user immediately (init already runs as root) */
>>       spin_lock_irq(&uidhash_lock);
>> -    uid_hash_insert(&root_user, uidhashentry(&init_user_ns, 
>> GLOBAL_ROOT_UID));
>> +    uid_hash_insert(&root_user, uidhashentry(GLOBAL_ROOT_UID));
>>       spin_unlock_irq(&uidhash_lock);
>>
>>       return 0;
>> diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
>> index e175452253d3..5481ba44a8d6 100644
>> --- a/kernel/user_namespace.c
>> +++ b/kernel/user_namespace.c
>> @@ -81,7 +81,6 @@ static unsigned long enforced_nproc_rlimit(void)
>>   int create_user_ns(struct cred *new)
>>   {
>>       struct user_namespace *ns, *parent_ns = new->user_ns;
>> -    struct user_struct *new_user;
>>       kuid_t owner = new->euid;
>>       kgid_t group = new->egid;
>>       struct ucounts *ucounts;
>> @@ -125,17 +124,6 @@ int create_user_ns(struct cred *new)
>>           goto fail_free;
>>       ns->ns.ops = &userns_operations;
>>
>> -    for (i = 0; i < UIDHASH_SZ; ++i)
>> -        INIT_HLIST_HEAD(ns->uidhash_table + i);
>> -
>> -    ret = -ENOMEM;
>> -    new_user = alloc_uid_ns(ns, owner);
>> -    if (!new_user)
>> -        goto fail_uid;
>> -
>> -    free_uid(new->user);
>> -    new->user = new_user;
>> -
>>       refcount_set(&ns->ns.count, 1);
>>       /* Leave the new->user_ns reference with the new user namespace. */
>>       ns->parent = parent_ns;
>> @@ -171,7 +159,6 @@ int create_user_ns(struct cred *new)
>>   #ifdef CONFIG_PERSISTENT_KEYRINGS
>>       key_put(ns->persistent_keyring_register);
>>   #endif
>> -fail_uid:
>>       ns_free_inum(&ns->ns);
>>   fail_free:
>>       kmem_cache_free(user_ns_cachep, ns);
>> -- 
>> 2.31.1
>>
>> _______________________________________________
>> Devel mailing list
>> Devel at openvz.org
>> https://lists.openvz.org/mailman/listinfo/devel
> 


More information about the Devel mailing list