[Devel] [PATCH v3] ve/fs: add per-VE limit of mount points
Evgenii Shatokhin
eshatokhin at odin.com
Fri Nov 13 05:58:30 PST 2015
13.11.2015 16:40, Stanislav Kinsburskiy пишет:
> Acked-by: Stanislav Kinsburskiy <skinsbursky at odin.com>
>
> BTW, have you checked, that this patch works as expected?
> Just to make sure we didn't miss anything.
I have built the kernel RPMs with it and re-checked that this "mount bomb"
https://github.com/avagin/ctb/blob/master/mounts/run.sh
ends with "cannot allocate memory" and does no harm.
I did not test the !CONFIG_VE case, though.
>
> 13 нояб. 2015 г. 14:01 пользователь Evgenii Shatokhin <eshatokhin at odin.com> написал:
>>
>> https://jira.sw.ru/browse/PSBM-34438
>>
>> (This fix was adapted from PCS6.)
>>
>> It is possible for a container to create lots of mount points, which may
>> make operations with them slower. As some of these operations take
>> global locks (namespace_sem, vfsmount_lock), it might affect other
>> containers as well.
>>
>> Let us limit the maximum number of mount points a VE may create. The
>> limit can be customized via /proc/sys/fs/ve-mount-nr knob.
>>
>> Changes in v.3:
>>
>> * Revisited VE-specific parts of the patch to reduce the impact on the
>> generic code.
>>
>> Changes in v.2:
>>
>> * The situations where VE0 mounts something and another VE unmounts it
>> seem to be of no concern. If so, it is OK not to alter struct mount:
>> the mount counter for a VE may become unbalanced but this is
>> acceptable here.
>>
>> * The sysctl knob is now defined alongside other VE sysctls.
>>
>> Signed-off-by: Evgenii Shatokhin <eshatokhin at odin.com>
>> ---
>> fs/namespace.c | 9 ++++++++-
>> include/linux/ve.h | 27 +++++++++++++++++++++++++++
>> kernel/ve/ve.c | 2 ++
>> kernel/ve/veowner.c | 15 +++++++++++++++
>> 4 files changed, 52 insertions(+), 1 deletion(-)
>>
>> diff --git a/fs/namespace.c b/fs/namespace.c
>> index 8909c13..b4ea5a5 100644
>> --- a/fs/namespace.c
>> +++ b/fs/namespace.c
>> @@ -165,7 +165,12 @@ unsigned int mnt_get_count(struct mount *mnt)
>>
>> static struct mount *alloc_vfsmnt(const char *name)
>> {
>> - struct mount *mnt = kmem_cache_zalloc(mnt_cache, GFP_KERNEL);
>> + struct mount *mnt;
>> +
>> + if (!ve_mount_allowed())
>> + return NULL;
>> +
>> + mnt = kmem_cache_zalloc(mnt_cache, GFP_KERNEL);
>> if (mnt) {
>> int err;
>>
>> @@ -202,6 +207,7 @@ static struct mount *alloc_vfsmnt(const char *name)
>> INIT_HLIST_HEAD(&mnt->mnt_fsnotify_marks);
>> #endif
>> }
>> + ve_mount_nr_inc();
>> return mnt;
>>
>> #ifdef CONFIG_SMP
>> @@ -542,6 +548,7 @@ int sb_prepare_remount_readonly(struct super_block *sb)
>>
>> static void free_vfsmnt(struct mount *mnt)
>> {
>> + ve_mount_nr_dec();
>> kfree(mnt->mnt_devname);
>> mnt_free_id(mnt);
>> #ifdef CONFIG_SMP
>> diff --git a/include/linux/ve.h b/include/linux/ve.h
>> index 86b95c3..1249102 100644
>> --- a/include/linux/ve.h
>> +++ b/include/linux/ve.h
>> @@ -128,6 +128,10 @@ struct ve_struct {
>> unsigned long aio_nr;
>> unsigned long aio_max_nr;
>> #endif
>> + /* Number of mounts. May become unbalanced if VE0 mounts something
>> + * and the VE unmounts it. This is acceptable.
>> + */
>> + int mnt_nr;
>> };
>>
>> struct ve_devmnt {
>> @@ -145,6 +149,8 @@ extern int nr_ve;
>> extern struct proc_dir_entry *proc_vz_dir;
>> extern struct cgroup_subsys ve_subsys;
>>
>> +extern unsigned int sysctl_ve_mount_nr;
>> +
>> #ifdef CONFIG_VE_IPTABLES
>> extern __u64 ve_setup_iptables_mask(__u64 init_mask);
>> #endif
>> @@ -222,6 +228,23 @@ extern struct tty_driver *vtty_console_driver(int *index);
>> extern int vtty_open_master(envid_t veid, int idx);
>> #endif /* CONFIG_TTY */
>>
>> +static inline int ve_mount_allowed(void)
>> +{
>> + struct ve_struct *ve = get_exec_env();
>> +
>> + return ve_is_super(ve) || ve->mnt_nr < sysctl_ve_mount_nr;
>> +}
>> +
>> +static inline void ve_mount_nr_inc(void)
>> +{
>> + get_exec_env()->mnt_nr++;
>> +}
>> +
>> +static inline void ve_mount_nr_dec(void)
>> +{
>> + get_exec_env()->mnt_nr--;
>> +}
>> +
>> #else /* CONFIG_VE */
>>
>> #define ve_uevent_seqnum uevent_seqnum
>> @@ -253,6 +276,10 @@ static inline void monotonic_abs_to_ve(clockid_t which_clock,
>> struct timespec *tp) { }
>> static inline void monotonic_ve_to_abs(clockid_t which_clock,
>> struct timepsec *tp) { }
>> +
>> +static inline int ve_mount_allowed(void) { return 1; }
>> +static inline void ve_mount_nr_inc(void) { }
>> +static inline void ve_mount_nr_dec(void) { }
>> #endif /* CONFIG_VE */
>>
>> #endif /* _LINUX_VE_H */
>> diff --git a/kernel/ve/ve.c b/kernel/ve/ve.c
>> index e9219e6..ac2babb 100644
>> --- a/kernel/ve/ve.c
>> +++ b/kernel/ve/ve.c
>> @@ -82,6 +82,7 @@ struct ve_struct ve0 = {
>> #endif
>> .sched_lat_ve.cur = &ve0_lat_stats,
>> .init_cred = &init_cred,
>> + .mnt_nr = 0,
>> };
>> EXPORT_SYMBOL(ve0);
>>
>> @@ -653,6 +654,7 @@ do_init:
>> ve->aio_nr = 0;
>> ve->aio_max_nr = AIO_MAX_NR_DEFAULT;
>> #endif
>> + ve->mnt_nr = 0;
>>
>> return &ve->css;
>>
>> diff --git a/kernel/ve/veowner.c b/kernel/ve/veowner.c
>> index 316e4d0..1a7e735 100644
>> --- a/kernel/ve/veowner.c
>> +++ b/kernel/ve/veowner.c
>> @@ -55,6 +55,14 @@ static void prepare_proc(void)
>> int ve_xattr_policy = VE_XATTR_POLICY_ACCEPT;
>> static int ve_area_access_check;
>>
>> +/*
>> + * Operations on a large number of mount points can take a long time.
>> + * These operations hold the global lock namespace_sem, so they can affect
>> + * other containers as well. Let us allow no more than sysctl_ve_mount_nr
>> + * mount points per VE.
>> + */
>> +unsigned int sysctl_ve_mount_nr = 4096;
>> +
>> static struct ctl_table vz_fs_table[] = {
>> {
>> .procname = "ve-area-access-check",
>> @@ -77,6 +85,13 @@ static struct ctl_table vz_fs_table[] = {
>> .mode = 0644 | S_ISVTX,
>> .proc_handler = &proc_dointvec_virtual,
>> },
>> + {
>> + .procname = "ve-mount-nr",
>> + .data = &sysctl_ve_mount_nr,
>> + .maxlen = sizeof(sysctl_ve_mount_nr),
>> + .mode = 0644,
>> + .proc_handler = proc_dointvec,
>> + },
>> { 0 }
>> };
>>
>> --
>> 2.3.2
>>
More information about the Devel
mailing list