[Devel] [PATCH v3] ve/fs: add per-VE limit of mount points

Stanislav Kinsburskiу skinsbursky at odin.com
Fri Nov 13 06:19:32 PST 2015


13 нояб. 2015 г. 14:58 пользователь Evgenii Shatokhin <eshatokhin at odin.com> написал:
>
> 13.11.2015 16:40, Stanislav Kinsburskiу пишет: 
> > Acked-by: Stanislav Kinsburskiy <skinsbursky at odin.com> 
> > 
> > BTW, have you checked, that this patch works as expected? 
> > Just to make sure we didn't miss anything. 
>
> I have built the kernel RPMs with it and re-checked that this "mount bomb" 
>    https://github.com/avagin/ctb/blob/master/mounts/run.sh 
> ends with "cannot allocate memory" and makes no harm. 
>
> I did not test !CONFIG_VE case though. 
BTW, noone needs our kernel without CONFIG_VE.
I suspect, that it's not even compilable without this option.

>
> > 
> > 13 нояб. 2015 г. 14:01 пользователь Evgenii Shatokhin <eshatokhin at odin.com> написал: 
> >> 
> >> https://jira.sw.ru/browse/PSBM-34438 
> >> 
> >> (This fix was adapted from PCS6.) 
> >> 
> >> It is possible for a container to create lots of mount points, which may 
> >> make operations with them slower. As some of these operations take 
> >> global locks (namespace_sem, vfsmount_lock), it might affect other 
> >> containers as well. 
> >> 
> >> Let us limit the maximum number of mount points a VE may create. The 
> >> limit can be customized via /proc/sys/fs/ve-mount-nr knob. 
> >> 
> >> Changes in v.3: 
> >> 
> >> * Revisited VE-specific parts of the patch to reduce the impact on the 
> >>    generic code. 
> >> 
> >> Changes in v.2: 
> >> 
> >> * The situations where VE0 mounts something and another VE unmounts it 
> >>    seem to be of no concern. If so, it is OK not to alter struct  mount: 
> >>    the mount counter for a VE may become unbalanced but this is 
> >>    acceptable here. 
> >> 
> >> * The sysctl knob is now defined alongside other VE sysctls. 
> >> 
> >> Signed-off-by: Evgenii Shatokhin <eshatokhin at odin.com> 
> >> --- 
> >> fs/namespace.c      |  9 ++++++++- 
> >> include/linux/ve.h  | 27 +++++++++++++++++++++++++++ 
> >> kernel/ve/ve.c      |  2 ++ 
> >> kernel/ve/veowner.c | 15 +++++++++++++++ 
> >> 4 files changed, 52 insertions(+), 1 deletion(-) 
> >> 
> >> diff --git a/fs/namespace.c b/fs/namespace.c 
> >> index 8909c13..b4ea5a5 100644 
> >> --- a/fs/namespace.c 
> >> +++ b/fs/namespace.c 
> >> @@ -165,7 +165,12 @@ unsigned int mnt_get_count(struct mount *mnt) 
> >> 
> >> static struct mount *alloc_vfsmnt(const char *name) 
> >> { 
> >> - struct mount *mnt = kmem_cache_zalloc(mnt_cache, GFP_KERNEL); 
> >> + struct mount *mnt; 
> >> + 
> >> + if (!ve_mount_allowed()) 
> >> + return NULL; 
> >> + 
> >> + mnt = kmem_cache_zalloc(mnt_cache, GFP_KERNEL); 
> >> if (mnt) { 
> >> int err; 
> >> 
> >> @@ -202,6 +207,7 @@ static struct mount *alloc_vfsmnt(const char *name) 
> >> INIT_HLIST_HEAD(&mnt->mnt_fsnotify_marks); 
> >> #endif 
> >> } 
> >> + ve_mount_nr_inc(); 
> >> return mnt; 
> >> 
> >> #ifdef CONFIG_SMP 
> >> @@ -542,6 +548,7 @@ int sb_prepare_remount_readonly(struct super_block *sb) 
> >> 
> >> static void free_vfsmnt(struct mount *mnt) 
> >> { 
> >> + ve_mount_nr_dec(); 
> >> kfree(mnt->mnt_devname); 
> >> mnt_free_id(mnt); 
> >> #ifdef CONFIG_SMP 
> >> diff --git a/include/linux/ve.h b/include/linux/ve.h 
> >> index 86b95c3..1249102 100644 
> >> --- a/include/linux/ve.h 
> >> +++ b/include/linux/ve.h 
> >> @@ -128,6 +128,10 @@ struct ve_struct { 
> >> unsigned long aio_nr; 
> >> unsigned long aio_max_nr; 
> >> #endif 
> >> + /* Number of mounts. May become unbalanced if VE0 mounts something 
> >> + * and the VE unmounts it. This is acceptable. 
> >> + */ 
> >> + int mnt_nr; 
> >> }; 
> >> 
> >> struct ve_devmnt { 
> >> @@ -145,6 +149,8 @@ extern int nr_ve; 
> >> extern struct proc_dir_entry *proc_vz_dir; 
> >> extern struct cgroup_subsys ve_subsys; 
> >> 
> >> +extern unsigned int sysctl_ve_mount_nr; 
> >> + 
> >> #ifdef CONFIG_VE_IPTABLES 
> >> extern __u64 ve_setup_iptables_mask(__u64 init_mask); 
> >> #endif 
> >> @@ -222,6 +228,23 @@ extern struct tty_driver *vtty_console_driver(int *index); 
> >> extern int vtty_open_master(envid_t veid, int idx); 
> >> #endif /* CONFIG_TTY */ 
> >> 
> >> +static inline int ve_mount_allowed(void) 
> >> +{ 
> >> + struct ve_struct *ve = get_exec_env(); 
> >> + 
> >> + return ve_is_super(ve) || ve->mnt_nr < sysctl_ve_mount_nr; 
> >> +} 
> >> + 
> >> +static inline void ve_mount_nr_inc(void) 
> >> +{ 
> >> + get_exec_env()->mnt_nr++; 
> >> +} 
> >> + 
> >> +static inline void ve_mount_nr_dec(void) 
> >> +{ 
> >> + get_exec_env()->mnt_nr--; 
> >> +} 
> >> + 
> >> #else /* CONFIG_VE */ 
> >> 
> >> #define ve_uevent_seqnum uevent_seqnum 
> >> @@ -253,6 +276,10 @@ static inline void monotonic_abs_to_ve(clockid_t which_clock, 
> >> struct timespec *tp) { } 
> >> static inline void monotonic_ve_to_abs(clockid_t which_clock, 
> >> struct timepsec *tp) { } 
> >> + 
> >> +static inline int ve_mount_allowed(void) { return 1; } 
> >> +static inline void ve_mount_nr_inc(void) { } 
> >> +static inline void ve_mount_nr_dec(void) { } 
> >> #endif /* CONFIG_VE */ 
> >> 
> >> #endif /* _LINUX_VE_H */ 
> >> diff --git a/kernel/ve/ve.c b/kernel/ve/ve.c 
> >> index e9219e6..ac2babb 100644 
> >> --- a/kernel/ve/ve.c 
> >> +++ b/kernel/ve/ve.c 
> >> @@ -82,6 +82,7 @@ struct ve_struct ve0 = { 
> >> #endif 
> >> .sched_lat_ve.cur = &ve0_lat_stats, 
> >> .init_cred = &init_cred, 
> >> + .mnt_nr = 0, 
> >> }; 
> >> EXPORT_SYMBOL(ve0); 
> >> 
> >> @@ -653,6 +654,7 @@ do_init: 
> >> ve->aio_nr = 0; 
> >> ve->aio_max_nr = AIO_MAX_NR_DEFAULT; 
> >> #endif 
> >> + ve->mnt_nr = 0; 
> >> 
> >> return &ve->css; 
> >> 
> >> diff --git a/kernel/ve/veowner.c b/kernel/ve/veowner.c 
> >> index 316e4d0..1a7e735 100644 
> >> --- a/kernel/ve/veowner.c 
> >> +++ b/kernel/ve/veowner.c 
> >> @@ -55,6 +55,14 @@ static void prepare_proc(void) 
> >> int ve_xattr_policy = VE_XATTR_POLICY_ACCEPT; 
> >> static int ve_area_access_check; 
> >> 
> >> +/* 
> >> + * Operations with a big amount of mount points can require a lot of time. 
> >> + * These operations take the global lock namespace_sem, so they can affect 
> >> + * other containers. Let us allow no more than sysctl_ve_mount_nr mount 
> >> + * points for a VE. 
> >> + */ 
> >> +unsigned int sysctl_ve_mount_nr = 4096; 
> >> + 
> >> static struct ctl_table vz_fs_table[] = { 
> >> { 
> >> .procname = "ve-area-access-check", 
> >> @@ -77,6 +85,13 @@ static struct ctl_table vz_fs_table[] = { 
> >> .mode = 0644 | S_ISVTX, 
> >> .proc_handler = &proc_dointvec_virtual, 
> >> }, 
> >> + { 
> >> + .procname       = "ve-mount-nr", 
> >> + .data           = &sysctl_ve_mount_nr, 
> >> + .maxlen         = sizeof(sysctl_ve_mount_nr), 
> >> + .mode           = 0644, 
> >> + .proc_handler   = proc_dointvec, 
> >> + }, 
> >> { 0 } 
> >> }; 
> >> 
> >> -- 
> >> 2.3.2 
> >> 
>



More information about the Devel mailing list