[Devel] [PATCH v3] ve/fs: add per-VE limit of mount points

Evgenii Shatokhin eshatokhin at odin.com
Fri Nov 13 05:01:39 PST 2015


https://jira.sw.ru/browse/PSBM-34438

(This fix was adapted from PCS6.)

It is possible for a container to create lots of mount points, which may
make operations on them slower. As some of these operations take
global locks (namespace_sem, vfsmount_lock), this may affect other
containers as well.

Let us limit the maximum number of mount points a VE may create. The
limit can be customized via the /proc/sys/fs/ve-mount-nr knob.

Changes in v.3:

* Revisited VE-specific parts of the patch to reduce the impact on the
  generic code.

Changes in v.2:

* The situations where VE0 mounts something and another VE unmounts it
  seem to be of no concern. If so, it is OK not to alter struct mount:
  the mount counter for a VE may become unbalanced but this is
  acceptable here.

* The sysctl knob is now defined alongside other VE sysctls.

Signed-off-by: Evgenii Shatokhin <eshatokhin at odin.com>
---
 fs/namespace.c      |  9 ++++++++-
 include/linux/ve.h  | 27 +++++++++++++++++++++++++++
 kernel/ve/ve.c      |  2 ++
 kernel/ve/veowner.c | 15 +++++++++++++++
 4 files changed, 52 insertions(+), 1 deletion(-)

diff --git a/fs/namespace.c b/fs/namespace.c
index 8909c13..b4ea5a5 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -165,7 +165,12 @@ unsigned int mnt_get_count(struct mount *mnt)
 
 static struct mount *alloc_vfsmnt(const char *name)
 {
-	struct mount *mnt = kmem_cache_zalloc(mnt_cache, GFP_KERNEL);
+	struct mount *mnt;
+
+	if (!ve_mount_allowed())
+		return NULL;
+
+	mnt = kmem_cache_zalloc(mnt_cache, GFP_KERNEL);
 	if (mnt) {
 		int err;
 
@@ -202,6 +207,7 @@ static struct mount *alloc_vfsmnt(const char *name)
 		INIT_HLIST_HEAD(&mnt->mnt_fsnotify_marks);
 #endif
 	}
+	ve_mount_nr_inc();
 	return mnt;
 
 #ifdef CONFIG_SMP
@@ -542,6 +548,7 @@ int sb_prepare_remount_readonly(struct super_block *sb)
 
 static void free_vfsmnt(struct mount *mnt)
 {
+	ve_mount_nr_dec();
 	kfree(mnt->mnt_devname);
 	mnt_free_id(mnt);
 #ifdef CONFIG_SMP
diff --git a/include/linux/ve.h b/include/linux/ve.h
index 86b95c3..1249102 100644
--- a/include/linux/ve.h
+++ b/include/linux/ve.h
@@ -128,6 +128,10 @@ struct ve_struct {
 	unsigned long		aio_nr;
 	unsigned long		aio_max_nr;
 #endif
+	/* Number of mounts. May become unbalanced if VE0 mounts something
+	 * and the VE unmounts it. This is acceptable.
+	 */
+	int			mnt_nr;
 };
 
 struct ve_devmnt {
@@ -145,6 +149,8 @@ extern int nr_ve;
 extern struct proc_dir_entry *proc_vz_dir;
 extern struct cgroup_subsys ve_subsys;
 
+extern unsigned int sysctl_ve_mount_nr;
+
 #ifdef CONFIG_VE_IPTABLES
 extern __u64 ve_setup_iptables_mask(__u64 init_mask);
 #endif
@@ -222,6 +228,23 @@ extern struct tty_driver *vtty_console_driver(int *index);
 extern int vtty_open_master(envid_t veid, int idx);
 #endif /* CONFIG_TTY */
 
+static inline int ve_mount_allowed(void)
+{
+	struct ve_struct *ve = get_exec_env();
+
+	return ve_is_super(ve) || ve->mnt_nr < sysctl_ve_mount_nr;
+}
+
+static inline void ve_mount_nr_inc(void)
+{
+	get_exec_env()->mnt_nr++;
+}
+
+static inline void ve_mount_nr_dec(void)
+{
+	get_exec_env()->mnt_nr--;
+}
+
 #else	/* CONFIG_VE */
 
 #define ve_uevent_seqnum uevent_seqnum
@@ -253,6 +276,10 @@ static inline void monotonic_abs_to_ve(clockid_t which_clock,
 				struct timespec *tp) { }
 static inline void monotonic_ve_to_abs(clockid_t which_clock,
 				struct timepsec *tp) { }
+
+static inline int ve_mount_allowed(void) { return 1; }
+static inline void ve_mount_nr_inc(void) { }
+static inline void ve_mount_nr_dec(void) { }
 #endif	/* CONFIG_VE */
 
 #endif /* _LINUX_VE_H */
diff --git a/kernel/ve/ve.c b/kernel/ve/ve.c
index e9219e6..ac2babb 100644
--- a/kernel/ve/ve.c
+++ b/kernel/ve/ve.c
@@ -82,6 +82,7 @@ struct ve_struct ve0 = {
 #endif
 	.sched_lat_ve.cur	= &ve0_lat_stats,
 	.init_cred		= &init_cred,
+	.mnt_nr			= 0,
 };
 EXPORT_SYMBOL(ve0);
 
@@ -653,6 +654,7 @@ do_init:
 	ve->aio_nr = 0;
 	ve->aio_max_nr = AIO_MAX_NR_DEFAULT;
 #endif
+	ve->mnt_nr = 0;
 
 	return &ve->css;
 
diff --git a/kernel/ve/veowner.c b/kernel/ve/veowner.c
index 316e4d0..1a7e735 100644
--- a/kernel/ve/veowner.c
+++ b/kernel/ve/veowner.c
@@ -55,6 +55,14 @@ static void prepare_proc(void)
 int ve_xattr_policy = VE_XATTR_POLICY_ACCEPT;
 static int ve_area_access_check;
 
+/*
+ * Operations on a large number of mount points can take a long time.
+ * These operations take the global lock namespace_sem, so they can affect
+ * other containers. Let us allow no more than sysctl_ve_mount_nr mount
+ * points for a VE.
+ */
+unsigned int sysctl_ve_mount_nr = 4096;
+
 static struct ctl_table vz_fs_table[] = {
 	{
 		.procname	= "ve-area-access-check",
@@ -77,6 +85,13 @@ static struct ctl_table vz_fs_table[] = {
 		.mode		= 0644 | S_ISVTX,
 		.proc_handler	= &proc_dointvec_virtual,
 	},
+	{
+		.procname       = "ve-mount-nr",
+		.data           = &sysctl_ve_mount_nr,
+		.maxlen         = sizeof(sysctl_ve_mount_nr),
+		.mode           = 0644,
+		.proc_handler   = proc_dointvec,
+	},
 	{ 0 }
 };
 
-- 
2.3.2



More information about the Devel mailing list