[Devel] [PATCH] ve/fs: add per-VE limit of mount points

Evgenii Shatokhin eshatokhin at odin.com
Mon Nov 2 07:11:29 PST 2015


https://jira.sw.ru/browse/PSBM-34438

(This fix was adapted from PCS6.)

It is possible for a container to create lots of mount points, which may
make operations with them slower. As some of these operations take
global locks (namespace_sem, vfsmount_lock), it might affect other
containers as well.

Let us limit the maximum number of mount points a VE may create. The
limit can be customized via /proc/sys/fs/ve-mount-nr knob.

Signed-off-by: Evgenii Shatokhin <eshatokhin at odin.com>
---
 fs/mount.h                    |  2 ++
 fs/namespace.c                | 34 ++++++++++++++++++++++++++++++++--
 include/linux/mnt_namespace.h |  2 ++
 include/linux/ve.h            |  1 +
 include/linux/ve_proto.h      | 11 +++++++++++
 kernel/sysctl.c               |  8 ++++++++
 kernel/ve/ve.c                |  2 ++
 7 files changed, 58 insertions(+), 2 deletions(-)

diff --git a/fs/mount.h b/fs/mount.h
index 3e16b57..e192ff9 100644
--- a/fs/mount.h
+++ b/fs/mount.h
@@ -50,6 +50,8 @@ struct mount {
 	int mnt_expiry_mark;		/* true if marked for expiry */
 	int mnt_pinned;
 	int mnt_ghosts;
+
+	envid_t mnt_owner_veid;
 };
 
 #define MNT_NS_INTERNAL ERR_PTR(-EINVAL) /* distinct from any mnt_namespace */
diff --git a/fs/namespace.c b/fs/namespace.c
index 8909c13..be80e8d 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -42,6 +42,14 @@ static struct list_head mountpoint_hashtable[HASH_SIZE];
 static struct kmem_cache *mnt_cache __read_mostly;
 static struct rw_semaphore namespace_sem;
 
+/*
+ * Operations with a big amount of mount points can require a lot of time.
+ * These operations take the global lock namespace_sem, so they can affect
+ * other containers. Let us allow no more than sysctl_ve_mount_nr mount
+ * points for a VE.
+ */
+unsigned int sysctl_ve_mount_nr = 4096;
+
 /* /sys/fs */
 struct kobject *fs_kobj;
 EXPORT_SYMBOL_GPL(fs_kobj);
@@ -165,7 +173,15 @@ unsigned int mnt_get_count(struct mount *mnt)
 
 static struct mount *alloc_vfsmnt(const char *name)
 {
-	struct mount *mnt = kmem_cache_zalloc(mnt_cache, GFP_KERNEL);
+	struct mount *mnt;
+	struct ve_struct *ve = get_exec_env();
+
+	if (ve &&
+	    atomic_add_return(1, &ve->mnt_nr) > sysctl_ve_mount_nr &&
+	    !ve_is_super(ve))
+		goto out_mnt_nr_dec;
+
+	mnt = kmem_cache_zalloc(mnt_cache, GFP_KERNEL);
 	if (mnt) {
 		int err;
 
@@ -190,6 +206,8 @@ static struct mount *alloc_vfsmnt(const char *name)
 		mnt->mnt_writers = 0;
 #endif
 
+		mnt->mnt_owner_veid = get_veid(ve);
+
 		INIT_LIST_HEAD(&mnt->mnt_hash);
 		INIT_LIST_HEAD(&mnt->mnt_child);
 		INIT_LIST_HEAD(&mnt->mnt_mounts);
@@ -201,7 +219,9 @@ static struct mount *alloc_vfsmnt(const char *name)
 #ifdef CONFIG_FSNOTIFY
 		INIT_HLIST_HEAD(&mnt->mnt_fsnotify_marks);
 #endif
-	}
+	} else
+		goto out_mnt_nr_dec;
+
 	return mnt;
 
 #ifdef CONFIG_SMP
@@ -212,6 +232,9 @@ out_free_id:
 	mnt_free_id(mnt);
 out_free_cache:
 	kmem_cache_free(mnt_cache, mnt);
+out_mnt_nr_dec:
+	if (ve)
+		atomic_dec(&ve->mnt_nr);
 	return NULL;
 }
 
@@ -542,6 +565,13 @@ int sb_prepare_remount_readonly(struct super_block *sb)
 
 static void free_vfsmnt(struct mount *mnt)
 {
+	struct ve_struct *ve = get_ve_by_id(mnt->mnt_owner_veid);
+
+	if (ve) {
+		atomic_dec(&ve->mnt_nr);
+		put_ve(ve);
+	}
+
 	kfree(mnt->mnt_devname);
 	mnt_free_id(mnt);
 #ifdef CONFIG_SMP
diff --git a/include/linux/mnt_namespace.h b/include/linux/mnt_namespace.h
index 12b2ab5..4d83d3c 100644
--- a/include/linux/mnt_namespace.h
+++ b/include/linux/mnt_namespace.h
@@ -14,5 +14,7 @@ extern const struct file_operations proc_mounts_operations;
 extern const struct file_operations proc_mountinfo_operations;
 extern const struct file_operations proc_mountstats_operations;
 
+extern unsigned int sysctl_ve_mount_nr;
+
 #endif
 #endif
diff --git a/include/linux/ve.h b/include/linux/ve.h
index 86b95c3..c6530c2 100644
--- a/include/linux/ve.h
+++ b/include/linux/ve.h
@@ -128,6 +128,7 @@ struct ve_struct {
 	unsigned long		aio_nr;
 	unsigned long		aio_max_nr;
 #endif
+	atomic_t		mnt_nr;
 };
 
 struct ve_devmnt {
diff --git a/include/linux/ve_proto.h b/include/linux/ve_proto.h
index 0f5898e..3894c60 100644
--- a/include/linux/ve_proto.h
+++ b/include/linux/ve_proto.h
@@ -32,6 +32,7 @@ static inline bool ve_is_super(struct ve_struct *ve)
 
 #define get_exec_env()		(current->task_ve)
 #define get_env_init(ve)	(ve->ve_ns->pid_ns->child_reaper)
+#define get_veid(ve)		(ve->veid)
 
 const char *ve_name(struct ve_struct *ve);
 
@@ -124,6 +125,11 @@ static inline struct ve_struct *get_exec_env(void)
 
 #define get_env_init(ve)	(ve->ve_ns->pid_ns->child_reaper)
 
+static inline envid_t get_veid(struct ve_struct *ve)
+{
+	return 0;
+}
+
 static inline bool ve_is_super(struct ve_struct *ve)
 {
 	return true;
@@ -139,6 +145,11 @@ static inline const char *task_ve_name(struct task_struct *task)
 	return "0";
 }
 
+static inline struct ve_struct *get_ve_by_id(envid_t veid)
+{
+	return NULL;
+}
+
 #define nr_threads_ve(ve)	(nr_threads)
 
 #endif /* CONFIG_VE */
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 9c081e3..ba69200 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -64,6 +64,7 @@
 #include <linux/sched/sysctl.h>
 #include <linux/kexec.h>
 #include <linux/ve.h>
+#include <linux/mnt_namespace.h>
 
 #include <asm/uaccess.h>
 #include <asm/processor.h>
@@ -1740,6 +1741,13 @@ static struct ctl_table fs_table[] = {
 		.proc_handler	= &pipe_proc_fn,
 		.extra1		= &pipe_min_size,
 	},
+	{
+		.procname	= "ve-mount-nr",
+		.data		= &sysctl_ve_mount_nr,
+		.maxlen		= sizeof(sysctl_ve_mount_nr),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
 	{ }
 };
 
diff --git a/kernel/ve/ve.c b/kernel/ve/ve.c
index e9219e6..216608c 100644
--- a/kernel/ve/ve.c
+++ b/kernel/ve/ve.c
@@ -82,6 +82,7 @@ struct ve_struct ve0 = {
 #endif
 	.sched_lat_ve.cur	= &ve0_lat_stats,
 	.init_cred		= &init_cred,
+	.mnt_nr			= ATOMIC_INIT(0),
 };
 EXPORT_SYMBOL(ve0);
 
@@ -653,6 +654,7 @@ do_init:
 	ve->aio_nr = 0;
 	ve->aio_max_nr = AIO_MAX_NR_DEFAULT;
 #endif
+	atomic_set(&ve->mnt_nr, 0);
 
 	return &ve->css;
 
-- 
2.3.2




More information about the Devel mailing list