[Devel] [PATCH v2 rh7 2/2] fs/aio, ve: aio_nr & aio_max_nr variables virtualization

Andrey Ryabinin aryabinin at odin.com
Tue Aug 18 09:17:40 PDT 2015


Virtualization of kernel global aio_nr & aio_max_nr variables is required
to isolate containers and ve0 when allocating aio request/events resources.

Each ve, including ve0, has its own aio_nr and aio_max_nr values. ioctx_alloc()
tries to charge the aio_nr of the ve selected by the caller's execution context.

It's not possible to exhaust aio events resources of one ve from another ve.

The default per-CT aio_max_nr value is 0x10000, including for CT0.

https://jira.sw.ru/browse/PSBM-29017

Signed-off-by: Andrey Ryabinin <aryabinin at odin.com>
---

Changes since V1:
	- Don't initialize ve0 fields, as they will be initialized in ve_create().

 fs/aio.c            | 38 +++++++++++++++++++++-----------------
 include/linux/aio.h |  6 ++----
 include/linux/ve.h  |  5 +++++
 kernel/sysctl.c     | 16 ++++++++--------
 kernel/ve/ve.c      |  7 +++++++
 5 files changed, 43 insertions(+), 29 deletions(-)

diff --git a/fs/aio.c b/fs/aio.c
index 70a6599..9d700b0 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -29,6 +29,7 @@
 #include <linux/slab.h>
 #include <linux/timer.h>
 #include <linux/aio.h>
+#include <linux/ve.h>
 #include <linux/highmem.h>
 #include <linux/workqueue.h>
 #include <linux/security.h>
@@ -122,14 +123,9 @@ struct kioctx {
 
 	struct page		*internal_pages[AIO_RING_PAGES];
 	struct file		*aio_ring_file;
+	struct ve_struct	*ve;
 };
 
-/*------ sysctl variables----*/
-static DEFINE_SPINLOCK(aio_nr_lock);
-unsigned long aio_nr;		/* current system wide number of aio requests */
-unsigned long aio_max_nr = 0x10000; /* system wide maximum number of aio requests */
-/*----end sysctl variables---*/
-
 static struct kmem_cache	*kiocb_cachep;
 static struct kmem_cache	*kioctx_cachep;
 
@@ -495,6 +491,9 @@ static int kiocb_cancel(struct kioctx *ctx, struct kiocb *kiocb,
 static void free_ioctx_rcu(struct rcu_head *head)
 {
 	struct kioctx *ctx = container_of(head, struct kioctx, rcu_head);
+	struct ve_struct *ve = ctx->ve;
+
+	put_ve(ve);
 	kmem_cache_free(kioctx_cachep, ctx);
 }
 
@@ -571,6 +570,7 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
 {
 	struct mm_struct *mm = current->mm;
 	struct kioctx *ctx;
+	struct ve_struct *ve = get_exec_env();
 	int err = -ENOMEM;
 
 	/* Prevent overflows */
@@ -580,7 +580,7 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
 		return ERR_PTR(-EINVAL);
 	}
 
-	if (!nr_events || (unsigned long)nr_events > aio_max_nr)
+	if (!nr_events || (unsigned long)nr_events > ve->aio_max_nr)
 		return ERR_PTR(-EAGAIN);
 
 	ctx = kmem_cache_zalloc(kioctx_cachep, GFP_KERNEL);
@@ -588,6 +588,7 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
 		return ERR_PTR(-ENOMEM);
 
 	ctx->max_reqs = nr_events;
+	ctx->ve = get_ve(ve);
 
 	spin_lock_init(&ctx->ctx_lock);
 	spin_lock_init(&ctx->completion_lock);
@@ -608,14 +609,14 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
 		goto out_freectx;
 
 	/* limit the number of system wide aios */
-	spin_lock(&aio_nr_lock);
-	if (aio_nr + nr_events > aio_max_nr ||
-	    aio_nr + nr_events < aio_nr) {
-		spin_unlock(&aio_nr_lock);
+	spin_lock(&ve->aio_nr_lock);
+	if (ve->aio_nr + nr_events > ve->aio_max_nr ||
+	    ve->aio_nr + nr_events < ve->aio_nr) {
+		spin_unlock(&ve->aio_nr_lock);
 		goto out_cleanup;
 	}
-	aio_nr += ctx->max_reqs;
-	spin_unlock(&aio_nr_lock);
+	ve->aio_nr += ctx->max_reqs;
+	spin_unlock(&ve->aio_nr_lock);
 
 	/* now link into global list. */
 	spin_lock(&mm->ioctx_lock);
@@ -633,6 +634,7 @@ out_cleanup:
 	err = -EAGAIN;
 	aio_free_ring(ctx);
 out_freectx:
+	put_ve(ctx->ve);
 	mutex_unlock(&ctx->ring_lock);
 	put_aio_ring_file(ctx);
 	kmem_cache_free(kioctx_cachep, ctx);
@@ -665,6 +667,8 @@ static int kill_ioctx(struct mm_struct *mm, struct kioctx *ctx,
 		struct completion *requests_done)
 {
 	if (!atomic_xchg(&ctx->dead, 1)) {
+		struct ve_struct *ve = ctx->ve;
+
 		spin_lock(&mm->ioctx_lock);
 		hlist_del_rcu(&ctx->list);
 		spin_unlock(&mm->ioctx_lock);
@@ -676,10 +680,10 @@ static int kill_ioctx(struct mm_struct *mm, struct kioctx *ctx,
 		 * -EAGAIN with no ioctxs actually in use (as far as userspace
 		 *  could tell).
 		 */
-		spin_lock(&aio_nr_lock);
-		BUG_ON(aio_nr - ctx->max_reqs > aio_nr);
-		aio_nr -= ctx->max_reqs;
-		spin_unlock(&aio_nr_lock);
+		spin_lock(&ve->aio_nr_lock);
+		BUG_ON(ve->aio_nr - ctx->max_reqs > ve->aio_nr);
+		ve->aio_nr -= ctx->max_reqs;
+		spin_unlock(&ve->aio_nr_lock);
 
 		if (ctx->mmap_size)
 			vm_munmap(ctx->mmap_base, ctx->mmap_size);
diff --git a/include/linux/aio.h b/include/linux/aio.h
index a2f6172..0aa7dd3 100644
--- a/include/linux/aio.h
+++ b/include/linux/aio.h
@@ -14,6 +14,8 @@ struct kiocb;
 
 #define KIOCB_KEY		0
 
+#define AIO_MAX_NR_DEFAULT	0x10000
+
 /*
  * We use ki_cancel == KIOCB_CANCELLED to indicate that a kiocb has been either
  * cancelled or completed (this makes a certain amount of sense because
@@ -124,8 +126,4 @@ static inline struct kiocb *list_kiocb(struct list_head *h)
 	return list_entry(h, struct kiocb, ki_list);
 }
 
-/* for sysctl: */
-extern unsigned long aio_nr;
-extern unsigned long aio_max_nr;
-
 #endif /* __LINUX__AIO_H */
diff --git a/include/linux/ve.h b/include/linux/ve.h
index 7ba3f92..41be5af 100644
--- a/include/linux/ve.h
+++ b/include/linux/ve.h
@@ -137,6 +137,11 @@ struct ve_struct {
 #if IS_ENABLED(CONFIG_DEVTMPFS)
 	struct path		devtmpfs_root;
 #endif
+#ifdef CONFIG_AIO
+	spinlock_t              aio_nr_lock;
+	unsigned long           aio_nr;
+	unsigned long           aio_max_nr;
+#endif
 };
 
 struct ve_devmnt {
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 1a568e7..976f48c 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -1656,17 +1656,17 @@ static struct ctl_table fs_table[] = {
 #ifdef CONFIG_AIO
 	{
 		.procname	= "aio-nr",
-		.data		= &aio_nr,
-		.maxlen		= sizeof(aio_nr),
-		.mode		= 0444,
-		.proc_handler	= proc_doulongvec_minmax,
+		.data		= &ve0.aio_nr,
+		.maxlen		= sizeof(unsigned long),
+		.mode		= 0444 | S_ISVTX,
+		.proc_handler	= proc_doulongvec_minmax_virtual,
 	},
 	{
 		.procname	= "aio-max-nr",
-		.data		= &aio_max_nr,
-		.maxlen		= sizeof(aio_max_nr),
-		.mode		= 0644,
-		.proc_handler	= proc_doulongvec_minmax,
+		.data		= &ve0.aio_max_nr,
+		.maxlen		= sizeof(unsigned long),
+		.mode		= 0644 | S_ISVTX,
+		.proc_handler	= proc_doulongvec_minmax_virtual,
 	},
 #endif /* CONFIG_AIO */
 #ifdef CONFIG_INOTIFY_USER
diff --git a/kernel/ve/ve.c b/kernel/ve/ve.c
index 6496727..cdfcbec 100644
--- a/kernel/ve/ve.c
+++ b/kernel/ve/ve.c
@@ -17,6 +17,7 @@
 #include <linux/ve.h>
 #include <linux/init.h>
 
+#include <linux/aio.h>
 #include <linux/errno.h>
 #include <linux/unistd.h>
 #include <linux/slab.h>
@@ -692,6 +693,12 @@ do_init:
 	mutex_init(&ve->devmnt_mutex);
 	kmapset_init_key(&ve->ve_sysfs_perms);
 
+#ifdef CONFIG_AIO
+	spin_lock_init(&ve->aio_nr_lock);
+	ve->aio_nr = 0;
+	ve->aio_max_nr = AIO_MAX_NR_DEFAULT;
+#endif
+
 	return &ve->css;
 
 err_log:
-- 
2.4.6




More information about the Devel mailing list