[Devel] [PATCH v2 rh7 2/2] fs/aio, ve: aio_nr & aio_max_nr variables virtualization
Andrey Ryabinin
aryabinin at odin.com
Tue Aug 18 09:17:40 PDT 2015
Virtualization of kernel global aio_nr & aio_max_nr variables is required
to isolate containers and ve0 when allocating aio request/events resources.
Each ve, including ve0, has its own aio_nr and aio_max_nr values. ioctx_alloc()
charges the aio_nr counter of the ve selected by the current execution context.
One ve can therefore no longer exhaust the aio event resources of another ve.
The default per-CT aio_max_nr value is 0x10000; this also applies to CT0.
https://jira.sw.ru/browse/PSBM-29017
Signed-off-by: Andrey Ryabinin <aryabinin at odin.com>
---
Changes since V1:
- Don't initialize ve0 fields as they will be initialized in ve_create().
fs/aio.c | 38 +++++++++++++++++++++-----------------
include/linux/aio.h | 6 ++----
include/linux/ve.h | 5 +++++
kernel/sysctl.c | 16 ++++++++--------
kernel/ve/ve.c | 7 +++++++
5 files changed, 43 insertions(+), 29 deletions(-)
diff --git a/fs/aio.c b/fs/aio.c
index 70a6599..9d700b0 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -29,6 +29,7 @@
#include <linux/slab.h>
#include <linux/timer.h>
#include <linux/aio.h>
+#include <linux/ve.h>
#include <linux/highmem.h>
#include <linux/workqueue.h>
#include <linux/security.h>
@@ -122,14 +123,9 @@ struct kioctx {
struct page *internal_pages[AIO_RING_PAGES];
struct file *aio_ring_file;
+ struct ve_struct *ve;
};
-/*------ sysctl variables----*/
-static DEFINE_SPINLOCK(aio_nr_lock);
-unsigned long aio_nr; /* current system wide number of aio requests */
-unsigned long aio_max_nr = 0x10000; /* system wide maximum number of aio requests */
-/*----end sysctl variables---*/
-
static struct kmem_cache *kiocb_cachep;
static struct kmem_cache *kioctx_cachep;
@@ -495,6 +491,9 @@ static int kiocb_cancel(struct kioctx *ctx, struct kiocb *kiocb,
static void free_ioctx_rcu(struct rcu_head *head)
{
struct kioctx *ctx = container_of(head, struct kioctx, rcu_head);
+ struct ve_struct *ve = ctx->ve;
+
+ put_ve(ve);
kmem_cache_free(kioctx_cachep, ctx);
}
@@ -571,6 +570,7 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
{
struct mm_struct *mm = current->mm;
struct kioctx *ctx;
+ struct ve_struct *ve = get_exec_env();
int err = -ENOMEM;
/* Prevent overflows */
@@ -580,7 +580,7 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
return ERR_PTR(-EINVAL);
}
- if (!nr_events || (unsigned long)nr_events > aio_max_nr)
+ if (!nr_events || (unsigned long)nr_events > ve->aio_max_nr)
return ERR_PTR(-EAGAIN);
ctx = kmem_cache_zalloc(kioctx_cachep, GFP_KERNEL);
@@ -588,6 +588,7 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
return ERR_PTR(-ENOMEM);
ctx->max_reqs = nr_events;
+ ctx->ve = get_ve(ve);
spin_lock_init(&ctx->ctx_lock);
spin_lock_init(&ctx->completion_lock);
@@ -608,14 +609,14 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
goto out_freectx;
/* limit the number of system wide aios */
- spin_lock(&aio_nr_lock);
- if (aio_nr + nr_events > aio_max_nr ||
- aio_nr + nr_events < aio_nr) {
- spin_unlock(&aio_nr_lock);
+ spin_lock(&ve->aio_nr_lock);
+ if (ve->aio_nr + nr_events > ve->aio_max_nr ||
+ ve->aio_nr + nr_events < ve->aio_nr) {
+ spin_unlock(&ve->aio_nr_lock);
goto out_cleanup;
}
- aio_nr += ctx->max_reqs;
- spin_unlock(&aio_nr_lock);
+ ve->aio_nr += ctx->max_reqs;
+ spin_unlock(&ve->aio_nr_lock);
/* now link into global list. */
spin_lock(&mm->ioctx_lock);
@@ -633,6 +634,7 @@ out_cleanup:
err = -EAGAIN;
aio_free_ring(ctx);
out_freectx:
+ put_ve(ctx->ve);
mutex_unlock(&ctx->ring_lock);
put_aio_ring_file(ctx);
kmem_cache_free(kioctx_cachep, ctx);
@@ -665,6 +667,8 @@ static int kill_ioctx(struct mm_struct *mm, struct kioctx *ctx,
struct completion *requests_done)
{
if (!atomic_xchg(&ctx->dead, 1)) {
+ struct ve_struct *ve = ctx->ve;
+
spin_lock(&mm->ioctx_lock);
hlist_del_rcu(&ctx->list);
spin_unlock(&mm->ioctx_lock);
@@ -676,10 +680,10 @@ static int kill_ioctx(struct mm_struct *mm, struct kioctx *ctx,
* -EAGAIN with no ioctxs actually in use (as far as userspace
* could tell).
*/
- spin_lock(&aio_nr_lock);
- BUG_ON(aio_nr - ctx->max_reqs > aio_nr);
- aio_nr -= ctx->max_reqs;
- spin_unlock(&aio_nr_lock);
+ spin_lock(&ve->aio_nr_lock);
+ BUG_ON(ve->aio_nr - ctx->max_reqs > ve->aio_nr);
+ ve->aio_nr -= ctx->max_reqs;
+ spin_unlock(&ve->aio_nr_lock);
if (ctx->mmap_size)
vm_munmap(ctx->mmap_base, ctx->mmap_size);
diff --git a/include/linux/aio.h b/include/linux/aio.h
index a2f6172..0aa7dd3 100644
--- a/include/linux/aio.h
+++ b/include/linux/aio.h
@@ -14,6 +14,8 @@ struct kiocb;
#define KIOCB_KEY 0
+#define AIO_MAX_NR_DEFAULT 0x10000
+
/*
* We use ki_cancel == KIOCB_CANCELLED to indicate that a kiocb has been either
* cancelled or completed (this makes a certain amount of sense because
@@ -124,8 +126,4 @@ static inline struct kiocb *list_kiocb(struct list_head *h)
return list_entry(h, struct kiocb, ki_list);
}
-/* for sysctl: */
-extern unsigned long aio_nr;
-extern unsigned long aio_max_nr;
-
#endif /* __LINUX__AIO_H */
diff --git a/include/linux/ve.h b/include/linux/ve.h
index 7ba3f92..41be5af 100644
--- a/include/linux/ve.h
+++ b/include/linux/ve.h
@@ -137,6 +137,11 @@ struct ve_struct {
#if IS_ENABLED(CONFIG_DEVTMPFS)
struct path devtmpfs_root;
#endif
+#ifdef CONFIG_AIO
+ spinlock_t aio_nr_lock;
+ unsigned long aio_nr;
+ unsigned long aio_max_nr;
+#endif
};
struct ve_devmnt {
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 1a568e7..976f48c 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -1656,17 +1656,17 @@ static struct ctl_table fs_table[] = {
#ifdef CONFIG_AIO
{
.procname = "aio-nr",
- .data = &aio_nr,
- .maxlen = sizeof(aio_nr),
- .mode = 0444,
- .proc_handler = proc_doulongvec_minmax,
+ .data = &ve0.aio_nr,
+ .maxlen = sizeof(unsigned long),
+ .mode = 0444 | S_ISVTX,
+ .proc_handler = proc_doulongvec_minmax_virtual,
},
{
.procname = "aio-max-nr",
- .data = &aio_max_nr,
- .maxlen = sizeof(aio_max_nr),
- .mode = 0644,
- .proc_handler = proc_doulongvec_minmax,
+ .data = &ve0.aio_max_nr,
+ .maxlen = sizeof(unsigned long),
+ .mode = 0644 | S_ISVTX,
+ .proc_handler = proc_doulongvec_minmax_virtual,
},
#endif /* CONFIG_AIO */
#ifdef CONFIG_INOTIFY_USER
diff --git a/kernel/ve/ve.c b/kernel/ve/ve.c
index 6496727..cdfcbec 100644
--- a/kernel/ve/ve.c
+++ b/kernel/ve/ve.c
@@ -17,6 +17,7 @@
#include <linux/ve.h>
#include <linux/init.h>
+#include <linux/aio.h>
#include <linux/errno.h>
#include <linux/unistd.h>
#include <linux/slab.h>
@@ -692,6 +693,12 @@ do_init:
mutex_init(&ve->devmnt_mutex);
kmapset_init_key(&ve->ve_sysfs_perms);
+#ifdef CONFIG_AIO
+ spin_lock_init(&ve->aio_nr_lock);
+ ve->aio_nr = 0;
+ ve->aio_max_nr = AIO_MAX_NR_DEFAULT;
+#endif
+
return &ve->css;
err_log:
--
2.4.6
More information about the Devel
mailing list