[Devel] [PATCH RHEL7 COMMIT] pfcache: do not account peer files to memcg

Konstantin Khorenko khorenko at virtuozzo.com
Mon May 2 08:16:40 PDT 2016


The commit is pushed to "branch-rh7-3.10.0-327.10.1.vz7.12.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh7-3.10.0-327.10.1.vz7.12.16
------>
commit 6812907ce7c3017906486cd3eae390d7c644014a
Author: Vladimir Davydov <vdavydov at virtuozzo.com>
Date:   Mon May 2 19:16:40 2016 +0400

    pfcache: do not account peer files to memcg
    
    Charging a peer file to a particular cgroup can pin that cgroup
    indefinitely after its destruction, because peer files are shared
    system-wide.
    
    Signed-off-by: Vladimir Davydov <vdavydov at virtuozzo.com>
---
 fs/ext4/pfcache.c          | 13 ++++++++++++-
 include/linux/memcontrol.h | 11 +++++++++++
 mm/memcontrol.c            |  6 ++++--
 3 files changed, 27 insertions(+), 3 deletions(-)

diff --git a/fs/ext4/pfcache.c b/fs/ext4/pfcache.c
index b9751ce..fe1296f 100644
--- a/fs/ext4/pfcache.c
+++ b/fs/ext4/pfcache.c
@@ -15,6 +15,7 @@
 #include <linux/namei.h>
 #include <linux/exportfs.h>
 #include <linux/init_task.h>	/* for init_cred */
+#include <linux/memcontrol.h>
 #include "ext4.h"
 #include "xattr.h"
 #include "../internal.h"
@@ -59,6 +60,14 @@ int ext4_open_pfcache(struct inode *inode)
 
 	pfcache_path(inode, name);
 
+	/*
+	 * Lookups in the shared area shouldn't be accounted to any particular
+	 * memory cgroup; otherwise a cgroup could stay pinned indefinitely
+	 * after its destruction, because a file or directory located in this
+	 * area is likely to be in use by other containers or the host.
+	 */
+	memcg_stop_kmem_account();
+
 	cur_cred = override_creds(&init_cred);
 	/*
 	 * Files in cache area must not have csum attributes or
@@ -72,12 +81,14 @@ int ext4_open_pfcache(struct inode *inode)
 	revert_creds(cur_cred);
 	path_put(&root);
 	if (ret)
-		return ret;
+		goto out;
 
 	ret = open_mapping_peer(inode->i_mapping, &path, &init_cred);
 	if (!ret)
 		percpu_counter_inc(&EXT4_SB(inode->i_sb)->s_pfcache_peers);
 	path_put(&path);
+out:
+	memcg_resume_kmem_account();
 	return ret;
 }
 
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 743fb0b..0dbb653 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -511,6 +511,9 @@ extern int memcg_nr_cache_ids;
 extern void memcg_get_cache_ids(void);
 extern void memcg_put_cache_ids(void);
 
+extern void memcg_stop_kmem_account(void);
+extern void memcg_resume_kmem_account(void);
+
 /*
  * Helper macro to loop through all memcg-specific caches. Callers must still
  * check if the cache is valid (it is either valid or NULL).
@@ -702,6 +705,14 @@ static inline void memcg_put_cache_ids(void)
 {
 }
 
+static inline void memcg_stop_kmem_account(void)
+{
+}
+
+static inline void memcg_resume_kmem_account(void)
+{
+}
+
 static inline struct kmem_cache *
 memcg_kmem_get_cache(struct kmem_cache *cachep, gfp_t gfp)
 {
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 2a6c1f7..b0feff5 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -3361,17 +3361,19 @@ static void memcg_free_cache_id(int id)
  * memcg_kmem_skip_account. So we enclose anything that might allocate memory
  * inside the following two functions.
  */
-static inline void memcg_stop_kmem_account(void)
+void memcg_stop_kmem_account(void)
 {
 	VM_BUG_ON(!current->mm);
 	current->memcg_kmem_skip_account++;
 }
+EXPORT_SYMBOL(memcg_stop_kmem_account);
 
-static inline void memcg_resume_kmem_account(void)
+void memcg_resume_kmem_account(void)
 {
 	VM_BUG_ON(!current->mm);
 	current->memcg_kmem_skip_account--;
 }
+EXPORT_SYMBOL(memcg_resume_kmem_account);
 
 struct memcg_kmem_cache_create_work {
 	struct mem_cgroup *memcg;


More information about the Devel mailing list