[Devel] [PATCH RHEL8 COMMIT] mm/vmscan.c: iterate only over charged shrinkers during memcg shrink_slab()

Konstantin Khorenko khorenko at virtuozzo.com
Thu Apr 2 17:13:02 MSK 2020


The commit is pushed to "branch-rh8-4.18.0-80.1.2.vz8.3.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh8-4.18.0-80.1.2.vz8.3.4
------>
commit a123cd769b8afa9c7b84c7e9477dc3a5e036ef88
Author: Kirill Tkhai <ktkhai at virtuozzo.com>
Date:   Thu Apr 2 17:13:01 2020 +0300

    mm/vmscan.c: iterate only over charged shrinkers during memcg shrink_slab()
    
    Using the preparations made in previous patches, in the case of a memcg
    shrink we can skip shrinkers that are not set in the memcg's shrinker
    bitmap.  To do that, we separate iterations over memcg-aware and
    !memcg-aware shrinkers, and memcg-aware shrinkers are chosen via
    for_each_set_bit() from the bitmap.  On big nodes hosting many
    isolated environments, this gives a significant performance gain.  See
    the next patches for the details.
    
    Note that the patch does not yet handle empty memcg shrinkers, since we
    never clear the bitmap bits once they are set.  Such shrinkers will
    still be called, with no objects shrunk as a result.  Skipping them
    is implemented by the next patches.
    
    [ktkhai at virtuozzo.com: v9]
      Link: http://lkml.kernel.org/r/153112558507.4097.12713813335683345488.stgit@localhost.localdomain
    Link: http://lkml.kernel.org/r/153063066653.1818.976035462801487910.stgit@localhost.localdomain
    Signed-off-by: Kirill Tkhai <ktkhai at virtuozzo.com>
    
    Acked-by: Vladimir Davydov <vdavydov.dev at gmail.com>
    Tested-by: Shakeel Butt <shakeelb at google.com>
    Cc: Al Viro <viro at zeniv.linux.org.uk>
    Cc: Andrey Ryabinin <aryabinin at virtuozzo.com>
    Cc: Chris Wilson <chris at chris-wilson.co.uk>
    Cc: Greg Kroah-Hartman <gregkh at linuxfoundation.org>
    Cc: Guenter Roeck <linux at roeck-us.net>
    Cc: "Huang, Ying" <ying.huang at intel.com>
    Cc: Johannes Weiner <hannes at cmpxchg.org>
    Cc: Josef Bacik <jbacik at fb.com>
    Cc: Li RongQing <lirongqing at baidu.com>
    Cc: Matthew Wilcox <willy at infradead.org>
    Cc: Matthias Kaehlcke <mka at chromium.org>
    Cc: Mel Gorman <mgorman at techsingularity.net>
    Cc: Michal Hocko <mhocko at kernel.org>
    Cc: Minchan Kim <minchan at kernel.org>
    Cc: Philippe Ombredanne <pombredanne at nexb.com>
    Cc: Roman Gushchin <guro at fb.com>
    Cc: Sahitya Tummala <stummala at codeaurora.org>
    Cc: Stephen Rothwell <sfr at canb.auug.org.au>
    Cc: Tetsuo Handa <penguin-kernel at I-love.SAKURA.ne.jp>
    Cc: Thomas Gleixner <tglx at linutronix.de>
    Cc: Waiman Long <longman at redhat.com>
    Signed-off-by: Andrew Morton <akpm at linux-foundation.org>
    Signed-off-by: Linus Torvalds <torvalds at linux-foundation.org>
    (cherry picked from commit b0dedc49a2daa0f44ddc51fbf686b2ef012fccbf)
    Signed-off-by: Andrey Ryabinin <aryabinin at virtuozzo.com>
---
 mm/vmscan.c | 84 ++++++++++++++++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 75 insertions(+), 9 deletions(-)

diff --git a/mm/vmscan.c b/mm/vmscan.c
index 5c4fd3bd2c3f..80fa2e8b873a 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -332,6 +332,21 @@ int prealloc_shrinker(struct shrinker *shrinker)
 	if (!shrinker->nr_deferred)
 		return -ENOMEM;
 
+	/*
+	 * There is a window between prealloc_shrinker()
+	 * and register_shrinker_prepared(). We don't want
+	 * to clear bit of a shrinker in such the state
+	 * in shrink_slab_memcg(), since this will impose
+	 * restrictions on a code registering a shrinker
+	 * (they would have to guarantee, their LRU lists
+	 * are empty till shrinker is completely registered).
+	 * So, we differ the situation, when 1)a shrinker
+	 * is semi-registered (id is assigned, but it has
+	 * not yet linked to shrinker_list) and 2)shrinker
+	 * is not registered (id is not assigned).
+	 */
+	INIT_LIST_HEAD(&shrinker->list);
+
 	if (shrinker->flags & SHRINKER_MEMCG_AWARE) {
 		if (prealloc_memcg_shrinker(shrinker))
 			goto free_deferred;
@@ -511,6 +526,63 @@ static unsigned long do_shrink_slab(struct shrink_control *shrinkctl,
 	return freed;
 }
 
+#ifdef CONFIG_MEMCG_KMEM
+static unsigned long shrink_slab_memcg(gfp_t gfp_mask, int nid,
+			struct mem_cgroup *memcg, int priority)
+{
+	struct memcg_shrinker_map *map;
+	unsigned long freed = 0;
+	int ret, i;
+
+	if (!memcg_kmem_enabled() || !mem_cgroup_online(memcg))
+		return 0;
+
+	if (!down_read_trylock(&shrinker_rwsem))
+		return 0;
+
+	map = rcu_dereference_protected(memcg->nodeinfo[nid]->shrinker_map,
+					true);
+	if (unlikely(!map))
+		goto unlock;
+
+	for_each_set_bit(i, map->map, shrinker_nr_max) {
+		struct shrink_control sc = {
+			.gfp_mask = gfp_mask,
+			.nid = nid,
+			.memcg = memcg,
+		};
+		struct shrinker *shrinker;
+
+		shrinker = idr_find(&shrinker_idr, i);
+		if (unlikely(!shrinker)) {
+			clear_bit(i, map->map);
+			continue;
+		}
+
+		/* See comment in prealloc_shrinker() */
+		if (unlikely(list_empty(&shrinker->list)))
+			continue;
+
+		ret = do_shrink_slab(&sc, shrinker, priority);
+		freed += ret;
+
+		if (rwsem_is_contended(&shrinker_rwsem)) {
+			freed = freed ? : 1;
+			break;
+		}
+	}
+unlock:
+	up_read(&shrinker_rwsem);
+	return freed;
+}
+#else /* CONFIG_MEMCG_KMEM */
+static unsigned long shrink_slab_memcg(gfp_t gfp_mask, int nid,
+			struct mem_cgroup *memcg, int priority)
+{
+	return 0;
+}
+#endif /* CONFIG_MEMCG_KMEM */
+
 /**
  * shrink_slab - shrink slab caches
  * @gfp_mask: allocation context
@@ -540,8 +612,8 @@ static unsigned long shrink_slab(gfp_t gfp_mask, int nid,
 	struct shrinker *shrinker;
 	unsigned long freed = 0;
 
-	if (memcg && (!memcg_kmem_enabled() || !mem_cgroup_online(memcg)))
-		return 0;
+	if (memcg && !mem_cgroup_is_root(memcg))
+		return shrink_slab_memcg(gfp_mask, nid, memcg, priority);
 
 	if (unlikely(test_tsk_thread_flag(current, TIF_MEMDIE)))
 		return 0;
@@ -557,13 +629,7 @@ static unsigned long shrink_slab(gfp_t gfp_mask, int nid,
 			.for_drop_caches = for_drop_caches,
 		};
 
-		/*
-		 * If kernel memory accounting is disabled, we ignore
-		 * SHRINKER_MEMCG_AWARE flag and call all shrinkers
-		 * passing NULL for memcg.
-		 */
-		if (memcg_kmem_enabled() &&
-		    !!memcg != !!(shrinker->flags & SHRINKER_MEMCG_AWARE))
+		if (!!memcg != !!(shrinker->flags & SHRINKER_MEMCG_AWARE))
 			continue;
 
 		if (!(shrinker->flags & SHRINKER_NUMA_AWARE))


More information about the Devel mailing list