[Devel] [PATCH RHEL7 COMMIT] memcg: access beyond end of shrinker_map
Konstantin Khorenko
khorenko at virtuozzo.com
Fri Jan 17 12:45:12 MSK 2020
The commit is pushed to "branch-rh7-3.10.0-1062.7.1.vz7.130.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh7-3.10.0-1062.7.1.vz7.130.10
------>
commit 2ad49c8b916379544fa9dd35f5059dd9ffb0ce1b
Author: Vasily Averin <vvs at virtuozzo.com>
Date: Fri Jan 17 12:45:11 2020 +0300
memcg: access beyond end of shrinker_map
vz7 kernel can remove in-use memcg from parent-chield hierarchy tree,
and skip resize of its shrinker_map during registration of new shrinkers.
It allows memcg user to access memory beyond of end non-resized
shrinker_map, and corrupt next slab element.
CPU A CPU B CPU C
got memcg cgroup_rmdir() sget_userns()
shrink_slab_memcg() cgroup_destroy_locked() register_shrinker()
. cgroup_css_killed() register_memcg_shrinker()
. cgroup_offline_fn() down_write(&shrinker_rwsem)
. list_del_rcu(&cgrp->sibling) memcg_expand_shrinker_maps()
wait for shrinker_rwsem re-allocs shrinker_maps of all memcg
. but ignores memcg used on CPU A
. because it was removed by CPU B
. releases shrinker_rwsem
accesses and modifies memory
beyond end of own shrinker_map
while using of new memcg_shrinkers
registered on CPU C
v4:
- corrected WARN_ON condition
v3:
- corrected map->nr_max value, "- 1" was removed
- nr_max update inside shrink_slab_memcg()
v2 changes:
- map_size was in bytes, shrinker_nr_max was in bytes (thanks to ktkhai@)
- map_size was replaced by nr_max -- maximal allowed bit number
- added WARN_ONs to detect another possible corruption places
https://jira.sw.ru/browse/PSBM-100509
https://jira.sw.ru/browse/PSBM-100694
Signed-off-by: Vasily Averin <vvs at virtuozzo.com>
Acked-by: Kirill Tkhai <ktkhai at virtuozzo.com>
---
include/linux/memcontrol.h | 1 +
mm/memcontrol.c | 4 ++++
mm/vmscan.c | 7 +++++--
3 files changed, 10 insertions(+), 2 deletions(-)
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index c1e50faf00e59..485a3d354b78b 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -51,6 +51,7 @@ struct mem_cgroup_reclaim_cookie {
*/
struct memcg_shrinker_map {
struct rcu_head rcu;
+ int nr_max;
unsigned long map[0];
};
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 077361eb399bf..0514f9b2b2308 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -754,6 +754,7 @@ static int memcg_expand_one_shrinker_map(struct mem_cgroup *memcg,
/* Set all old bits, clear all new bits */
memset(new->map, (int)0xff, old_size);
memset((void *)new->map + old_size, 0, size - old_size);
+ new->nr_max = size * BITS_PER_BYTE;
rcu_assign_pointer(memcg->info.nodeinfo[nid]->shrinker_map, new);
call_rcu(&old->rcu, memcg_free_shrinker_map_rcu);
@@ -797,6 +798,7 @@ static int memcg_alloc_shrinker_maps(struct mem_cgroup *memcg)
ret = -ENOMEM;
break;
}
+ map->nr_max = size * BITS_PER_BYTE;
rcu_assign_pointer(memcg->info.nodeinfo[nid]->shrinker_map, map);
}
mutex_unlock(&memcg_shrinker_map_mutex);
@@ -841,6 +843,7 @@ void memcg_set_shrinker_bit(struct mem_cgroup *memcg, int nid, int shrinker_id)
map = rcu_dereference(memcg->info.nodeinfo[nid]->shrinker_map);
/* Pairs with smp mb in shrink_slab() */
smp_mb__before_atomic();
+ WARN_ON(shrinker_id >= map->nr_max);
set_bit(shrinker_id, map->map);
rcu_read_unlock();
}
@@ -857,6 +860,7 @@ void memcg_clear_shrinker_bit(struct mem_cgroup *memcg, int nid, int shrinker_id
*/
rcu_read_lock();
map = rcu_dereference(memcg->info.nodeinfo[nid]->shrinker_map);
+ WARN_ON(shrinker_id >= map->nr_max);
clear_bit(shrinker_id, map->map);
rcu_read_unlock();
}
diff --git a/mm/vmscan.c b/mm/vmscan.c
index de7e51584f6f9..ec3443ad161bd 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -488,7 +488,7 @@ static unsigned long shrink_slab_memcg(gfp_t gfp_mask, int nid,
{
struct memcg_shrinker_map *map;
unsigned long ret, freed = 0;
- int i;
+ int i, nr_max;
if (!memcg_kmem_enabled())
return 0;
@@ -501,7 +501,9 @@ static unsigned long shrink_slab_memcg(gfp_t gfp_mask, int nid,
if (unlikely(!map))
goto unlock;
- for_each_set_bit(i, map->map, shrinker_nr_max) {
+ nr_max = min(shrinker_nr_max, map->nr_max);
+
+ for_each_set_bit(i, map->map, nr_max) {
struct shrink_control sc = {
.gfp_mask = gfp_mask,
.nid = nid,
@@ -569,6 +571,7 @@ static unsigned long shrink_slab_memcg(gfp_t gfp_mask, int nid,
}
put_shrinker(shrinker);
map = memcg_nid_shrinker_map(memcg, nid);
+ nr_max = min(shrinker_nr_max, map->nr_max);
if (rwsem_is_contended(&shrinker_rwsem)) {
freed = freed ? : 1;
More information about the Devel
mailing list