[Devel] [PATCH rh7 6/8] mm/vmscan: reclaim cgroups that don't cause refaults first
Andrey Ryabinin
aryabinin at virtuozzo.com
Tue Feb 12 18:39:13 MSK 2019
Instead of iterating over all cgroups, reclaim first from the cgroup that
triggered the allocation: wakeup_kswapd() records the allocating task's
memcg in pgdat->memcg, and shrink_zone() tries that cgroup before falling
back to the full hierarchy walk. Don't reclaim from a cgroup if doing so
causes refaults, i.e. if its workingset activation counter has changed
since the snapshot stored in its per-zone lruvec.
https://pmc.acronis.com/browse/VSTOR-19037
Signed-off-by: Andrey Ryabinin <aryabinin at virtuozzo.com>
---
 include/linux/memcontrol.h |   5 ++
 include/linux/mmzone.h     |   1 +
 mm/vmscan.c                | 122 ++++++++++++++++++++++---------------
 3 files changed, 79 insertions(+), 49 deletions(-)
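
Reviewer note (not part of the patch): a condensed, illustrative sketch of
the new shrink_zone() ordering, simplified from the hunks below. Reference
counting, the reclaim cookie, slab reclaim and the nr_to_reclaim cutoff are
omitted, and shrink_zone_sketch() is a made-up name used only here:

/*
 * Illustrative only: kswapd reclaims the cgroup recorded by
 * wakeup_kswapd(); direct reclaim uses the cgroup of the allocating
 * task. The full hierarchy walk happens only if nothing could be
 * reclaimed from that cgroup (e.g. because reclaiming it would cause
 * refaults, see mem_cgroup_refaults() below).
 */
static void shrink_zone_sketch(struct zone *zone, struct scan_control *sc,
			       bool is_classzone)
{
	struct mem_cgroup *target, *memcg;

	if (current_is_kswapd())
		target = smp_load_acquire(&zone->zone_pgdat->memcg);
	else
		target = get_mem_cgroup_from_mm(current->mm);

	/* Try the cgroup that triggered the allocation first. */
	if (shrink_memcg(zone, sc, target, is_classzone))
		return;

	/* Fall back to walking the whole hierarchy. */
	for (memcg = mem_cgroup_iter(NULL, NULL, NULL); memcg;
	     memcg = mem_cgroup_iter(NULL, memcg, NULL))
		shrink_memcg(zone, sc, memcg, is_classzone);
}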
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 575584dc1651..3dc16313a366 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -176,6 +176,11 @@ static inline void mem_cgroup_get(struct mem_cgroup *memcg)
css_get(mem_cgroup_css(memcg));
}
+static inline bool mem_cgroup_tryget(struct mem_cgroup *memcg)
+{
+ return css_tryget(mem_cgroup_css(memcg));
+}
+
static inline void mem_cgroup_put(struct mem_cgroup *memcg)
{
css_put(mem_cgroup_css(memcg));
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 70e925d41445..59f53adfc1c5 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -807,6 +807,7 @@ typedef struct pglist_data {
mem_hotplug_begin/end() */
int kswapd_max_order;
enum zone_type classzone_idx;
+ struct mem_cgroup *memcg;
#ifdef CONFIG_NUMA_BALANCING
/* Lock serializing the migrate rate limiting window */
spinlock_t numabalancing_migrate_lock;
diff --git a/mm/vmscan.c b/mm/vmscan.c
index fe651c6047db..583ba1abfc44 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2510,27 +2510,75 @@ static inline bool should_continue_reclaim(struct zone *zone,
}
}
+static bool mem_cgroup_refaults(struct zone *zone, struct mem_cgroup *memcg)
+{
+ if (memcg) {
+ unsigned long refaults = memcg_ws_activates(memcg);
+ unsigned long snapshot = mem_cgroup_zone_lruvec(zone, memcg)->refaults;
+
+ return refaults != snapshot;
+ }
+ return false;
+}
+
+static unsigned long shrink_memcg(struct zone *zone, struct scan_control *sc,
+ struct mem_cgroup *memcg, bool is_classzone)
+{
+ struct mem_cgroup *root = sc->target_mem_cgroup;
+ struct reclaim_state *reclaim_state = current->reclaim_state;
+ unsigned long lru_pages, reclaimed;
+ bool slab_only = sc->slab_only;
+ struct lruvec *lruvec;
+
+ if (!sc->may_thrash && mem_cgroup_low(root, memcg))
+ return 0;
+
+ if (sc->priority && mem_cgroup_refaults(zone, memcg))
+ return 0;
+
+ reclaimed = sc->nr_reclaimed;
+
+ if (!slab_only) {
+ lruvec = mem_cgroup_zone_lruvec(zone, memcg);
+ sc->swappiness = mem_cgroup_swappiness(memcg);
+ shrink_lruvec(lruvec, sc, &lru_pages);
+ }
+
+ if (is_classzone) {
+ shrink_slab(sc->gfp_mask, zone_to_nid(zone),
+ memcg, sc->priority, false);
+ if (reclaim_state) {
+ sc->nr_reclaimed += reclaim_state->reclaimed_slab;
+ sc->nr_scanned += reclaim_state->reclaimed_slab;
+ reclaim_state->reclaimed_slab = 0;
+ }
+ }
+
+ return sc->nr_reclaimed - reclaimed;
+}
+
static void shrink_zone(struct zone *zone, struct scan_control *sc,
bool is_classzone)
{
struct reclaim_state *reclaim_state = current->reclaim_state;
unsigned long nr_reclaimed, nr_scanned;
- gfp_t slab_gfp = sc->gfp_mask;
- bool slab_only = sc->slab_only;
+ struct mem_cgroup *target_memcg = NULL;
+
+ if (current_is_kswapd()) {
+ target_memcg = smp_load_acquire(&zone->zone_pgdat->memcg);
+ mem_cgroup_get(target_memcg);
+ }
- /* Disable fs-related IO for direct reclaim */
- if (!sc->target_mem_cgroup &&
- (current->flags & (PF_MEMALLOC|PF_KSWAPD)) == PF_MEMALLOC)
- slab_gfp &= ~__GFP_FS;
+ target_memcg = target_memcg ? : get_mem_cgroup_from_mm(current->mm);
do {
+ unsigned long shrinked;
struct mem_cgroup *root = sc->target_mem_cgroup;
struct mem_cgroup_reclaim_cookie reclaim = {
.zone = zone,
.priority = sc->priority,
};
- unsigned long zone_lru_pages = 0;
- struct mem_cgroup *memcg;
+ struct mem_cgroup *memcg = target_memcg;
struct reclaim_stat stat = {};
sc->stat = &stat;
@@ -2538,50 +2586,19 @@ static void shrink_zone(struct zone *zone, struct scan_control *sc,
nr_reclaimed = sc->nr_reclaimed;
nr_scanned = sc->nr_scanned;
- memcg = mem_cgroup_iter(root, NULL, &reclaim);
- do {
- unsigned long lru_pages, scanned;
- struct lruvec *lruvec;
-
- if (!sc->may_thrash && mem_cgroup_low(root, memcg))
- continue;
-
- scanned = sc->nr_scanned;
+ shrinked = shrink_memcg(zone, sc, memcg, is_classzone);
- if (!slab_only) {
- lruvec = mem_cgroup_zone_lruvec(zone, memcg);
- sc->swappiness = mem_cgroup_swappiness(memcg);
- shrink_lruvec(lruvec, sc, &lru_pages);
- zone_lru_pages += lru_pages;
- }
+ if (!shrinked) {
+ memcg = mem_cgroup_iter(root, NULL, &reclaim);
+ do {
+ shrink_memcg(zone, sc, memcg, is_classzone);
- if (is_classzone) {
- shrink_slab(slab_gfp, zone_to_nid(zone),
- memcg, sc->priority, false);
- if (reclaim_state) {
- sc->nr_reclaimed += reclaim_state->reclaimed_slab;
- sc->nr_scanned += reclaim_state->reclaimed_slab;
- reclaim_state->reclaimed_slab = 0;
+ if (sc->nr_reclaimed >= sc->nr_to_reclaim) {
+ mem_cgroup_iter_break(root, memcg);
+ break;
}
-
- }
-
- /*
- * Direct reclaim and kswapd have to scan all memory
- * cgroups to fulfill the overall scan target for the
- * zone.
- *
- * Limit reclaim, on the other hand, only cares about
- * nr_to_reclaim pages to be reclaimed and it will
- * retry with decreasing priority if one round over the
- * whole hierarchy is not sufficient.
- */
- if (!global_reclaim(sc) &&
- sc->nr_reclaimed >= sc->nr_to_reclaim) {
- mem_cgroup_iter_break(root, memcg);
- break;
- }
- } while ((memcg = mem_cgroup_iter(root, memcg, &reclaim)));
+ } while ((memcg = mem_cgroup_iter(root, memcg, &reclaim)));
+ }
if (global_reclaim(sc)) {
/*
@@ -2649,6 +2666,8 @@ static void shrink_zone(struct zone *zone, struct scan_control *sc,
} while (should_continue_reclaim(zone, sc->nr_reclaimed - nr_reclaimed,
sc->nr_scanned - nr_scanned, sc));
+
+ mem_cgroup_put(target_memcg);
}
/* Returns true if compaction should go ahead for a high-order request */
@@ -3811,6 +3830,7 @@ static int kswapd(void *p)
void wakeup_kswapd(struct zone *zone, int order, enum zone_type classzone_idx)
{
pg_data_t *pgdat;
+ struct mem_cgroup *prev_memcg;
if (!populated_zone(zone))
return;
@@ -3827,6 +3847,10 @@ void wakeup_kswapd(struct zone *zone, int order, enum zone_type classzone_idx)
if (zone_watermark_ok_safe(zone, order, low_wmark_pages(zone), 0, 0))
return;
+ prev_memcg = xchg(&pgdat->memcg, get_mem_cgroup_from_mm(current->mm));
+ if (prev_memcg)
+ mem_cgroup_put(prev_memcg);
+
trace_mm_vmscan_wakeup_kswapd(pgdat->node_id, zone_idx(zone), order);
wake_up_interruptible(&pgdat->kswapd_wait);
}
--
2.19.2