[Devel] [PATCH RHEL7 COMMIT] bc/memcg: show correct mem/swap failcnt for beancounters

Konstantin Khorenko khorenko at virtuozzo.com
Mon May 25 02:19:04 PDT 2015


The commit is pushed to "branch-rh7-3.10.0-123.1.2-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh7-3.10.0-123.1.2.vz7.5.5
------>
commit 3fe10e41f46d0078bf6fb50a98d5c1becd5c2b82
Author: Vladimir Davydov <vdavydov at parallels.com>
Date:   Mon May 25 13:19:04 2015 +0400

    bc/memcg: show correct mem/swap failcnt for beancounters
    
    Patchset description:
    
    ub: fix memcg resource accounting
    
    UB_PHYSPAGES, UB_SWAPPAGES, UB_KMEMSIZE are now accounted on the memcg
    side. However, some stats differ between beancounters and memcg (e.g.
    failcnt). This patch set fixes them. For more details, see individual
    patches.
    
    Related to https://jira.sw.ru/browse/PSBM-20089
    
    Vladimir Davydov (4):
      memcg: show correct swap max for beancounters
      memcg: show correct mem/swap failcnt for beancounters
      memcg: assure swap->held/maxheld < swap->limit for beancounters
      ub: deprecate kmemsize limit
    
    =================================================================
    This patch description:
    
    We should increment UB_PHYSPAGES failcnt if we failed to charge a page
    and local reclaim does not help. We should also increment UB_SWAPPAGES
    if there is no swap space left. However, currently we just show
    memory.failcnt and memory.memsw.failcnt instead, which are incremented
    any time we fail to charge the corresponding resource counter, no matter
    whether we eventually succeed in reclaiming memory from the memcg or
    not. Fix this.
    
    Signed-off-by: Vladimir Davydov <vdavydov at parallels.com>
    Reviewed-by: Kirill Tkhai <ktkhai at odin.com>
---
 mm/memcontrol.c | 29 +++++++++++++++++++++++++----
 1 file changed, 25 insertions(+), 4 deletions(-)

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index f164a94..7d7c2e8 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -281,6 +281,8 @@ struct mem_cgroup {
 
 	/* beancounter-related stats */
 	unsigned long long swap_max;
+	atomic_long_t mem_failcnt;
+	atomic_long_t swap_failcnt;
 
 	/*
 	 * Should the accounting and control be hierarchical, per subtree?
@@ -884,6 +886,18 @@ static void mem_cgroup_update_swap_max(struct mem_cgroup *memcg)
 		memcg->swap_max = swap;
 }
 
+static void mem_cgroup_inc_failcnt(struct mem_cgroup *memcg,
+				   gfp_t gfp_mask, unsigned int nr_pages)
+{
+	if (gfp_mask & __GFP_NOWARN)
+		return;
+
+	atomic_long_inc(&memcg->mem_failcnt);
+	if (do_swap_account &&
+	    res_counter_margin(&memcg->memsw) < nr_pages * PAGE_SIZE)
+		atomic_long_inc(&memcg->swap_failcnt);
+}
+
 static unsigned long mem_cgroup_read_events(struct mem_cgroup *memcg,
 					    enum mem_cgroup_events_index idx)
 {
@@ -2618,11 +2632,15 @@ static int mem_cgroup_do_charge(struct mem_cgroup *memcg, gfp_t gfp_mask,
 	if (nr_pages > min_pages)
 		return CHARGE_RETRY;
 
-	if (!(gfp_mask & __GFP_WAIT))
+	if (!(gfp_mask & __GFP_WAIT)) {
+		mem_cgroup_inc_failcnt(mem_over_limit, gfp_mask, nr_pages);
 		return CHARGE_WOULDBLOCK;
+	}
 
-	if (gfp_mask & __GFP_NORETRY)
+	if (gfp_mask & __GFP_NORETRY) {
+		mem_cgroup_inc_failcnt(mem_over_limit, gfp_mask, nr_pages);
 		return CHARGE_NOMEM;
+	}
 
 	ret = mem_cgroup_reclaim(mem_over_limit, gfp_mask, flags);
 	if (mem_cgroup_margin(mem_over_limit) >= nr_pages)
@@ -2649,6 +2667,9 @@ static int mem_cgroup_do_charge(struct mem_cgroup *memcg, gfp_t gfp_mask,
 	/* If we don't need to call oom-killer at el, return immediately */
 	if (!oom_check)
 		return CHARGE_NOMEM;
+
+	mem_cgroup_inc_failcnt(mem_over_limit, gfp_mask, nr_pages);
+
 	/* check OOM */
 	if (!mem_cgroup_handle_oom(mem_over_limit, gfp_mask, get_order(csize)))
 		return CHARGE_OOM_DIE;
@@ -5139,7 +5160,7 @@ void mem_cgroup_fill_ub_parms(struct cgroup *cg,
 
 	p->held	= res_counter_read_u64(&memcg->res, RES_USAGE) >> PAGE_SHIFT;
 	p->maxheld = res_counter_read_u64(&memcg->res, RES_MAX_USAGE) >> PAGE_SHIFT;
-	p->failcnt = res_counter_read_u64(&memcg->res, RES_FAILCNT);
+	p->failcnt = atomic_long_read(&memcg->mem_failcnt);
 	lim = res_counter_read_u64(&memcg->res, RES_LIMIT);
 	lim = lim == RESOURCE_MAX ? UB_MAXVALUE :
 		min_t(unsigned long long, lim >> PAGE_SHIFT, UB_MAXVALUE);
@@ -5156,7 +5177,7 @@ void mem_cgroup_fill_ub_parms(struct cgroup *cg,
 	s->held	= res_counter_read_u64(&memcg->memsw, RES_USAGE) >> PAGE_SHIFT;
 	s->held -= p->held;
 	s->maxheld = memcg->swap_max >> PAGE_SHIFT;
-	s->failcnt = res_counter_read_u64(&memcg->memsw, RES_FAILCNT);
+	s->failcnt = atomic_long_read(&memcg->swap_failcnt);
 	lim = res_counter_read_u64(&memcg->memsw, RES_LIMIT);
 	lim = lim == RESOURCE_MAX ? UB_MAXVALUE :
 		min_t(unsigned long long, lim >> PAGE_SHIFT, UB_MAXVALUE);



More information about the Devel mailing list