[Devel] [PATCH RHEL7 COMMIT] bc/memcg: show correct swap max for beancounters

Konstantin Khorenko khorenko at virtuozzo.com
Mon May 25 02:18:53 PDT 2015


The commit is pushed to "branch-rh7-3.10.0-123.1.2-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh7-3.10.0-123.1.2.vz7.5.5
------>
commit a74376e2dde136629060dcb957d84c85545e3961
Author: Vladimir Davydov <vdavydov at parallels.com>
Date:   Mon May 25 13:18:53 2015 +0400

    bc/memcg: show correct swap max for beancounters
    
    Patchset description:
    
    ub: fix memcg resource accounting
    
    UB_PHYSPAGES, UB_SWAPPAGES, UB_KMEMSIZE are now accounted on the memcg
    side. However, some stats still differ between beancounters and memcg
    (e.g. failcnt). This patch set fixes them. For more details, see the
    individual patches.
    
    Related to https://jira.sw.ru/browse/PSBM-20089
    
    Vladimir Davydov (4):
      memcg: show correct swap max for beancounters
      memcg: show correct mem/swap failcnt for beancounters
      memcg: assure swap->held/maxheld < swap->limit for beancounters
      ub: deprecate kmemsize limit
    
    =================================================================
    This patch description:
    
    Currently, we assume
    
      swap.max_held = memory.memsw.max_usage - memory.max_usage
    
    which is wrong: the maximum of memory+swap usage and the maximum of
    memory usage are generally reached at different moments, so their
    difference need not equal the maximal swap usage. Keep track of the
    maximal swap usage explicitly and report it instead. Note that we do
    not care about precision and hence do not use any locking to protect
    the maximal value: it does not really matter if max_held ends up a
    couple of pages greater or less than the true value.
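    
    Purely as an illustration (made-up sample numbers and plain user-space
    C, not the kernel code or API), the sketch below shows how the two
    maxima can be hit at different times, so the old formula misreports
    the maximum, while tracking the difference itself gives the right
    answer:
    
    	#include <stdio.h>
    
    	struct sample {
    		unsigned long long mem;		/* memory usage, pages */
    		unsigned long long memsw;	/* memory + swap usage, pages */
    	};
    
    	int main(void)
    	{
    		/* two hypothetical usage snapshots of one container */
    		struct sample s[] = {
    			{ 100, 150 },	/* 50 pages in swap */
    			{ 200, 210 },	/* 10 pages in swap */
    		};
    		unsigned long long mem_max = 0, memsw_max = 0, swap_max = 0;
    		unsigned int i;
    
    		for (i = 0; i < sizeof(s) / sizeof(s[0]); i++) {
    			if (s[i].mem > mem_max)
    				mem_max = s[i].mem;
    			if (s[i].memsw > memsw_max)
    				memsw_max = s[i].memsw;
    			/* what the patch does: track the difference itself */
    			if (s[i].memsw - s[i].mem > swap_max)
    				swap_max = s[i].memsw - s[i].mem;
    		}
    
    		/* old formula reports 10, the real maximum was 50 */
    		printf("memsw_max - mem_max = %llu, swap_max = %llu\n",
    		       memsw_max - mem_max, swap_max);
    		return 0;
    	}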
    
    Signed-off-by: Vladimir Davydov <vdavydov at parallels.com>
    Reviewed-by: Kirill Tkhai <ktkhai at odin.com>
---
 mm/memcontrol.c | 27 +++++++++++++++++++++++----
 1 file changed, 23 insertions(+), 4 deletions(-)

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 9dda309..f164a94 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -278,6 +278,10 @@ struct mem_cgroup {
 	 * the counter to account for kernel memory usage.
 	 */
 	struct res_counter kmem;
+
+	/* beancounter-related stats */
+	unsigned long long swap_max;
+
 	/*
 	 * Should the accounting and control be hierarchical, per subtree?
 	 */
@@ -868,6 +872,18 @@ static void mem_cgroup_swap_statistics(struct mem_cgroup *memcg,
 	this_cpu_add(memcg->stat->count[MEM_CGROUP_STAT_SWAP], val);
 }
 
+static void mem_cgroup_update_swap_max(struct mem_cgroup *memcg)
+{
+	long long swap;
+
+	swap = res_counter_read_u64(&memcg->memsw, RES_USAGE) -
+		res_counter_read_u64(&memcg->res, RES_USAGE);
+
+	/* This is racy, but we don't have to be absolutely precise */
+	if (swap > (long long)memcg->swap_max)
+		memcg->swap_max = swap;
+}
+
 static unsigned long mem_cgroup_read_events(struct mem_cgroup *memcg,
 					    enum mem_cgroup_events_index idx)
 {
@@ -3892,6 +3908,7 @@ __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype,
 	memcg_check_events(memcg, page);
 	if (do_swap_account && ctype == MEM_CGROUP_CHARGE_TYPE_SWAPOUT) {
 		mem_cgroup_swap_statistics(memcg, true);
+		mem_cgroup_update_swap_max(memcg);
 		css_get(&memcg->css);
 	}
 	/*
@@ -5137,14 +5154,12 @@ void mem_cgroup_fill_ub_parms(struct cgroup *cg,
 	k->barrier = k->limit = lim;
 
 	s->held	= res_counter_read_u64(&memcg->memsw, RES_USAGE) >> PAGE_SHIFT;
-	s->maxheld = res_counter_read_u64(&memcg->memsw, RES_MAX_USAGE) >> PAGE_SHIFT;
+	s->held -= p->held;
+	s->maxheld = memcg->swap_max >> PAGE_SHIFT;
 	s->failcnt = res_counter_read_u64(&memcg->memsw, RES_FAILCNT);
 	lim = res_counter_read_u64(&memcg->memsw, RES_LIMIT);
 	lim = lim == RESOURCE_MAX ? UB_MAXVALUE :
 		min_t(unsigned long long, lim >> PAGE_SHIFT, UB_MAXVALUE);
-
-	s->held -= p->held;
-	s->maxheld -= p->maxheld;
 	if (lim != UB_MAXVALUE)
 		lim -= p->limit;
 	s->barrier = s->limit = lim;
@@ -6580,6 +6595,10 @@ static void __mem_cgroup_clear_mc(void)
 		/* we've already done css_get(mc.to) */
 		mc.moved_swap = 0;
 	}
+	if (do_swap_account) {
+		mem_cgroup_update_swap_max(from);
+		mem_cgroup_update_swap_max(to);
+	}
 	memcg_oom_recover(from);
 	memcg_oom_recover(to);
 	wake_up_all(&mc.waitq);
