[Devel] [PATCH RHEL9 COMMIT] memcg: fix oom_guarantee to be considered in global oom and in berserker

Konstantin Khorenko khorenko at virtuozzo.com
Tue Mar 26 16:48:42 MSK 2024


The commit is pushed to "branch-rh9-5.14.0-362.18.1.vz9.40.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh9-5.14.0-362.18.1.vz9.40.3
------>
commit 0f56a6a2a4bfe53f2774b92fc97349166a819555
Author: Pavel Tikhomirov <ptikhomirov at virtuozzo.com>
Date:   Mon Mar 25 17:33:52 2024 +0800

    memcg: fix oom_guarantee to be considered in global oom and in berserker
    
    After the rebase to VZ8, the code which sets memcg->overdraft was moved to
    mem_cgroup_scan_tasks(), which is completely wrong (for instance, in VZ7
    we had this code in oom_unlock(), so everywhere in oom we always had
    ->overdraft information properly set). Now we don't have a proper refresh
    of ->overdraft information in two cases: in global oom and in berserker.
    
    Let's fix this by splitting the refresh code into a separate function,
    refresh_mem_cgroup_overdraft(), and calling it where it is really needed
    (where oom_badness later in the stack uses the refreshed ->overdraft).
    
    Fixes: c31dabeaf42d ("memcg: add oom_guarantee")
    Signed-off-by: Pavel Tikhomirov <ptikhomirov at virtuozzo.com>
    
    Feature: mm/oom: OOM guarantee feature
---
 include/linux/memcontrol.h |  6 ++++++
 mm/memcontrol.c            | 30 +++++++++++++++++++-----------
 mm/oom_kill.c              |  4 ++++
 3 files changed, 29 insertions(+), 11 deletions(-)

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 4719eb84894f..9c2b8774639e 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -22,6 +22,7 @@
 #include <linux/writeback.h>
 #include <linux/page-flags.h>
 #include <linux/virtinfo.h>
+#include <linux/oom.h>
 
 struct mem_cgroup;
 struct obj_cgroup;
@@ -1004,6 +1005,7 @@ void mem_cgroup_print_oom_context(struct mem_cgroup *memcg,
 
 void mem_cgroup_print_oom_meminfo(struct mem_cgroup *memcg);
 unsigned long mem_cgroup_overdraft(struct mem_cgroup *memcg);
+void refresh_mem_cgroup_overdraft(struct oom_control *oc);
 
 static inline void mem_cgroup_enter_user_fault(void)
 {
@@ -1529,6 +1531,10 @@ static inline unsigned long mem_cgroup_overdraft(struct mem_cgroup *memcg)
 	return 0;
 }
 
+static inline void refresh_mem_cgroup_overdraft(struct oom_control *oc)
+{
+}
+
 static inline void
 mem_cgroup_print_oom_meminfo(struct mem_cgroup *memcg)
 {
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index b7c6ee09ab9f..2b6b7dd5a1c0 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1308,17 +1308,6 @@ int mem_cgroup_scan_tasks(struct mem_cgroup *memcg,
 	for_each_mem_cgroup_tree(iter, memcg) {
 		struct css_task_iter it;
 		struct task_struct *task;
-		struct mem_cgroup *parent;
-
-		/*
-		 * Update overdraft of each cgroup under us. This
-		 * information will be used in oom_badness.
-		 */
-		iter->overdraft = mem_cgroup_overdraft(iter);
-		parent = parent_mem_cgroup(iter);
-		if (parent && iter != memcg)
-			iter->overdraft = max(iter->overdraft,
-					parent->overdraft);
 
 		css_task_iter_start(&iter->css, CSS_TASK_ITER_PROCS, &it);
 		while (!ret && (task = css_task_iter_next(&it)))
@@ -1518,6 +1507,25 @@ unsigned long mem_cgroup_overdraft(struct mem_cgroup *memcg)
 	return usage > guarantee ? (usage - guarantee) : 0;
 }
 
+void refresh_mem_cgroup_overdraft(struct oom_control *oc)
+{
+	struct mem_cgroup *iter;
+
+	for_each_mem_cgroup_tree(iter, oc->memcg) {
+		struct mem_cgroup *parent;
+
+		/*
+		 * Update overdraft of each cgroup under us. This
+		 * information will be used in oom_badness.
+		 */
+		iter->overdraft = mem_cgroup_overdraft(iter);
+		parent = parent_mem_cgroup(iter);
+		if (parent && iter != oc->memcg)
+			iter->overdraft = max(iter->overdraft,
+					      parent->overdraft);
+	}
+}
+
 bool mem_cgroup_dcache_is_low(struct mem_cgroup *memcg, int vfs_cache_min_ratio)
 {
 	unsigned long anon, file, dcache;
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 1d44024d6c6f..d27e04295e7f 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -378,6 +378,8 @@ static void select_bad_process(struct oom_control *oc)
 {
 	oc->chosen_points = LONG_MIN;
 
+	refresh_mem_cgroup_overdraft(oc);
+
 	if (is_memcg_oom(oc))
 		mem_cgroup_scan_tasks(oc->memcg, oom_evaluate_task, oc);
 	else {
@@ -1073,6 +1075,8 @@ static void oom_berserker(struct oom_control *oc)
 	if (rage < 0)
 		return;
 
+	refresh_mem_cgroup_overdraft(oc);
+
 	/*
 	 * So, we are in rage. Kill (1 << rage) youngest tasks that are
 	 * as bad as the victim.


More information about the Devel mailing list