[Devel] [PATCH RHEL7 COMMIT] ve/mm: ignore oom_score_adj of containerized tasks on global OOM

Konstantin Khorenko khorenko at virtuozzo.com
Mon Jun 8 09:10:03 PDT 2015


The commit is pushed to "branch-rh7-3.10.0-123.1.2-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh7-3.10.0-123.1.2.vz7.5.11
------>
commit 08502323926d07c9c8bc2733e6ee3cf4896be472
Author: Vladimir Davydov <vdavydov at parallels.com>
Date:   Mon Jun 8 20:10:03 2015 +0400

    ve/mm: ignore oom_score_adj of containerized tasks on global OOM
    
    Series description:
    
    This patch set changes /proc/PID/{oom_score_adj,oom_adj,oom_score} behavior
    inside a CT, resurrecting /proc/vz/oom_score_adj along the way. For more
    details, see the individual patches.
    
    https://jira.sw.ru/browse/PSBM-33849
    ====================================================================
    This patch description:
    
    When the system comes to the point when it is so utterly out of memory
    that it is time to sacrifice someone, it should dismiss container users'
    preferences. It should just go and kill the fattest task in order to
    free room for others to breathe.
    
    Signed-off-by: Vladimir Davydov <vdavydov at parallels.com>
    Acked-by: Andrew Vagin <avagin at odin.com>
---
 fs/proc/base.c      |  7 +++++--
 include/linux/oom.h | 10 ++++++++++
 mm/oom_kill.c       | 10 ++++++++++
 3 files changed, 25 insertions(+), 2 deletions(-)

diff --git a/fs/proc/base.c b/fs/proc/base.c
index f551cff..ef44051 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -448,13 +448,16 @@ static int proc_oom_score(struct task_struct *task, char *buffer)
 {
 	unsigned long totalpages = totalram_pages + total_swap_pages;
 	unsigned long points = 0;
+	struct mem_cgroup *memcg = NULL;
 
-	if (!ve_is_super(get_exec_env()))
+	if (!ve_is_super(get_exec_env())) {
 		totalpages = min(totalpages, mem_cgroup_total_pages(true));
+		memcg = OOM_BADNESS_DUMMY_MEMCG;
+	}
 
 	read_lock(&tasklist_lock);
 	if (pid_alive(task))
-		points = oom_badness(task, NULL, NULL, totalpages) *
+		points = oom_badness(task, memcg, NULL, totalpages) *
 						1000 / totalpages;
 	read_unlock(&tasklist_lock);
 	return sprintf(buffer, "%lu\n", points);
diff --git a/include/linux/oom.h b/include/linux/oom.h
index 17100d0..04f4f57 100644
--- a/include/linux/oom.h
+++ b/include/linux/oom.h
@@ -52,6 +52,16 @@ static inline bool oom_task_origin(const struct task_struct *p)
 /* linux/mm/oom_group.c */
 extern int get_task_oom_score_adj(struct task_struct *t);
 
+/*
+ * oom_score_adj must be 0 for containerized tasks on system-wide OOM, so
+ * oom_badness always treats it as 0 if memcg == NULL. However, we need to show
+ * real oom_badness when /proc/PID/oom_score is read from inside a container.
+ * Since procuring the memcg corresponding to a container is rather tricky, we
+ * pass OOM_BADNESS_DUMMY_MEMCG instead, which will make oom_badness act as if
+ * it was called on local OOM, but without dereferencing the memcg ptr.
+ */
+#define OOM_BADNESS_DUMMY_MEMCG		((struct mem_cgroup *)1UL)
+
 extern unsigned long oom_badness(struct task_struct *p, struct mem_cgroup *memcg,
 			  const nodemask_t *nodemask, unsigned long totalpages);
 extern void oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order,
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 7201f7c..7971030 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -149,9 +149,14 @@ static bool oom_unkillable_task(struct task_struct *p,
 unsigned long oom_badness(struct task_struct *p, struct mem_cgroup *memcg,
 			  const nodemask_t *nodemask, unsigned long totalpages)
 {
+	bool global;
 	long points;
 	long adj;
 
+	global = !memcg;
+	if (memcg == OOM_BADNESS_DUMMY_MEMCG)
+		memcg = NULL;
+
 	if (oom_unkillable_task(p, memcg, nodemask))
 		return 0;
 
@@ -160,6 +165,11 @@ unsigned long oom_badness(struct task_struct *p, struct mem_cgroup *memcg,
 		return 0;
 
 	adj = (long)p->signal->oom_score_adj;
+#ifdef CONFIG_VE
+	/* Ignore oom_score_adj of containerized tasks on system-wide OOM */
+	if (global && p->task_ve != &ve0)
+		adj = 0;
+#endif
 	if (adj == OOM_SCORE_ADJ_MIN) {
 		task_unlock(p);
 		return 0;



More information about the Devel mailing list