[Devel] [PATCH 11/17] oom: introduce oom kill timeout
Vladimir Davydov
vdavydov at parallels.com
Fri Aug 14 10:03:35 PDT 2015
Currently, we won't select a new oom victim until the previous one has
passed away. This might lead to a deadlock if an allocating task holds a
lock needed by the victim to complete. To cope with this problem, this
patch introduces an OOM kill timeout, after which a new task will be
selected even if the previous victim hasn't died. The timeout can be configured
by sysctl vm.oom_timeout. It equals 5 seconds by default.
https://jira.sw.ru/browse/PSBM-38581
Signed-off-by: Vladimir Davydov <vdavydov at parallels.com>
---
include/linux/oom.h | 3 +++
include/linux/sched.h | 2 ++
kernel/sysctl.c | 7 +++++++
mm/memcontrol.c | 2 +-
mm/oom_kill.c | 26 +++++++++++++++++++++-----
5 files changed, 34 insertions(+), 6 deletions(-)
diff --git a/include/linux/oom.h b/include/linux/oom.h
index 201aa1ab1dd2..f16e35bee7d9 100644
--- a/include/linux/oom.h
+++ b/include/linux/oom.h
@@ -52,6 +52,8 @@ static inline bool oom_task_origin(const struct task_struct *p)
/* linux/mm/oom_group.c */
extern int get_task_oom_score_adj(struct task_struct *t);
+extern void set_tsk_memdie(struct task_struct *p);
+
extern unsigned long oom_badness(struct task_struct *p, struct mem_cgroup *memcg,
const nodemask_t *nodemask, unsigned long totalpages);
extern void oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order,
@@ -92,4 +94,5 @@ extern struct task_struct *find_lock_task_mm(struct task_struct *p);
extern int sysctl_oom_dump_tasks;
extern int sysctl_oom_kill_allocating_task;
extern int sysctl_panic_on_oom;
+extern int sysctl_oom_timeout;
#endif /* _INCLUDE_LINUX_OOM_H */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 12dc066e0681..ac5f9f2ba4b3 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1582,6 +1582,8 @@ struct task_struct {
unsigned int sequential_io_avg;
#endif
+ unsigned long memdie_start;
+
/* reserved for Red Hat */
#ifdef CONFIG_DETECT_HUNG_TASK
RH_KABI_USE(1, unsigned long last_switch_count)
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 8478a1e762d5..5a3ff6cb15fc 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -1184,6 +1184,13 @@ static struct ctl_table vm_table[] = {
.proc_handler = proc_dointvec,
},
{
+ .procname = "oom_timeout",
+ .data = &sysctl_oom_timeout,
+ .maxlen = sizeof(sysctl_oom_timeout),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_ms_jiffies,
+ },
+ {
.procname = "overcommit_ratio",
.data = &sysctl_overcommit_ratio,
.maxlen = sizeof(sysctl_overcommit_ratio),
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 47d859c6cb6f..b27b4148600b 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1916,7 +1916,7 @@ static void mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask,
* quickly exit and free its memory.
*/
if (fatal_signal_pending(current) || current->flags & PF_EXITING) {
- set_thread_flag(TIF_MEMDIE);
+ set_tsk_memdie(current);
return;
}
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index ca765a82fa1a..0d569020390a 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -42,6 +42,7 @@
int sysctl_panic_on_oom;
int sysctl_oom_kill_allocating_task;
int sysctl_oom_dump_tasks = 1;
+int sysctl_oom_timeout = 5 * HZ;
static DEFINE_SPINLOCK(zone_scan_lock);
#ifdef CONFIG_NUMA
@@ -270,6 +271,10 @@ enum oom_scan_t oom_scan_process_thread(struct task_struct *task,
if (test_tsk_thread_flag(task, TIF_MEMDIE)) {
if (unlikely(frozen(task)))
__thaw_task(task);
+ smp_rmb(); /* matches smp_wmb() in set_tsk_memdie() */
+ if (time_after(jiffies, task->memdie_start +
+ sysctl_oom_timeout))
+ return OOM_SCAN_CONTINUE;
if (!force_kill)
return OOM_SCAN_ABORT;
}
@@ -283,13 +288,13 @@ enum oom_scan_t oom_scan_process_thread(struct task_struct *task,
if (oom_task_origin(task))
return OOM_SCAN_SELECT;
- if (task->flags & PF_EXITING && !force_kill) {
+ if (task->flags & PF_EXITING) {
/*
* If this task is not being ptraced on exit, then wait for it
* to finish before killing some other task unnecessarily.
*/
if (!(task->group_leader->ptrace & PT_TRACE_EXIT))
- return OOM_SCAN_ABORT;
+ return OOM_SCAN_SELECT;
}
return OOM_SCAN_OK;
}
@@ -412,6 +417,15 @@ static void dump_header(struct task_struct *p, gfp_t gfp_mask, int order,
dump_tasks(memcg, nodemask);
}
+void set_tsk_memdie(struct task_struct *p)
+{
+ if (!test_tsk_thread_flag(p, TIF_MEMDIE)) {
+ p->memdie_start = jiffies;
+ smp_wmb(); /* matches smp_rmb() in oom_scan_process_thread() */
+ set_tsk_thread_flag(p, TIF_MEMDIE);
+ }
+}
+
#define K(x) ((x) << (PAGE_SHIFT-10))
/*
* Must be called while holding a reference to p, which will be released upon
@@ -435,7 +449,7 @@ void oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order,
* its children or threads, just set TIF_MEMDIE so it can die quickly
*/
if (p->flags & PF_EXITING) {
- set_tsk_thread_flag(p, TIF_MEMDIE);
+ set_tsk_memdie(p);
goto out;
}
@@ -460,6 +474,8 @@ void oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order,
if (child->mm == p->mm)
continue;
+ if (test_tsk_thread_flag(child, TIF_MEMDIE))
+ continue;
/*
* oom_badness() returns 0 if the thread is unkillable
*/
@@ -518,7 +534,7 @@ void oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order,
}
rcu_read_unlock();
- set_tsk_thread_flag(victim, TIF_MEMDIE);
+ set_tsk_memdie(victim);
do_send_sig_info(SIGKILL, SEND_SIG_FORCED, victim, true);
mem_cgroup_note_oom_kill(memcg, victim);
out:
@@ -649,7 +665,7 @@ void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask,
* quickly exit and free its memory.
*/
if (fatal_signal_pending(current) || current->flags & PF_EXITING) {
- set_thread_flag(TIF_MEMDIE);
+ set_tsk_memdie(current);
boost_dying_task(current);
return;
}
--
2.1.4
More information about the Devel
mailing list