[Devel] [PATCH rh7 v2 4/6] oom: introduce oom timeout

Vladimir Davydov vdavydov at parallels.com
Fri Sep 11 08:18:04 PDT 2015


Currently, we won't select a new oom victim until the previous one has
passed away. This might lead to a deadlock if an allocating task holds a
lock needed by the victim to complete. To cope with this problem, this
patch introduced oom timeout, after which a new task will be selected
even if the previous victim hasn't died. The timeout is hard-coded,
equals 5 seconds.

https://jira.sw.ru/browse/PSBM-38581

Signed-off-by: Vladimir Davydov <vdavydov at parallels.com>
---
 include/linux/oom.h |  1 +
 mm/oom_kill.c       | 48 +++++++++++++++++++++++++++++++++++++++---------
 2 files changed, 40 insertions(+), 9 deletions(-)

diff --git a/include/linux/oom.h b/include/linux/oom.h
index e19385dd29aa..caeda102e7ba 100644
--- a/include/linux/oom.h
+++ b/include/linux/oom.h
@@ -34,6 +34,7 @@ enum oom_scan_t {
 struct oom_context {
 	struct task_struct *owner;
 	struct task_struct *victim;
+	unsigned long oom_start;
 	wait_queue_head_t waitq;
 };
 
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index c9265092825d..130318ee3598 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -45,6 +45,8 @@ int sysctl_oom_dump_tasks;
 
 static DEFINE_SPINLOCK(oom_context_lock);
 
+#define OOM_TIMEOUT	(5 * HZ)
+
 #ifndef CONFIG_MEMCG
 struct oom_context oom_ctx = {
 	.waitq		= __WAIT_QUEUE_HEAD_INITIALIZER(oom_ctx.waitq),
@@ -55,6 +57,7 @@ void init_oom_context(struct oom_context *ctx)
 {
 	ctx->owner = NULL;
 	ctx->victim = NULL;
+	ctx->oom_start = 0;
 	init_waitqueue_head(&ctx->waitq);
 }
 
@@ -286,11 +289,14 @@ enum oom_scan_t oom_scan_process_thread(struct task_struct *task,
 
 	/*
 	 * This task already has access to memory reserves and is being killed.
-	 * Don't allow any other task to have access to the reserves.
+	 * Try to select another one.
+	 *
+	 * This can only happen if oom_trylock timeout-ed, which most probably
+	 * means that the victim had dead-locked.
 	 */
 	if (test_tsk_thread_flag(task, TIF_MEMDIE)) {
 		if (!force_kill)
-			return OOM_SCAN_ABORT;
+			return OOM_SCAN_CONTINUE;
 	}
 	if (!task->mm)
 		return OOM_SCAN_CONTINUE;
@@ -645,6 +651,7 @@ EXPORT_SYMBOL_GPL(unregister_oom_notifier);
 
 bool oom_trylock(struct mem_cgroup *memcg)
 {
+	unsigned long now = jiffies;
 	struct mem_cgroup *iter;
 	struct oom_context *ctx;
 	DEFINE_WAIT(wait);
@@ -660,14 +667,29 @@ bool oom_trylock(struct mem_cgroup *memcg)
 	iter = mem_cgroup_iter(memcg, NULL, NULL);
 	do {
 		ctx = mem_cgroup_oom_context(iter);
-		if (ctx->owner || ctx->victim) {
+		if ((ctx->owner || ctx->victim) &&
+		    time_before(now, ctx->oom_start + OOM_TIMEOUT)) {
 			prepare_to_wait(&ctx->waitq, &wait,
 					TASK_KILLABLE);
 			spin_unlock(&oom_context_lock);
-			schedule();
+			schedule_timeout(ctx->oom_start + OOM_TIMEOUT - now);
 			finish_wait(&ctx->waitq, &wait);
 			mem_cgroup_iter_break(memcg, iter);
 			return false;
+		} else if (ctx->owner || ctx->victim) {
+			/*
+			 * Timeout. Release the context.
+			 */
+			struct task_struct *p = ctx->victim ?: ctx->owner;
+
+			task_lock(p);
+			pr_err("OOM kill timeout: %d (%s)\n",
+			       task_pid_nr(p), p->comm);
+			task_unlock(p);
+			show_stack(p, NULL);
+
+			ctx->owner = ctx->victim = NULL;
+			wake_up_all(&ctx->waitq);
 		}
 	} while ((iter = mem_cgroup_iter(memcg, iter, NULL)));
 
@@ -680,6 +702,7 @@ bool oom_trylock(struct mem_cgroup *memcg)
 		BUG_ON(ctx->owner);
 		BUG_ON(ctx->victim);
 		ctx->owner = current;
+		ctx->oom_start = now;
 	} while ((iter = mem_cgroup_iter(memcg, iter, NULL)));
 
 	spin_unlock(&oom_context_lock);
@@ -689,9 +712,10 @@ bool oom_trylock(struct mem_cgroup *memcg)
 
 void oom_unlock(struct mem_cgroup *memcg)
 {
+	unsigned long now = jiffies;
+	unsigned long timeout = 0;
 	struct mem_cgroup *iter, *victim_memcg = NULL;
 	struct oom_context *ctx;
-	bool need_to_wait = false;
 	DEFINE_WAIT(wait);
 
 	spin_lock(&oom_context_lock);
@@ -714,8 +738,15 @@ void oom_unlock(struct mem_cgroup *memcg)
 		}
 
 		/* Already waiting? */
-		if (need_to_wait)
+		if (timeout > 0)
 			continue;
+		/* Expired? */
+		if (time_after_eq(now, ctx->oom_start + OOM_TIMEOUT))
+			continue;
+
+		timeout = ctx->oom_start + OOM_TIMEOUT - now;
+		BUG_ON(timeout == 0);
+
 		/*
 		 * Remember victim memcg so that we can wait for victim
 		 * to exit below.
@@ -724,13 +755,12 @@ void oom_unlock(struct mem_cgroup *memcg)
 		mem_cgroup_get(iter);
 
 		prepare_to_wait(&ctx->waitq, &wait, TASK_KILLABLE);
-		need_to_wait = true;
 	} while ((iter = mem_cgroup_iter(memcg, iter, NULL)));
 
 	spin_unlock(&oom_context_lock);
 
-	if (need_to_wait) {
-		schedule();
+	if (timeout > 0) {
+		schedule_timeout(timeout);
 		ctx = mem_cgroup_oom_context(victim_memcg);
 		finish_wait(&ctx->waitq, &wait);
 		mem_cgroup_put(victim_memcg);
-- 
2.1.4




More information about the Devel mailing list