[Devel] [PATCH rh7 4/6] oom: introduce oom timeout
Kirill Tkhai
ktkhai at odin.com
Fri Sep 11 06:41:57 PDT 2015
On 11.09.2015 12:54, Vladimir Davydov wrote:
> Currently, we won't select a new oom victim until the previous one has
> passed away. This might lead to a deadlock if an allocating task holds a
> lock needed by the victim to complete. To cope with this problem, this
> patch introduced oom timeout, after which a new task will be selected
> even if the previous victim hasn't died. The timeout is hard-coded,
> equals 5 seconds.
>
> https://jira.sw.ru/browse/PSBM-38581
>
> Signed-off-by: Vladimir Davydov <vdavydov at parallels.com>
> ---
> include/linux/oom.h | 1 +
> mm/oom_kill.c | 48 +++++++++++++++++++++++++++++++++++++++---------
> 2 files changed, 40 insertions(+), 9 deletions(-)
>
> diff --git a/include/linux/oom.h b/include/linux/oom.h
> index e19385dd29aa..caeda102e7ba 100644
> --- a/include/linux/oom.h
> +++ b/include/linux/oom.h
> @@ -34,6 +34,7 @@ enum oom_scan_t {
> struct oom_context {
> struct task_struct *owner;
> struct task_struct *victim;
> + unsigned long oom_start;
> wait_queue_head_t waitq;
> };
>
> diff --git a/mm/oom_kill.c b/mm/oom_kill.c
> index c9265092825d..bf101f9242ff 100644
> --- a/mm/oom_kill.c
> +++ b/mm/oom_kill.c
> @@ -45,6 +45,8 @@ int sysctl_oom_dump_tasks;
>
> static DEFINE_SPINLOCK(oom_context_lock);
>
> +#define OOM_TIMEOUT (5 * HZ)
> +
> #ifndef CONFIG_MEMCG
> struct oom_context oom_ctx = {
> .waitq = __WAIT_QUEUE_HEAD_INITIALIZER(oom_ctx.waitq),
> @@ -55,6 +57,7 @@ void init_oom_context(struct oom_context *ctx)
> {
> ctx->owner = NULL;
> ctx->victim = NULL;
> + ctx->oom_start = 0;
> init_waitqueue_head(&ctx->waitq);
> }
>
> @@ -286,11 +289,14 @@ enum oom_scan_t oom_scan_process_thread(struct task_struct *task,
>
> /*
> * This task already has access to memory reserves and is being killed.
> - * Don't allow any other task to have access to the reserves.
> + * Try to select another one.
> + *
> + * This can only happen if oom_trylock timeout-ed, which most probably
> + * means that the victim had dead-locked.
> */
> if (test_tsk_thread_flag(task, TIF_MEMDIE)) {
> if (!force_kill)
> - return OOM_SCAN_ABORT;
> + return OOM_SCAN_CONTINUE;
> }
> if (!task->mm)
> return OOM_SCAN_CONTINUE;
> @@ -645,6 +651,7 @@ EXPORT_SYMBOL_GPL(unregister_oom_notifier);
>
> bool oom_trylock(struct mem_cgroup *memcg)
> {
> + unsigned long now = jiffies;
> struct mem_cgroup *iter;
> struct oom_context *ctx;
> DEFINE_WAIT(wait);
> @@ -660,14 +667,29 @@ bool oom_trylock(struct mem_cgroup *memcg)
> iter = mem_cgroup_iter(memcg, NULL, NULL);
> do {
> ctx = mem_cgroup_oom_context(iter);
> - if (ctx->owner || ctx->victim) {
> + if ((ctx->owner || ctx->victim) &&
> + time_before(now, ctx->oom_start + OOM_TIMEOUT)) {
> prepare_to_wait(&ctx->waitq, &wait,
> TASK_KILLABLE);
> spin_unlock(&oom_context_lock);
> - schedule();
> + schedule_timeout(ctx->oom_start + OOM_TIMEOUT - now);
> finish_wait(&ctx->waitq, &wait);
> mem_cgroup_iter_break(memcg, iter);
> return false;
> + } else if (ctx->owner || ctx->victim) {
> + /*
> + * Timeout. Release the context.
> + */
> + struct task_struct *p = ctx->victim ?: ctx->owner;
> +
> + task_lock(p);
> + pr_err("OOM kill timeout: %d (%s)\n",
> + task_pid_nr(p), p->comm);
> + task_unlock(p);
> + show_stack(p, NULL);
> +
> + ctx->owner = ctx->victim = NULL;
> + wake_up_all(&ctx->waitq);
> }
> } while ((iter = mem_cgroup_iter(memcg, iter, NULL)));
>
> @@ -680,6 +702,7 @@ bool oom_trylock(struct mem_cgroup *memcg)
> BUG_ON(ctx->owner);
> BUG_ON(ctx->victim);
> ctx->owner = current;
> + ctx->oom_start = now;
> } while ((iter = mem_cgroup_iter(memcg, iter, NULL)));
>
> spin_unlock(&oom_context_lock);
> @@ -689,9 +712,10 @@ bool oom_trylock(struct mem_cgroup *memcg)
>
> void oom_unlock(struct mem_cgroup *memcg)
> {
> + unsigned long now = jiffies;
> + unsigned long timeout = 0;
> struct mem_cgroup *iter, *victim_memcg = NULL;
> struct oom_context *ctx;
> - bool need_to_wait = false;
> DEFINE_WAIT(wait);
>
> spin_lock(&oom_context_lock);
> @@ -714,8 +738,15 @@ void oom_unlock(struct mem_cgroup *memcg)
> }
>
> /* Already waiting? */
> - if (need_to_wait)
> + if (timeout > 0)
> continue;
> + /* Expired? */
> + if (time_after(now, ctx->oom_start + OOM_TIMEOUT))
> + continue;
> +
> + timeout = ctx->oom_start + OOM_TIMEOUT - now;
> + BUG_ON(timeout == 0);
now may be less or more than (oom_start + OOM_TIMEOUT), so why can't
it be equal? Did you mean time_after_eq() above?
> +
> /*
> * Remember victim memcg so that we can wait for victim
> * to exit below.
> @@ -724,13 +755,12 @@ void oom_unlock(struct mem_cgroup *memcg)
> mem_cgroup_get(iter);
>
> prepare_to_wait(&ctx->waitq, &wait, TASK_KILLABLE);
> - need_to_wait = true;
> } while ((iter = mem_cgroup_iter(memcg, iter, NULL)));
>
> spin_unlock(&oom_context_lock);
>
> - if (need_to_wait) {
> - schedule();
> + if (timeout > 0) {
> + schedule_timeout(timeout);
> ctx = mem_cgroup_oom_context(victim_memcg);
> finish_wait(&ctx->waitq, &wait);
> mem_cgroup_put(victim_memcg);
>
More information about the Devel
mailing list