[Devel] [PATCH RHEL7 COMMIT] ve/sched: Do not loose boosting during set_next_entity() called for not busted task

Konstantin Khorenko khorenko at virtuozzo.com
Fri Dec 9 04:11:38 PST 2016


The commit is pushed to "branch-rh7-3.10.0-327.36.1.vz7.20.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh7-3.10.0-327.36.1.vz7.20.13
------>
commit 4bd37ae69b5864a02dd9643a8e02a465e60c3b97
Author: Kirill Tkhai <ktkhai at virtuozzo.com>
Date:   Fri Dec 9 16:11:38 2016 +0400

    ve/sched: Do not loose boosting during set_next_entity() called for not busted task
    
    Imagine the situation. Not boosted task A is running on cfs_rqA,
    and this moment boosted task B wakes up and becomes queued:
       rq,cfs_rq0
            |
         cfs_rq1
           ...
            |
         cfs_rqN
         /    \
    cfs_rqA  cfs_rqB
       |        |
       A        B
    
    As B is boosted, cfs_rq1 and cfs_rq2 entities are boosted also.
    Than, someone calls sched_setscheduler() for task A:
    
    dequeue_task_fair()  /* call dequeue_entity() only for B and cfs_rqB,
                            as cfs_rqN have other queue is queued.
                            So that, their on_rq remains 1. */
    put_prev_task_fair() /* does not change boosting of cfs_rq1-...cfs_rqN,
                            as they have boosted children */
    ...
    
    set_curr_task_fair() /* dequeues cfs_rq1-...-cfs_rqN from boosted lists,
                            as they remained on_rq after dequeue_task_fair().
                            @Problem is here@ */
    
    enqueue_task_fair()  /* does not change boosting, as it's called for not boosted task */
    
    So, we have cfs_rqB and task B are both boosted and queued on busted lists,
    while their (grand)parents are not on busted lists.
    
    The patch fixes reason of this broken hierary.
    
    https://jira.sw.ru/browse/PSBM-56465
    
    Signed-off-by: Kirill Tkhai <ktkhai at virtuozzo.com>
---
 kernel/sched/fair.c | 21 +++++++++++++++++----
 1 file changed, 17 insertions(+), 4 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 96fe1d5..5f276ab 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -3176,7 +3176,7 @@ check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr)
 }
 
 static void
-set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
+set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, bool boosted)
 {
 	/* 'current' is not kept within the tree. */
 	if (se->on_rq) {
@@ -3187,7 +3187,7 @@ set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
 		 */
 		update_stats_wait_end(cfs_rq, se);
 		__dequeue_entity(cfs_rq, se);
-		if (entity_boosted(se))
+		if (entity_boosted(se) && boosted)
 			__dequeue_boosted_entity(cfs_rq, se);
 	}
 
@@ -5050,7 +5050,15 @@ static struct task_struct *pick_next_task_fair(struct rq *rq)
 
 	do {
 		se = pick_next_entity(cfs_rq);
-		set_next_entity(cfs_rq, se);
+		/*
+		 * If se can be dequeued from boosted node,
+		 * we will pick a boosted task at the end,
+		 * because we pick boosted tasks firstly.
+		 * So, "true" covers this case.
+		 * If se is not boosted, "true" does not
+		 * affect any way. Thus, pass it unconditionally.
+		 */
+		set_next_entity(cfs_rq, se, true);
 		cfs_rq = group_cfs_rq(se);
 	} while (cfs_rq);
 
@@ -7859,11 +7867,16 @@ static void switched_to_fair(struct rq *rq, struct task_struct *p)
 static void set_curr_task_fair(struct rq *rq)
 {
 	struct sched_entity *se = &rq->curr->se;
+	bool boosted = false;
+
+#ifdef CONFIG_CFS_BANDWIDTH
+	boosted = se->boosted;
+#endif
 
 	for_each_sched_entity(se) {
 		struct cfs_rq *cfs_rq = cfs_rq_of(se);
 
-		set_next_entity(cfs_rq, se);
+		set_next_entity(cfs_rq, se, boosted);
 		/* ensure bandwidth has been allocated on our new cfs_rq */
 		account_cfs_rq_runtime(cfs_rq, 0);
 	}


More information about the Devel mailing list