[Devel] [PATCH RHEL7 COMMIT] ve/sched: Do not loose boosting during set_next_entity() called for not busted task

Kirill Tkhai ktkhai at virtuozzo.com
Mon Dec 12 10:53:58 PST 2016


Konstantin, please, revert this. The patch is not correct, because we always dequeue
boosted parents of current task, while it's running, and queue it back, when it's put.

On 09.12.2016 15:11, Konstantin Khorenko wrote:
> The commit is pushed to "branch-rh7-3.10.0-327.36.1.vz7.20.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
> after rh7-3.10.0-327.36.1.vz7.20.13
> ------>
> commit 4bd37ae69b5864a02dd9643a8e02a465e60c3b97
> Author: Kirill Tkhai <ktkhai at virtuozzo.com>
> Date:   Fri Dec 9 16:11:38 2016 +0400
> 
>     ve/sched: Do not loose boosting during set_next_entity() called for not busted task
>     
>     Imagine the situation. Not boosted task A is running on cfs_rqA,
>     and this moment boosted task B wakes up and becomes queued:
>        rq,cfs_rq0
>             |
>          cfs_rq1
>            ...
>             |
>          cfs_rqN
>          /    \
>     cfs_rqA  cfs_rqB
>        |        |
>        A        B
>     
>     As B is boosted, cfs_rq1 and cfs_rq2 entities are boosted also.
>     Than, someone calls sched_setscheduler() for task A:
>     
>     dequeue_task_fair()  /* call dequeue_entity() only for B and cfs_rqB,
>                             as cfs_rqN have other queue is queued.
>                             So that, their on_rq remains 1. */
>     put_prev_task_fair() /* does not change boosting of cfs_rq1-...cfs_rqN,
>                             as they have boosted children */
>     ...
>     
>     set_curr_task_fair() /* dequeues cfs_rq1-...-cfs_rqN from boosted lists,
>                             as they remained on_rq after dequeue_task_fair().
>                             @Problem is here@ */
>     
>     enqueue_task_fair()  /* does not change boosting, as it's called for not boosted task */
>     
>     So, we have cfs_rqB and task B are both boosted and queued on busted lists,
>     while their (grand)parents are not on busted lists.
>     
>     The patch fixes reason of this broken hierary.
>     
>     https://jira.sw.ru/browse/PSBM-56465
>     
>     Signed-off-by: Kirill Tkhai <ktkhai at virtuozzo.com>
> ---
>  kernel/sched/fair.c | 21 +++++++++++++++++----
>  1 file changed, 17 insertions(+), 4 deletions(-)
> 
> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> index 96fe1d5..5f276ab 100644
> --- a/kernel/sched/fair.c
> +++ b/kernel/sched/fair.c
> @@ -3176,7 +3176,7 @@ check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr)
>  }
>  
>  static void
> -set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
> +set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, bool boosted)
>  {
>  	/* 'current' is not kept within the tree. */
>  	if (se->on_rq) {
> @@ -3187,7 +3187,7 @@ set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
>  		 */
>  		update_stats_wait_end(cfs_rq, se);
>  		__dequeue_entity(cfs_rq, se);
> -		if (entity_boosted(se))
> +		if (entity_boosted(se) && boosted)
>  			__dequeue_boosted_entity(cfs_rq, se);
>  	}
>  
> @@ -5050,7 +5050,15 @@ static struct task_struct *pick_next_task_fair(struct rq *rq)
>  
>  	do {
>  		se = pick_next_entity(cfs_rq);
> -		set_next_entity(cfs_rq, se);
> +		/*
> +		 * If se can be dequeued from boosted node,
> +		 * we will pick a boosted task at the end,
> +		 * because we pick boosted tasks firstly.
> +		 * So, "true" covers this case.
> +		 * If se is not boosted, "true" does not
> +		 * affect any way. Thus, pass it unconditionally.
> +		 */
> +		set_next_entity(cfs_rq, se, true);
>  		cfs_rq = group_cfs_rq(se);
>  	} while (cfs_rq);
>  
> @@ -7859,11 +7867,16 @@ static void switched_to_fair(struct rq *rq, struct task_struct *p)
>  static void set_curr_task_fair(struct rq *rq)
>  {
>  	struct sched_entity *se = &rq->curr->se;
> +	bool boosted = false;
> +
> +#ifdef CONFIG_CFS_BANDWIDTH
> +	boosted = se->boosted;
> +#endif
>  
>  	for_each_sched_entity(se) {
>  		struct cfs_rq *cfs_rq = cfs_rq_of(se);
>  
> -		set_next_entity(cfs_rq, se);
> +		set_next_entity(cfs_rq, se, boosted);
>  		/* ensure bandwidth has been allocated on our new cfs_rq */
>  		account_cfs_rq_runtime(cfs_rq, 0);
>  	}
> 


More information about the Devel mailing list