[Devel] [PATCH 1/2] sched: introduce cond_resched_may_throttle

Kirill Tkhai ktkhai at virtuozzo.com
Mon Jan 11 09:15:35 PST 2016


From: Kirill Tkhai <ktkhai at odin.com>

Port diff-sched-introduce-cond_resched_may_throttle by Vladimir Davydov:

Since cond_resched() is sometimes called under a semaphore, throttling
tasks there was forbidden in order to eliminate the possibility of
priority inversion. However, it turned out that some tasks must be
throttled on cond_resched(); otherwise they won't have a chance to be
throttled at all, which breaks the concept of CPU limits. The most
notable (and currently the only identified) example is VM hypervisors
such as KVM or Balalaika.

To fix this problem, the patch introduces a new function,
cond_resched_may_throttle(), which works just like the usual
cond_resched() except that it allows the scheduler to throttle the
caller's task group. This function must be used by those pieces of
software that can only be throttled on cond_resched() under certain
conditions. This function is intended for, and will be used by,
Balalaika - I'm going to send the corresponding patch. Perhaps it's
also worthwhile to use it in KVM; however, there is no rush because I
doubt anyone will use KVM, vzkernel, and our hacked CPU limits all
together, so it can wait.

https://jira.sw.ru/browse/PSBM-18888

Signed-off-by: Vladimir Davydov <vdavydov at parallels.com>
Signed-off-by: Kirill Tkhai <ktkhai at virtuozzo.com>
---
 include/linux/sched.h |    8 ++++++++
 kernel/sched/core.c   |   23 +++++++++++++++++++----
 kernel/sched/fair.c   |    3 ++-
 3 files changed, 29 insertions(+), 5 deletions(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 4560071..4bbd391 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1246,6 +1246,7 @@ struct task_struct {
 	unsigned sched_interruptible_sleep:1;
 
 	unsigned woken_while_running:1;
+	unsigned may_throttle:1;
 
 	pid_t pid;
 	pid_t tgid;
@@ -2697,6 +2698,13 @@ extern int _cond_resched(void);
 	_cond_resched();			\
 })
 
+extern int _cond_resched_may_throttle(void);
+
+#define cond_resched_may_throttle() ({		\
+	__might_sleep(__FILE__, __LINE__, 0);	\
+	_cond_resched_may_throttle();		\
+})
+
 extern int __cond_resched_lock(spinlock_t *lock);
 
 #ifdef CONFIG_PREEMPT_COUNT
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 581bfd0..c8ac8bd 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -4775,23 +4775,38 @@ static inline int should_resched(void)
 	return need_resched() && !(preempt_count() & PREEMPT_ACTIVE);
 }
 
-static void __cond_resched(void)
+static void __cond_resched(bool may_throttle)
 {
 	add_preempt_count(PREEMPT_ACTIVE);
+	if (may_throttle)
+		current->may_throttle = 1;
 	__schedule();
+	if (may_throttle)
+		current->may_throttle = 0;
 	sub_preempt_count(PREEMPT_ACTIVE);
 }
 
 int __sched _cond_resched(void)
 {
 	if (should_resched()) {
-		__cond_resched();
+		__cond_resched(false);
 		return 1;
 	}
 	return 0;
 }
 EXPORT_SYMBOL(_cond_resched);
 
+int __sched _cond_resched_may_throttle(void)
+{
+	if (should_resched()) {
+		__cond_resched(true);
+		return 1;
+	}
+	return 0;
+}
+EXPORT_SYMBOL(_cond_resched_may_throttle);
+
+
 /*
  * __cond_resched_lock() - if a reschedule is pending, drop the given lock,
  * call schedule, and on return reacquire the lock.
@@ -4810,7 +4825,7 @@ int __cond_resched_lock(spinlock_t *lock)
 	if (spin_needbreak(lock) || resched) {
 		spin_unlock(lock);
 		if (resched)
-			__cond_resched();
+			__cond_resched(false);
 		else
 			cpu_relax();
 		ret = 1;
@@ -4826,7 +4841,7 @@ int __sched __cond_resched_softirq(void)
 
 	if (should_resched()) {
 		local_bh_enable();
-		__cond_resched();
+		__cond_resched(false);
 		local_bh_disable();
 		return 1;
 	}
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index ece14a4..dda3f57 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -957,7 +957,8 @@ static inline void update_entity_boost(struct sched_entity *se)
 					p->woken_while_running;
 			p->woken_while_running = 0;
 		} else
-			se->boosted = sched_feat(BOOST_PREEMPT);
+			se->boosted = sched_feat(BOOST_PREEMPT) &&
+				      !p->may_throttle;
 	}
 }
 



More information about the Devel mailing list