[Devel] [PATCH RHEL7 COMMIT] timers: Port diff-ve-timers-convert-ve-monotonic-to-abs-time-when-setting-timerfd-2

Konstantin Khorenko khorenko at virtuozzo.com
Tue Jun 9 14:00:16 PDT 2015


The commit is pushed to "branch-rh7-3.10.0-123.1.2-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh7-3.10.0-123.1.2.vz7.5.11
------>
commit ca1f630687104113f6e78f37fddb684187317ee2
Author: Pavel Tikhomirov <ptikhomirov at virtuozzo.com>
Date:   Wed Jun 10 01:00:16 2015 +0400

    timers: Port diff-ve-timers-convert-ve-monotonic-to-abs-time-when-setting-timerfd-2
    
    Need this for docker, as sometimes systemd-tmpfiles-clean.timer inside
    a PCS7 CT is spamming dbus with requests to start corresponding service.
    And at the same time Docker tries to create cgroup for container and
    attach it to hierarchies like memory and blkio.
    
    That is because the systemd timer was armed through a non-virtualized
    timerfd using the plain host clock, but the check that the timer has
    really expired uses virtualized clock_gettime and does not pass before
    the proper (in-container) timer activation. So the timer is re-armed
    again and again, keeps trying to start the service, and ends up in a
    busy loop.
    
    https://jira.sw.ru/browse/PSBM-34017
    
    v2: move the stubs to ve.h
    
    Port the following RH6 commit:
    
      Author: Vladimir Davydov
      Email: vdavydov at parallels.com
      Subject: fs: convert ve monotonic to abs time when setting timerfd
      Date: Fri, 15 Feb 2013 11:57:09 +0400
    
      * [timers] corrected TFD_TIMER_ABSTIME timer handling,
        the issue led to high cpu usage inside a Fedora 18 CT
        by 'init' process (PSBM-18284)
    
      Monotonic time inside container, as it can be obtained using various
      system calls such as clock_gettime, is reported since start of the container,
      not since start of the whole system. This was made in order to avoid time
      issues while a container is migrated between different physical hosts, but this
      also introduced a lot of problems in time-related system calls because
      absolute monotonic time, which is in fact relative to container, passed to those
      system calls must be converted to system-wide monotonic time, which is used by
      kernel hrtimers.
    
      One of those buggy system calls is timerfd_settime which accepts as an
      argument absolute time if flag TFD_TIMER_ABSTIME is specified.
    
      The patch fixes it by converting container monotonic time to
      system-wide monotonic time using the monotonic_ve_to_abs() function,
      which was introduced earlier and is now exported for that reason.
    
      https://jira.sw.ru/browse/PSBM-18284
    
      Signed-off-by: Vladimir Davydov <vdavydov at parallels.com>
    
    Signed-off-by: Pavel Tikhomirov <ptikhomirov at virtuozzo.com>
    Signed-off-by: Kirill Tkhai <ktkhai at odin.com>
    Reviewed-by: Vladimir Davydov <vdavydov at parallels.com>
---
 fs/timerfd.c          | 6 +++++-
 include/linux/ve.h    | 6 ++++++
 kernel/posix-timers.c | 9 ++-------
 3 files changed, 13 insertions(+), 8 deletions(-)

diff --git a/fs/timerfd.c b/fs/timerfd.c
index ad10267..9f01709 100644
--- a/fs/timerfd.c
+++ b/fs/timerfd.c
@@ -24,6 +24,7 @@
 #include <linux/syscalls.h>
 #include <linux/compat.h>
 #include <linux/rcupdate.h>
+#include <linux/ve.h>
 
 struct timerfd_ctx {
 	struct hrtimer tmr;
@@ -349,7 +350,7 @@ SYSCALL_DEFINE2(timerfd_create, int, clockid, int, flags)
 }
 
 static int do_timerfd_settime(int ufd, int flags, 
-		const struct itimerspec *new,
+		struct itimerspec *new,
 		struct itimerspec *old)
 {
 	struct fd f;
@@ -395,6 +396,9 @@ static int do_timerfd_settime(int ufd, int flags,
 	/*
 	 * Re-program the timer to the new value ...
 	 */
+	if ((flags & TFD_TIMER_ABSTIME) &&
+	    (new->it_value.tv_sec || new->it_value.tv_nsec))
+		monotonic_ve_to_abs(ctx->clockid, &new->it_value);
 	ret = timerfd_setup(ctx, flags, new);
 
 	spin_unlock_irq(&ctx->wqh.lock);
diff --git a/include/linux/ve.h b/include/linux/ve.h
index 0029f57..e48a1a3 100644
--- a/include/linux/ve.h
+++ b/include/linux/ve.h
@@ -211,6 +211,8 @@ static inline struct ve_struct *cgroup_ve(struct cgroup *cgroup)
 }
 
 extern unsigned long long ve_relative_clock(struct timespec * ts);
+extern void monotonic_abs_to_ve(clockid_t which_clock, struct timespec *tp);
+extern void monotonic_ve_to_abs(clockid_t which_clock, struct timespec *tp);
 
 #ifdef CONFIG_VTTYS
 extern int vtty_open_master(int veid, int idx);
@@ -246,6 +248,10 @@ static inline void ve_exit_ns(struct pid_namespace *ns) { }
 
 static const void *ve_namespace(struct device *dev) { return NULL; }
 
+static inline void monotonic_abs_to_ve(clockid_t which_clock,
+				struct timespec *tp) { }	/* no-op stub */
+static inline void monotonic_ve_to_abs(clockid_t which_clock,
+				struct timespec *tp) { }	/* no-op stub */
 #endif	/* CONFIG_VE */
 
 #endif /* _LINUX_VE_H */
diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c
index b9c849f..a4b1ca7 100644
--- a/kernel/posix-timers.c
+++ b/kernel/posix-timers.c
@@ -133,7 +133,7 @@ static struct k_clock posix_clocks[MAX_CLOCKS];
 	 (which_clock) == CLOCK_MONOTONIC_COARSE)
 
 #ifdef CONFIG_VE
-static void monotonic_abs_to_ve(clockid_t which_clock, struct timespec *tp)
+void monotonic_abs_to_ve(clockid_t which_clock, struct timespec *tp)
 {
 	struct ve_struct *ve = get_exec_env();
 
@@ -143,7 +143,7 @@ static void monotonic_abs_to_ve(clockid_t which_clock, struct timespec *tp)
 				tp->tv_nsec - ve->start_timespec.tv_nsec);
 }
 
-static void monotonic_ve_to_abs(clockid_t which_clock, struct timespec *tp)
+void monotonic_ve_to_abs(clockid_t which_clock, struct timespec *tp)
 {
 	struct ve_struct *ve = get_exec_env();
 
@@ -152,11 +152,6 @@ static void monotonic_ve_to_abs(clockid_t which_clock, struct timespec *tp)
 				tp->tv_sec + ve->start_timespec.tv_sec,
 				tp->tv_nsec + ve->start_timespec.tv_nsec);
 }
-#else
-static inline void monotonic_abs_to_ve(clockid_t which_clock,
-				       struct timespec *tp) { }
-static inline void monotonic_ve_to_abs(clockid_t which_clock,
-				       struct timepsec *tp) { }
 #endif
 
 /*



More information about the Devel mailing list