[Devel] [PATCH rh7] Port diff-ve-timers-convert-ve-monotonic-to-abs-time-when-setting-timerfd-2
Pavel Tikhomirov
ptikhomirov at virtuozzo.com
Mon Jun 8 09:17:39 PDT 2015
We need this for Docker: sometimes systemd-tmpfiles-clean.timer inside a
PCS7 CT spams dbus with requests to start the corresponding service,
while at the same time Docker tries to create a cgroup for the container
and attach it to hierarchies like memory and blkio.
That is because the systemd timer is triggered by a non-virtualized timerfd
that uses the plain host clock, while the check that the timer has really
expired uses the virtualized clock_gettime and does not pass until the
proper (in-container) activation time. So the timer is re-armed and fires
again and again, and starting the service ends up in a busy loop.
Minor changes: removed const from the do_timerfd_settime argument 'new',
included ve.h in timerfd.c
Port the following RH6 commit:
Author: Vladimir Davydov
Email: vdavydov at parallels.com
Subject: fs: convert ve monotonic to abs time when setting timerfd
Date: Fri, 15 Feb 2013 11:57:09 +0400
* [timers] corrected TFD_TIMER_ABSTIME timer handling,
the issue led to high cpu usage inside a Fedora 18 CT
by 'init' process (PSBM-18284)
Monotonic time inside container, as it can be obtained using various
system calls such as clock_gettime, is reported since start of the container,
not since start of the whole system. This was made in order to avoid time
issues while a container is migrated between different physical hosts, but this
also introduced a lot of problems in time-related system calls because
absolute monotonic time, which is in fact relative to container, passed to those
system calls must be converted to system-wide monotonic time, which is used by
kernel hrtimers.
One of those buggy system calls is timerfd_settime which accepts as an
argument absolute time if flag TFD_TIMER_ABSTIME is specified.
The patch fixes it by converting container monotonic time to system-wide
monotonic time using the monotonic_ve_to_abs() function, which was
introduced earlier and is now exported for that reason.
https://jira.sw.ru/browse/PSBM-18284
Signed-off-by: Vladimir Davydov <vdavydov at parallels.com>
Signed-off-by: Pavel Tikhomirov <ptikhomirov at virtuozzo.com>
---
fs/timerfd.c | 6 +++++-
include/linux/ve.h | 2 ++
kernel/posix-timers.c | 12 ++++++------
3 files changed, 13 insertions(+), 7 deletions(-)
diff --git a/fs/timerfd.c b/fs/timerfd.c
index ad10267..73cf8f11 100644
--- a/fs/timerfd.c
+++ b/fs/timerfd.c
@@ -24,6 +24,7 @@
#include <linux/syscalls.h>
#include <linux/compat.h>
#include <linux/rcupdate.h>
+#include <linux/ve.h>
struct timerfd_ctx {
struct hrtimer tmr;
@@ -349,7 +350,7 @@ SYSCALL_DEFINE2(timerfd_create, int, clockid, int, flags)
}
static int do_timerfd_settime(int ufd, int flags,
- const struct itimerspec *new,
+ struct itimerspec *new,
struct itimerspec *old)
{
struct fd f;
@@ -395,6 +396,9 @@ static int do_timerfd_settime(int ufd, int flags,
/*
* Re-program the timer to the new value ...
*/
+ if ((flags & TFD_TIMER_ABSTIME) &&
+ (new->it_value.tv_sec || new->it_value.tv_nsec))
+ monotonic_ve_to_abs(ctx->clockid, &new->it_value);
ret = timerfd_setup(ctx, flags, new);
spin_unlock_irq(&ctx->wqh.lock);
diff --git a/include/linux/ve.h b/include/linux/ve.h
index 0029f57..758ff85 100644
--- a/include/linux/ve.h
+++ b/include/linux/ve.h
@@ -211,6 +211,8 @@ static inline struct ve_struct *cgroup_ve(struct cgroup *cgroup)
}
extern unsigned long long ve_relative_clock(struct timespec * ts);
+extern void monotonic_abs_to_ve(clockid_t which_clock, struct timespec *tp);
+extern void monotonic_ve_to_abs(clockid_t which_clock, struct timespec *tp);
#ifdef CONFIG_VTTYS
extern int vtty_open_master(int veid, int idx);
diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c
index b9c849f..add70d8 100644
--- a/kernel/posix-timers.c
+++ b/kernel/posix-timers.c
@@ -133,7 +133,7 @@ static struct k_clock posix_clocks[MAX_CLOCKS];
(which_clock) == CLOCK_MONOTONIC_COARSE)
#ifdef CONFIG_VE
-static void monotonic_abs_to_ve(clockid_t which_clock, struct timespec *tp)
+void monotonic_abs_to_ve(clockid_t which_clock, struct timespec *tp)
{
struct ve_struct *ve = get_exec_env();
@@ -143,7 +143,7 @@ static void monotonic_abs_to_ve(clockid_t which_clock, struct timespec *tp)
tp->tv_nsec - ve->start_timespec.tv_nsec);
}
-static void monotonic_ve_to_abs(clockid_t which_clock, struct timespec *tp)
+void monotonic_ve_to_abs(clockid_t which_clock, struct timespec *tp)
{
struct ve_struct *ve = get_exec_env();
@@ -153,10 +153,10 @@ static void monotonic_ve_to_abs(clockid_t which_clock, struct timespec *tp)
tp->tv_nsec + ve->start_timespec.tv_nsec);
}
#else
-static inline void monotonic_abs_to_ve(clockid_t which_clock,
- struct timespec *tp) { }
-static inline void monotonic_ve_to_abs(clockid_t which_clock,
- struct timepsec *tp) { }
+inline void monotonic_abs_to_ve(clockid_t which_clock,
+ struct timespec *tp) { }
+inline void monotonic_ve_to_abs(clockid_t which_clock,
+ struct timepsec *tp) { }
#endif
/*
--
1.9.3
More information about the Devel
mailing list