[Devel] [PATCH RH7 1/1] ve/proc: Added separate start time field to task_struct to show in container
Valeriy Vdovin
valeriy.vdovin at virtuozzo.com
Mon Jan 13 11:25:40 MSK 2020
https://jira.sw.ru/browse/PSBM-100083
Signed-off-by: Valeriy Vdovin <valeriy.vdovin at virtuozzo.com>
---
fs/proc/array.c | 16 ++++------------
include/linux/sched.h | 5 +++++
include/uapi/linux/prctl.h | 7 +++++++
kernel/fork.c | 15 +++++++++++++++
kernel/sys.c | 23 +++++++++++++++++++++++
kernel/ve/ve.c | 22 ++++++++++++++++++++++
6 files changed, 76 insertions(+), 12 deletions(-)
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 3aa8a7d..fb611b1 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -611,19 +611,11 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
(unsigned long long)task->real_start_time.tv_sec * NSEC_PER_SEC
+ task->real_start_time.tv_nsec;
#ifdef CONFIG_VE
- if (!is_super) {
- struct timespec *ve_start_ts =
- &get_exec_env()->real_start_timespec;
- start_time -=
- (unsigned long long)ve_start_ts->tv_sec * NSEC_PER_SEC
- + ve_start_ts->tv_nsec;
- }
- /* tasks inside a CT can have negative start time e.g. if the CT was
- * migrated from another hw node, in which case we will report 0 in
- * order not to confuse userspace */
- if ((s64)start_time < 0)
- start_time = 0;
+ if (!is_super)
+ start_time = (unsigned long long)
+ timespec_to_ns(&task->real_start_time_ct);
#endif
+
/* convert nsec -> ticks */
start_time = nsec_to_clock_t(start_time);
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 07f9954..0832904 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1934,6 +1934,11 @@ struct task_struct {
struct wake_q_node wake_q;
struct prev_cputime prev_cputime;
struct vtime vtime;
+ /*
+ * this is a container-side copy of 'real_start_time' field
+ * shown from inside of a container and modified by host.
+ */
+ struct timespec real_start_time_ct;
#endif /* __GENKSYMS__ */
};
diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h
index 02376de..b185113 100644
--- a/include/uapi/linux/prctl.h
+++ b/include/uapi/linux/prctl.h
@@ -204,5 +204,12 @@ struct prctl_mm_map {
# define PR_SPEC_DISABLE (1UL << 2)
# define PR_SPEC_FORCE_DISABLE (1UL << 3)
# define PR_SPEC_DISABLE_NOEXEC (1UL << 4)
+/* Set task container related fields */
+#define PR_SET_TASK_CT_FIELDS 1000
+#define PR_TASK_CT_FIELDS_START_TIME (1ULL << 0)
+
+struct prctl_task_ct_fields {
+ __s64 real_start_time;
+};
#endif /* _LINUX_PRCTL_H */
diff --git a/kernel/fork.c b/kernel/fork.c
index 3d74228..dbc584c 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1350,6 +1350,10 @@ static struct task_struct *copy_process(unsigned long clone_flags,
struct task_struct *p;
void *cgrp_ss_priv[CGROUP_CANFORK_COUNT] = {};
+#ifdef CONFIG_VE
+ struct ve_struct *ve = get_exec_env();
+#endif
+
if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS))
return ERR_PTR(-EINVAL);
@@ -1472,6 +1476,17 @@ static struct task_struct *copy_process(unsigned long clone_flags,
do_posix_clock_monotonic_gettime(&p->start_time);
p->real_start_time = p->start_time;
monotonic_to_bootbased(&p->real_start_time);
+
+ p->real_start_time_ct.tv_sec = 0;
+ p->real_start_time_ct.tv_nsec = 0;
+
+#ifdef CONFIG_VE
+ if (!ve_is_super(ve) && ve->is_running) {
+ p->real_start_time_ct = timespec_sub(p->real_start_time,
+ ve->real_start_timespec);
+ }
+#endif
+
p->io_context = NULL;
p->audit_context = NULL;
if (clone_flags & CLONE_THREAD)
diff --git a/kernel/sys.c b/kernel/sys.c
index c4d633ef..2ce16c7 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -2457,6 +2457,26 @@ static int prctl_get_tid_address(struct task_struct *me, int __user **tid_addr)
}
#endif
+static int prctl_set_task_ct_fields(struct task_struct *t, unsigned long arg,
+ unsigned long flags)
+{
+ struct prctl_task_ct_fields params;
+#ifdef CONFIG_VE
+ struct ve_struct *ve = t->task_ve;
+
+ if (!ve_is_super(ve) && !ve->is_pseudosuper)
+ return -EPERM;
+#endif
+
+ if (copy_from_user(¶ms, (const void __user *)arg, sizeof(params)))
+ return -EFAULT;
+
+ if (flags & PR_TASK_CT_FIELDS_START_TIME)
+ t->real_start_time_ct = ns_to_timespec(params.real_start_time);
+
+ return 0;
+}
+
int __weak arch_prctl_spec_ctrl_get(struct task_struct *t, unsigned long which)
{
return -EINVAL;
@@ -2684,6 +2704,9 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
return -EINVAL;
error = arch_prctl_spec_ctrl_set(me, arg2, arg3);
break;
+ case PR_SET_TASK_CT_FIELDS:
+ error = prctl_set_task_ct_fields(me, arg2, arg3);
+ break;
default:
error = -EINVAL;
break;
diff --git a/kernel/ve/ve.c b/kernel/ve/ve.c
index ad3a698..110c37e 100644
--- a/kernel/ve/ve.c
+++ b/kernel/ve/ve.c
@@ -339,6 +339,7 @@ static void kthread_attach_fn(struct kthread_work *w)
err = cgroup_attach_task_all(target, current);
if (err)
goto out;
+
out:
work->result = err;
complete(&work->done);
@@ -506,6 +507,15 @@ static int ve_start_container(struct ve_struct *ve)
ve->real_start_timespec = tsk->real_start_time;
}
+ /*
+ * 'current' belongs to this ve, but it's ct-relative start time hasn't
+ * been initialized correctly at copy_process, because ve's start time
+ * was zero. On the other hand, we know this task is this ve's
+ * init (pid 1), * so we set it's relative start time to 0.
+ */
+ tsk->real_start_time_ct.tv_sec = 0;
+ tsk->real_start_time_ct.tv_nsec = 0;
+
/* The value is wrong, but it is never compared to process
* start times */
ve->start_jiffies = get_jiffies_64();
@@ -837,6 +847,7 @@ static void ve_attach(struct cgroup *cg, struct cgroup_taskset *tset)
{
struct ve_struct *ve = cgroup_ve(cg);
struct task_struct *task;
+ struct timespec host_uptime;
cgroup_taskset_for_each(task, cg, tset) {
/* this probihibts ptracing of task entered to VE from host system */
@@ -850,6 +861,17 @@ static void ve_attach(struct cgroup *cg, struct cgroup_taskset *tset)
/* Leave parent exec domain */
task->parent_exec_id--;
+ if (ve->is_running) {
+ /* Viewed from inside of a container these processes
+ * are newly created, so their actual container start
+ * times should be set now.
+ */
+ do_posix_clock_monotonic_gettime(&host_uptime);
+ monotonic_to_bootbased(&host_uptime);
+ task->real_start_time_ct = timespec_sub(host_uptime,
+ ve->real_start_timespec);
+ }
+
task->task_ve = ve;
}
--
1.8.3.1
More information about the Devel
mailing list