[Devel] [PATCH RH8 4/8] ve/fs/proc: Make per-thread and per-process allocation latencies.
Konstantin Khorenko
khorenko at virtuozzo.com
Mon May 24 20:13:05 MSK 2021
Merged into previous patch.
f221a0258c4a ve/page_alloc, kstat: account allocation latencies per-task and per-thread
--
Best regards,
Konstantin Khorenko,
Virtuozzo Linux Kernel Team
On 05/18/2021 08:48 PM, Andrey Zhadchenko wrote:
> From: Andrey Ryabinin <aryabinin at virtuozzo.com>
>
> Follow-up for 6d9a9210395e ("ve/page_alloc, kstat: account allocation latencies per-task")
> Make per-thread and per-process allocation latencies:
>
> - /proc/<pid>/vz_latency - cumulative for a thread group
> - /proc/<pid>/task/<tid>/vz_latency - thread-specific
>
> During allocation we collect per-thread latency. When a thread dies,
> it adds its own latencies to the shared task->signal->alloc_lat array.
> /proc/<pid>/vz_latency sums the allocation latencies of all live threads
> plus the latencies of already dead threads from task->signal->alloc_lat.
>
> https://jira.sw.ru/browse/PSBM-81395
> Signed-off-by: Andrey Ryabinin <aryabinin at virtuozzo.com>
> Reviewed-by: Kirill Tkhai <ktkhai at virtuozzo.com>
>
> Cc: Pavel Borzenkov <Pavel.Borzenkov at acronis.com>
>
> Rebase to vz8:
> - signal_struct moved from sched.h to sched/signal.h, so the changes moved with it
>
> (cherry-picked from c4cb66d5e70636c2089feb602226292a2513622a)
> Signed-off-by: Andrey Zhadchenko <andrey.zhadchenko at virtuozzo.com>
> ---
> fs/proc/base.c | 76 ++++++++++++++++++++++++++++++--------------
> include/linux/sched/signal.h | 3 ++
> kernel/exit.c | 16 ++++++++++
> 3 files changed, 71 insertions(+), 24 deletions(-)
>
> diff --git a/fs/proc/base.c b/fs/proc/base.c
> index c8dc2e9..96db35f 100644
> --- a/fs/proc/base.c
> +++ b/fs/proc/base.c
> @@ -533,24 +533,23 @@ static void lastlat_seq_show(struct seq_file *m,
> seq_printf(m, "%-12s %20Lu %20lu\n", name,
> snap->totlat, snap->count);
> }
> +static const char *alloc_descr[] = {
> + "allocatomic:",
> + "alloc:",
> + "allocmp:",
> +};
> +static const int alloc_types[] = {
> + KSTAT_ALLOCSTAT_ATOMIC,
> + KSTAT_ALLOCSTAT_LOW,
> + KSTAT_ALLOCSTAT_LOW_MP,
> +};
>
> -static int vz_lat_show_proc(struct seq_file *m, void *v)
> +static int proc_tid_vz_lat(struct seq_file *m, struct pid_namespace *ns,
> + struct pid *pid, struct task_struct *task)
> {
> int i;
> - struct inode *inode = m->private;
> - struct task_struct *task = get_proc_task(inode);
> - static const char *alloc_descr[] = {
> - "allocatomic:",
> - "alloc:",
> - "allocmp:",
> - };
> - static const int alloc_types[] = {
> - KSTAT_ALLOCSTAT_ATOMIC,
> - KSTAT_ALLOCSTAT_LOW,
> - KSTAT_ALLOCSTAT_LOW_MP,
> - };
>
> - seq_printf(m, "%-11s %20s %20s\n",
> + seq_printf(m, "%-12s %20s %20s\n",
> "Type", "Total_lat", "Calls");
>
> for (i = 0; i < ARRAY_SIZE(alloc_types); i++)
> @@ -559,17 +558,43 @@ static int vz_lat_show_proc(struct seq_file *m, void *v)
> return 0;
> }
>
> -static int vz_lat_open(struct inode *inode, struct file *file)
> +static int proc_tgid_vz_lat(struct seq_file *m, struct pid_namespace *ns,
> + struct pid *pid, struct task_struct *task)
> {
> - return single_open(file, vz_lat_show_proc, inode);
> -}
> + int i;
> + unsigned long flags;
> + u64 lat[ARRAY_SIZE(alloc_types)];
> + u64 count[ARRAY_SIZE(alloc_types)];
>
> -static const struct file_operations proc_vz_lat_operations = {
> - .open = vz_lat_open,
> - .read = seq_read,
> - .llseek = seq_lseek,
> - .release = single_release,
> -};
> + for (i = 0; i < ARRAY_SIZE(alloc_types); i++) {
> + lat[i] = task->alloc_lat[alloc_types[i]].totlat;
> + count[i] = task->alloc_lat[alloc_types[i]].count;
> + }
> +
> + if (lock_task_sighand(task, &flags)) {
> + struct task_struct *t = task;
> + while_each_thread(task, t) {
> + for (i = 0; i < ARRAY_SIZE(alloc_types); i++) {
> + lat[i] += t->alloc_lat[alloc_types[i]].totlat;
> + count[i] += t->alloc_lat[alloc_types[i]].count;
> + }
> + }
> + for (i = 0; i < ARRAY_SIZE(alloc_types); i++) {
> + lat[i] += t->signal->alloc_lat[alloc_types[i]].totlat;
> + count[i] += t->signal->alloc_lat[alloc_types[i]].count;
> + }
> + unlock_task_sighand(task, &flags);
> + }
> +
> + seq_printf(m, "%-12s %20s %20s\n",
> + "Type", "Total_lat", "Calls");
> +
> + for (i = 0; i < ARRAY_SIZE(alloc_types); i++)
> + seq_printf(m, "%-12s %20Lu %20Lu\n", alloc_descr[i],
> + lat[i], count[i]);
> +
> + return 0;
> +}
> #endif
>
> static int proc_oom_score(struct seq_file *m, struct pid_namespace *ns,
> @@ -3097,7 +3122,7 @@ static int proc_pid_patch_state(struct seq_file *m, struct pid_namespace *ns,
> ONE("patch_state", S_IRUSR, proc_pid_patch_state),
> #endif
> #ifdef CONFIG_VE
> - REG("vz_latency", S_IRUGO, proc_vz_lat_operations),
> + ONE("vz_latency", S_IRUGO, proc_tgid_vz_lat),
> #endif
> };
>
> @@ -3480,6 +3505,9 @@ static int proc_tid_comm_permission(struct inode *inode, int mask)
> #ifdef CONFIG_LIVEPATCH
> ONE("patch_state", S_IRUSR, proc_pid_patch_state),
> #endif
> +#ifdef CONFIG_VE
> + ONE("vz_latency", S_IRUGO, proc_tid_vz_lat),
> +#endif
> };
>
> static int proc_tid_base_readdir(struct file *file, struct dir_context *ctx)
> diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h
> index 57cf2c5..adfe9cc 100644
> --- a/include/linux/sched/signal.h
> +++ b/include/linux/sched/signal.h
> @@ -211,6 +211,9 @@ struct signal_struct {
> #ifdef CONFIG_TASKSTATS
> struct taskstats *stats;
> #endif
> +#ifdef CONFIG_VE
> + struct kstat_lat_snap_struct alloc_lat[KSTAT_ALLOCSTAT_NR];
> +#endif
> #ifdef CONFIG_AUDIT
> unsigned audit_tty;
> struct tty_audit_buf *tty_audit_buf;
> diff --git a/kernel/exit.c b/kernel/exit.c
> index 0922ca9..8fde663 100644
> --- a/kernel/exit.c
> +++ b/kernel/exit.c
> @@ -723,6 +723,20 @@ static void check_stack_usage(void)
> static inline void check_stack_usage(void) {}
> #endif
>
> +void kstat_add_dying(struct task_struct *tsk)
> +{
> +#ifdef CONFIG_VE
> + int i;
> +
> + spin_lock_irq(&tsk->sighand->siglock);
> + for (i = 0; i < KSTAT_ALLOCSTAT_NR; i++) {
> + tsk->signal->alloc_lat[i].totlat += tsk->alloc_lat[i].totlat;
> + tsk->signal->alloc_lat[i].count += tsk->alloc_lat[i].count;
> + }
> + spin_unlock_irq(&tsk->sighand->siglock);
> +#endif
> +}
> +
> void __noreturn do_exit(long code)
> {
> struct task_struct *tsk = current;
> @@ -791,6 +805,8 @@ void __noreturn do_exit(long code)
> #endif
> if (tsk->mm)
> setmax_mm_hiwater_rss(&tsk->signal->maxrss, tsk->mm);
> + } else {
> + kstat_add_dying(tsk);
> }
> acct_collect(code, group_dead);
> if (group_dead)
>
More information about the Devel
mailing list