[Devel] [PATCH] ub: correct _ub_get_css() retvalue for dying/stopped containers
Evgenii Shatokhin
eshatokhin at virtuozzo.com
Mon Sep 13 12:04:03 MSK 2021
On 12.09.2021 18:03, Vasily Averin wrote:
> Currently __ub_get_css(ub) for dying or stopped container returns
> ub0-related css as failback. As result read from /proc/bc/<>/ files
> can show ve0-related info.
>
> With this patch, __ub_get_css() will return NULL in such cases,
> and everyone who called of this function now handles this return value.
>
> https://jira.sw.ru/browse/PSBM-123686
> Signed-off-by: Vasily Averin <vvs at virtuozzo.com>
> ---
> include/bc/beancounter.h | 10 +++++++-
> kernel/bc/beancounter.c | 65 +++++++++++++++++++++++++++++++++++-------------
> kernel/bc/io_prio.c | 13 ++++++++--
> kernel/bc/proc.c | 3 +++
> kernel/bc/vm_pages.c | 18 +++++++++++---
> 5 files changed, 85 insertions(+), 24 deletions(-)
>
> diff --git a/include/bc/beancounter.h b/include/bc/beancounter.h
> index e4c5017..dc396ae 100644
> --- a/include/bc/beancounter.h
> +++ b/include/bc/beancounter.h
> @@ -163,6 +163,10 @@ static __always_inline struct cgroup_subsys_state *__ub_get_css(struct user_bean
> css = ACCESS_ONCE(ub->ub_bound_css[idx]);
> if (likely(css && css_tryget(css))) {
> rcu_read_unlock();
> + if ((ub != &ub0) && (css == ub0.ub_bound_css[idx])) {
> + css_put(css);
> + css = NULL;
> + }
> return css;
> }
>
> @@ -183,7 +187,11 @@ static __always_inline struct cgroup_subsys_state *__ub_get_css(struct user_bean
> if (css)
> css_put(css);
>
> - css_get(root_css);
> + if ((ub == &ub0) || (root_css != ub0.ub_bound_css[idx]))
Is it possible that root_css != ub0.ub_bound_css[idx] at this point?
Some kind of race changing ub0.ub_bound_css[idx] under our feet? At the
first glance, it seems unlikely.
> + css_get(root_css);
> + else
> + root_css = NULL;
> +
> return root_css;
> }
>
> diff --git a/kernel/bc/beancounter.c b/kernel/bc/beancounter.c
> index 5e04d6c..ae8d21a 100644
> --- a/kernel/bc/beancounter.c
> +++ b/kernel/bc/beancounter.c
> @@ -117,33 +117,53 @@ int ub_attach_task(struct user_beancounter *ub, struct task_struct *tsk)
> {
> int ret = 0;
> struct user_beancounter *old_ub = tsk->task_bc.exec_ub;
> - struct cgroup_subsys_state *css;
> + struct cgroup_subsys_state *css, *com, *cob;
>
> if (ub == old_ub)
> goto out;
> +
> + ret = -ENODEV;
> + com = ub_get_mem_css(old_ub);
> + if (!com)
> + goto out;
> +
> + cob = ub_get_blkio_css(old_ub);
> + if (!cob)
> + goto fail_om;
> +
> css = ub_get_mem_css(ub);
> + if (!css)
> + goto fail_ob;
> +
> ret = cgroup_kernel_attach(css->cgroup, tsk);
> css_put(css);
> if (ret)
> - goto out;
> + goto fail_ob;
> +
> + ret = -ENODEV;
> css = ub_get_blkio_css(ub);
> + if (!css)
> + goto fail_blkio;
> +
> ret = cgroup_kernel_attach(css->cgroup, tsk);
> css_put(css);
> if (ret)
> goto fail_blkio;
> +
> ret = cgroup_kernel_attach(ub->css.cgroup, tsk);
> if (ret)
> goto fail_ub;
> +
> +fail_ob:
> + css_put(cob);
> +fail_om:
> + css_put(com);
> out:
> return ret;
> fail_ub:
> - css = ub_get_blkio_css(old_ub);
> - cgroup_kernel_attach(css->cgroup, tsk);
> - css_put(css);
> + cgroup_kernel_attach(cob->cgroup, tsk);
> fail_blkio:
> - css = ub_get_mem_css(old_ub);
> - cgroup_kernel_attach(css->cgroup, tsk);
> - css_put(css);
> + cgroup_kernel_attach(com->cgroup, tsk);
> goto out;
> }
>
> @@ -167,6 +187,9 @@ int ub_update_memcg(struct user_beancounter *ub)
> int ret;
>
> css = ub_get_mem_css(ub);
> + if (!css)
> + return -ENODEV;
> +
> ret = mem_cgroup_apply_beancounter(mem_cgroup_from_cont(css->cgroup),
> ub);
> css_put(css);
> @@ -181,8 +204,10 @@ void ub_sync_pids(struct user_beancounter *ub)
> struct cgroup_subsys_state *css;
>
> css = ub_get_pids_css(ub);
> - pids_cgroup_sync_beancounter(pids_cgroup_from_cont(css->cgroup), ub);
> - css_put(css);
> + if (css) {
> + pids_cgroup_sync_beancounter(pids_cgroup_from_cont(css->cgroup), ub);
> + css_put(css);
> + }
> }
>
> /*
> @@ -193,18 +218,22 @@ void ub_sync_memcg(struct user_beancounter *ub)
> struct cgroup_subsys_state *css;
>
> css = ub_get_mem_css(ub);
> - mem_cgroup_sync_beancounter(mem_cgroup_from_cont(css->cgroup), ub);
> - css_put(css);
> + if (css) {
> + mem_cgroup_sync_beancounter(mem_cgroup_from_cont(css->cgroup), ub);
> + css_put(css);
> + }
> }
>
> unsigned long ub_total_pages(struct user_beancounter *ub, bool swap)
> {
> struct cgroup_subsys_state *css;
> - unsigned long ret;
> + unsigned long ret = 0;
>
> css = ub_get_mem_css(ub);
> - ret = mem_cgroup_total_pages(mem_cgroup_from_cont(css->cgroup), swap);
> - css_put(css);
> + if (css) {
> + ret = mem_cgroup_total_pages(mem_cgroup_from_cont(css->cgroup), swap);
> + css_put(css);
> + }
> return ret;
> }
>
> @@ -576,11 +605,12 @@ static ssize_t ub_cgroup_read(struct cgroup *cg, struct cftype *cft,
> struct cgroup_subsys_state *bound_css;
> char *path;
> int len;
> - ssize_t ret;
> + ssize_t ret = -ENOMEM;
>
> bound_css = __ub_get_css(ub, cft->private);
> + if (!bound_css)
> + goto fail;
>
> - ret = -ENOMEM;
> path = kmalloc(PATH_MAX + 1, GFP_KERNEL);
> if (!path)
> goto out;
> @@ -594,6 +624,7 @@ static ssize_t ub_cgroup_read(struct cgroup *cg, struct cftype *cft,
> kfree(path);
> out:
> css_put(bound_css);
> +fail:
> return ret;
> }
>
> diff --git a/kernel/bc/io_prio.c b/kernel/bc/io_prio.c
> index 16f5024..01a6251 100644
> --- a/kernel/bc/io_prio.c
> +++ b/kernel/bc/io_prio.c
> @@ -35,8 +35,12 @@ int ub_set_ioprio(int id, int ioprio)
> goto out;
>
> css = ub_get_blkio_css(ub);
> + if (!css)
> + goto put;
> +
> ret = blkcg_set_weight(css->cgroup, ioprio_weight[ioprio]);
> css_put(css);
> +put:
> put_beancounter(ub);
> out:
> return ret;
> @@ -59,8 +63,10 @@ static int bc_iostat(struct seq_file *f, struct user_beancounter *bc)
> __ub_percpu_sum(bc, fuse_bytes) >> 9);
>
> css = ub_get_blkio_css(bc);
> - blkcg_show_ub_iostat(css->cgroup, f);
> - css_put(css);
> + if (css) {
> + blkcg_show_ub_iostat(css->cgroup, f);
> + css_put(css);
> + }
> return 0;
> }
>
> @@ -146,6 +152,9 @@ static int bc_ioprio_show(struct seq_file *f, void *v)
> bc = seq_beancounter(f);
>
> css = ub_get_blkio_css(bc);
> + if (!css)
> + return 0;
> +
> weight = blkcg_get_weight(css->cgroup);
> css_put(css);
>
> diff --git a/kernel/bc/proc.c b/kernel/bc/proc.c
> index efcfdbc..2251d6d 100644
> --- a/kernel/bc/proc.c
> +++ b/kernel/bc/proc.c
> @@ -144,6 +144,9 @@ static int bc_proc_nodeinfo_show(struct seq_file *f, void *v)
> unsigned long pages[NR_LRU_LISTS];
>
> css = ub_get_mem_css(seq_beancounter(f));
> + if (!css)
> + return 0;
> +
> for_each_node_state(nid, N_HIGH_MEMORY) {
> memset(pages, 0, sizeof(pages));
> mem_cgroup_get_nr_pages(mem_cgroup_from_cont(css->cgroup),
> diff --git a/kernel/bc/vm_pages.c b/kernel/bc/vm_pages.c
> index e5019c0..7952da1 100644
> --- a/kernel/bc/vm_pages.c
> +++ b/kernel/bc/vm_pages.c
> @@ -127,7 +127,7 @@ int ub_enough_memory(struct mm_struct *mm, long pages)
> struct user_beancounter *ub;
> struct cgroup_subsys_state *css;
> unsigned long flags;
> - int ret;
> + int ret = -ENOMEM;
>
> if (!mm)
> return 0;
> @@ -135,15 +135,16 @@ int ub_enough_memory(struct mm_struct *mm, long pages)
> ub = mm->mm_ub;
>
> if (ub->ub_parms[UB_PRIVVMPAGES].held >
> - ub->ub_parms[UB_PRIVVMPAGES].barrier) {
> - ret = -ENOMEM;
> + ub->ub_parms[UB_PRIVVMPAGES].barrier)
> goto out;
> - }
>
> if (ub == get_ub0() || ub_overcommit_memory)
> return 0;
>
> css = ub_get_mem_css(ub);
> + if (!css)
> + goto out;
> +
> ret = mem_cgroup_enough_memory(mem_cgroup_from_cont(css->cgroup), pages);
> css_put(css);
> out:
> @@ -166,6 +167,9 @@ static int bc_fill_sysinfo(struct user_beancounter *ub,
> return NOTIFY_DONE | NOTIFY_STOP_MASK;
>
> css = ub_get_mem_css(ub);
> + if (!css)
> + return NOTIFY_BAD;
> +
> mem_cgroup_fill_sysinfo(mem_cgroup_from_cont(css->cgroup), si);
> css_put(css);
>
> @@ -185,6 +189,9 @@ static int bc_fill_meminfo(struct user_beancounter *ub,
> goto out;
>
> css = ub_get_mem_css(ub);
> + if (!css)
> + return NOTIFY_BAD;
> +
> mem_cgroup_fill_meminfo(mem_cgroup_from_cont(css->cgroup), mi);
> css_put(css);
>
> @@ -213,6 +220,9 @@ static int bc_fill_vmstat(struct user_beancounter *ub, unsigned long *stat)
> return NOTIFY_OK;
>
> css = ub_get_mem_css(ub);
> + if (!css)
> + return NOTIFY_BAD;
> +
> mem_cgroup_fill_vmstat(mem_cgroup_from_cont(css->cgroup), stat);
> css_put(css);
> return NOTIFY_OK;
>
More information about the Devel
mailing list