[Devel] [PATCH 2/2 v1] ve/cgroup: Added pointers to owning ve to root cgroups
Pavel Tikhomirov
ptikhomirov at virtuozzo.com
Tue Mar 17 14:10:14 MSK 2020
On 3/17/20 1:28 PM, Valeriy Vdovin wrote:
> Follow-up patch to per-cgroup release_agent property. release_agent
> notifications are spawned from a special kthread, running under ve0. But
> newly spawned tasks should run under their own ve context. Easy way to
> pass this information to a spawning thread is by adding 've_owner' field
> to a root cgroup. At notification any cgroup can be walked upwards to
> its root and get ve_owner from there.
>
> https://jira.sw.ru/browse/PSBM-83887
> Signed-off-by: Valeriy Vdovin <valeriy.vdovin at virtuozzo.com>
> ---
> include/linux/cgroup.h | 3 +++
> include/linux/ve.h | 8 ++++++++
> kernel/cgroup.c | 33 +++++++++++++++++++++++++++++++++
> kernel/ve/ve.c | 3 +++
> 4 files changed, 47 insertions(+)
>
> diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
> index cad5b4f..513658b 100644
> --- a/include/linux/cgroup.h
> +++ b/include/linux/cgroup.h
> @@ -286,6 +286,9 @@ struct cgroup {
> struct simple_xattrs xattrs;
> u64 subgroups_limit;
>
> + /* ve_owner, responsible for running release agent. */
> + struct ve_struct *ve_owner;
> +
> /*
> * Per-cgroup path to release agent binary for release
> * notifications.
> diff --git a/include/linux/ve.h b/include/linux/ve.h
> index 9d60838..9cc5257 100644
> --- a/include/linux/ve.h
> +++ b/include/linux/ve.h
> @@ -268,6 +268,14 @@ static inline struct cgroup *cgroup_get_ve_root(struct cgroup *cgrp)
> struct seq_file;
> struct kernel_cpustat;
>
> +/*
> + * cgroup needs to know its owning ve for some operations, but
> + * cgroup's lifetime is independent of ve's; in theory ve can be destroyed
> + * earlier than some of its cgroups.
> + */
> +void ve_add_referring_cgroup(struct ve_struct *ve, struct cgroup *cgrp);
> +void ve_remove_referring_cgroups(struct ve_struct *ve);
> +
> #if defined(CONFIG_VE) && defined(CONFIG_CGROUP_SCHED)
> int ve_show_cpu_stat(struct ve_struct *ve, struct seq_file *p);
> int ve_show_loadavg(struct ve_struct *ve, struct seq_file *p);
> diff --git a/kernel/cgroup.c b/kernel/cgroup.c
> index 0b64d88..105536b 100644
> --- a/kernel/cgroup.c
> +++ b/kernel/cgroup.c
> @@ -4318,6 +4318,7 @@ int cgroup_mark_ve_root(struct ve_struct *ve)
> mutex_lock(&cgroup_mutex);
> for_each_active_root(root) {
> cgrp = task_cgroup_from_root(ve->init_task, root);
> + cgrp->ve_owner = ve;
> set_bit(CGRP_VE_ROOT, &cgrp->flags);
> err = cgroup_add_file_on_mark_ve(cgrp);
> if (err)
> @@ -4329,6 +4330,19 @@ int cgroup_mark_ve_root(struct ve_struct *ve)
> return err;
> }
>
> +void cgroup_unbind_roots_from_ve(struct ve_struct *ve)
> +{
> + struct cgroup *cgrp;
> + struct cgroupfs_root *root;
> +
> + mutex_lock(&cgroup_mutex);
> + for_each_active_root(root) {
> + cgrp = task_cgroup_from_root(ve->init_task, root);
> + cgrp->ve_owner = NULL;
> + }
> + mutex_unlock(&cgroup_mutex);
> +}
> +
> struct cgroup *cgroup_get_ve_root(struct cgroup *cgrp)
> {
> struct cgroup *ve_root = NULL;
> @@ -5455,6 +5469,7 @@ static void cgroup_release_agent(struct work_struct *work)
> raw_spin_lock(&release_list_lock);
> while (!list_empty(&release_list)) {
> char *argv[3], *envp[3];
> + struct ve_struct *ve;
> int i, err;
> char *pathbuf = NULL, *agentbuf = NULL;
> struct cgroup *root_cgrp;
> @@ -5468,7 +5483,20 @@ static void cgroup_release_agent(struct work_struct *work)
> goto continue_free;
> if (__cgroup_path(cgrp, pathbuf, PAGE_SIZE, true) < 0)
> goto continue_free;
> +
> + /*
> + * root_cgrp is the relative root for cgrp, for host
> + * cgroups root_cgrp is root->top_cgroup, for container
> + * cgroups it is any up the parent chain from cgrp marked
> + * as VE_ROOT.
> + */
> root_cgrp = cgroup_get_local_root(cgrp);
> +
> + ve = NULL;
> + if (root_cgrp->ve_owner)
> + ve = root_cgrp->ve_owner;
> + if (!ve)
> + goto continue_free;
> if (root_cgrp->release_agent_path)
> agentbuf = kstrdup(root_cgrp->release_agent_path,
> GFP_KERNEL);
> @@ -5490,7 +5518,12 @@ static void cgroup_release_agent(struct work_struct *work)
> * since the exec could involve hitting disk and hence
> * be a slow process */
> mutex_unlock(&cgroup_mutex);
> +#ifdef CONFIG_VE
> + err = call_usermodehelper_fns_ve(ve, argv[0], argv,
> + envp, UMH_WAIT_EXEC, NULL, NULL, NULL);
> +#else
> err = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_EXEC);
> +#endif
> if (err < 0)
> pr_warn_ratelimited("cgroup release_agent "
> "%s %s failed: %d\n",
> diff --git a/kernel/ve/ve.c b/kernel/ve/ve.c
> index a64b4a7..37353fb 100644
> --- a/kernel/ve/ve.c
> +++ b/kernel/ve/ve.c
> @@ -480,6 +480,7 @@ static void ve_drop_context(struct ve_struct *ve)
> static const struct timespec zero_time = { };
>
> extern int cgroup_mark_ve_root(struct ve_struct *ve);
> +extern void cgroup_unbind_roots_from_ve(struct ve_struct *ve);
>
> /* under ve->op_sem write-lock */
> static int ve_start_container(struct ve_struct *ve)
> @@ -588,10 +589,12 @@ void ve_stop_ns(struct pid_namespace *pid_ns)
> up_write(&ve->op_sem);
> }
>
> void ve_exit_ns(struct pid_namespace *pid_ns)
> {
> struct ve_struct *ve = current->task_ve;
>
> + cgroup_unbind_roots_from_ve(ve);
> /*
> * current->cgroups already switched to init_css_set in cgroup_exit(),
> * but current->task_ve still points to our exec ve.
This hunk looks broken and does not apply, should be something like:
@@ -595,6 +596,7 @@ void ve_stop_ns(struct pid_namespace *pid_ns)
{
struct ve_struct *ve = current->task_ve;
+ cgroup_unbind_roots_from_ve(ve);
/*
* current->cgroups already switched to init_css_set in
cgroup_exit(),
* but current->task_ve still points to our exec ve.
>
--
Best regards, Tikhomirov Pavel
Software Developer, Virtuozzo.
More information about the Devel
mailing list