[Devel] [PATCH 2/2 v1] ve/cgroup: Added pointers to owning ve to root cgroups
Pavel Tikhomirov
ptikhomirov at virtuozzo.com
Tue Mar 17 14:10:14 MSK 2020
On 3/17/20 1:28 PM, Valeriy Vdovin wrote:
> Follow-up patch to per-cgroup release_agent property. release_agent
> notifications are spawned from a special kthread, running under ve0. But
> newly spawned tasks should run under their own ve context. Easy way to
> pass this information to a spawning thread is by adding 've_owner' field
> to a root cgroup. At notification any cgroup can be walked upwards to
> its root and get ve_owner from there.
>
> https://jira.sw.ru/browse/PSBM-83887
> Signed-off-by: Valeriy Vdovin <valeriy.vdovin at virtuozzo.com>
> ---
> include/linux/cgroup.h | 3 +++
> include/linux/ve.h | 8 ++++++++
> kernel/cgroup.c | 33 +++++++++++++++++++++++++++++++++
> kernel/ve/ve.c | 3 +++
> 4 files changed, 47 insertions(+)
>
> diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
> index cad5b4f..513658b 100644
> --- a/include/linux/cgroup.h
> +++ b/include/linux/cgroup.h
> @@ -286,6 +286,9 @@ struct cgroup {
> struct simple_xattrs xattrs;
> u64 subgroups_limit;
>
> + /* ve_owner, responsible for running release agent. */
> + struct ve_struct *ve_owner;
> +
> /*
> * Per-cgroup path to release agent binary for release
> * notifications.
> diff --git a/include/linux/ve.h b/include/linux/ve.h
> index 9d60838..9cc5257 100644
> --- a/include/linux/ve.h
> +++ b/include/linux/ve.h
> @@ -268,6 +268,14 @@ static inline struct cgroup *cgroup_get_ve_root(struct cgroup *cgrp)
> struct seq_file;
> struct kernel_cpustat;
>
> +/*
> + * cgroup needs to know its owning ve for some operations, but
> + * cgroup's lifetime is independent of ve's; in theory ve can be destroyed
> + * earlier than some of its cgroups.
> + */
> +void ve_add_referring_cgroup(struct ve_struct *ve, struct cgroup *cgrp);
> +void ve_remove_referring_cgroups(struct ve_struct *ve);
> +
> #if defined(CONFIG_VE) && defined(CONFIG_CGROUP_SCHED)
> int ve_show_cpu_stat(struct ve_struct *ve, struct seq_file *p);
> int ve_show_loadavg(struct ve_struct *ve, struct seq_file *p);
> diff --git a/kernel/cgroup.c b/kernel/cgroup.c
> index 0b64d88..105536b 100644
> --- a/kernel/cgroup.c
> +++ b/kernel/cgroup.c
> @@ -4318,6 +4318,7 @@ int cgroup_mark_ve_root(struct ve_struct *ve)
> mutex_lock(&cgroup_mutex);
> for_each_active_root(root) {
> cgrp = task_cgroup_from_root(ve->init_task, root);
> + cgrp->ve_owner = ve;
> set_bit(CGRP_VE_ROOT, &cgrp->flags);
> err = cgroup_add_file_on_mark_ve(cgrp);
> if (err)
> @@ -4329,6 +4330,19 @@ int cgroup_mark_ve_root(struct ve_struct *ve)
> return err;
> }
>
> +void cgroup_unbind_roots_from_ve(struct ve_struct *ve)
> +{
> + struct cgroup *cgrp;
> + struct cgroupfs_root *root;
> +
> + mutex_lock(&cgroup_mutex);
> + for_each_active_root(root) {
> + cgrp = task_cgroup_from_root(ve->init_task, root);
> + cgrp->ve_owner = NULL;
> + }
> + mutex_unlock(&cgroup_mutex);
> +}
> +
> struct cgroup *cgroup_get_ve_root(struct cgroup *cgrp)
> {
> struct cgroup *ve_root = NULL;
> @@ -5455,6 +5469,7 @@ static void cgroup_release_agent(struct work_struct *work)
> raw_spin_lock(&release_list_lock);
> while (!list_empty(&release_list)) {
> char *argv[3], *envp[3];
> + struct ve_struct *ve;
> int i, err;
> char *pathbuf = NULL, *agentbuf = NULL;
> struct cgroup *root_cgrp;
> @@ -5468,7 +5483,20 @@ static void cgroup_release_agent(struct work_struct *work)
> goto continue_free;
> if (__cgroup_path(cgrp, pathbuf, PAGE_SIZE, true) < 0)
> goto continue_free;
> +
> + /*
> + * root_cgrp is the relative root for cgrp, for host
> + * cgroups root_cgrp is root->top_cgroup, for container
> + * cgroups it is any up the parent chain from cgrp marked
> + * as VE_ROOT.
> + */
> root_cgrp = cgroup_get_local_root(cgrp);
> +
> + ve = NULL;
> + if (root_cgrp->ve_owner)
> + ve = root_cgrp->ve_owner;
> + if (!ve)
> + goto continue_free;
> if (root_cgrp->release_agent_path)
> agentbuf = kstrdup(root_cgrp->release_agent_path,
> GFP_KERNEL);
> @@ -5490,7 +5518,12 @@ static void cgroup_release_agent(struct work_struct *work)
> * since the exec could involve hitting disk and hence
> * be a slow process */
> mutex_unlock(&cgroup_mutex);
> +#ifdef CONFIG_VE
> + err = call_usermodehelper_fns_ve(ve, argv[0], argv,
> + envp, UMH_WAIT_EXEC, NULL, NULL, NULL);
> +#else
> err = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_EXEC);
> +#endif
> if (err < 0)
> pr_warn_ratelimited("cgroup release_agent "
> "%s %s failed: %d\n",
> diff --git a/kernel/ve/ve.c b/kernel/ve/ve.c
> index a64b4a7..37353fb 100644
> --- a/kernel/ve/ve.c
> +++ b/kernel/ve/ve.c
> @@ -480,6 +480,7 @@ static void ve_drop_context(struct ve_struct *ve)
> static const struct timespec zero_time = { };
>
> extern int cgroup_mark_ve_root(struct ve_struct *ve);
> +extern void cgroup_unbind_roots_from_ve(struct ve_struct *ve);
>
> /* under ve->op_sem write-lock */
> static int ve_start_container(struct ve_struct *ve)
> @@ -588,10 +589,12 @@ void ve_stop_ns(struct pid_namespace *pid_ns)
> up_write(&ve->op_sem);
> }
>
> void ve_exit_ns(struct pid_namespace *pid_ns)
> {
> struct ve_struct *ve = current->task_ve;
>
> + cgroup_unbind_roots_from_ve(ve);
> /*
> * current->cgroups already switched to init_css_set in cgroup_exit(),
> * but current->task_ve still points to our exec ve.
This hunk looks broken and does not apply, should be something like:
@@ -595,6 +596,7 @@ void ve_stop_ns(struct pid_namespace *pid_ns)
{
struct ve_struct *ve = current->task_ve;
+ cgroup_unbind_roots_from_ve(ve);
/*
* current->cgroups already switched to init_css_set in
cgroup_exit(),
* but current->task_ve still points to our exec ve.
>
--
Best regards, Tikhomirov Pavel
Software Developer, Virtuozzo.
More information about the Devel
mailing list