[Devel] [PATCH 2/2 v1] ve/cgroup: Added pointers to owning ve to root cgroups
Valeriy Vdovin
valeriy.vdovin at virtuozzo.com
Tue Mar 17 13:28:15 MSK 2020
Follow-up patch to the per-cgroup release_agent property. release_agent
notifications are spawned from a special kthread running under ve0, but
the newly spawned tasks should run under their own ve context. An easy
way to pass this information to the spawning code is to add a 've_owner'
field to each root cgroup. At notification time any cgroup can be walked
upwards to its root and ve_owner read from there.
https://jira.sw.ru/browse/PSBM-83887
Signed-off-by: Valeriy Vdovin <valeriy.vdovin at virtuozzo.com>
---
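Note (illustration only, not part of the patch): with ve_owner in place,
any cgroup can resolve its owning ve by walking up to its local root. A
minimal sketch of that lookup; the helper name cgroup_ve_owner is
hypothetical, and cgroup_get_local_root() is assumed to behave as
described in the cgroup_release_agent() hunk below:

static struct ve_struct *cgroup_ve_owner(struct cgroup *cgrp)
{
	/* caller must hold cgroup_mutex */
	struct cgroup *root_cgrp = cgroup_get_local_root(cgrp);

	/* NULL when no ve is bound, e.g. after container stop */
	return root_cgrp->ve_owner;
}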
 include/linux/cgroup.h |  3 +++
 include/linux/ve.h     |  8 ++++++++
 kernel/cgroup.c        | 33 +++++++++++++++++++++++++++++++++
 kernel/ve/ve.c         |  3 +++
 4 files changed, 47 insertions(+)
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index cad5b4f..513658b 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -286,6 +286,9 @@ struct cgroup {
struct simple_xattrs xattrs;
u64 subgroups_limit;
+ /* The ve that owns this root cgroup and runs its release agent. */
+ struct ve_struct *ve_owner;
+
/*
* Per-cgroup path to release agent binary for release
* notifications.
diff --git a/include/linux/ve.h b/include/linux/ve.h
index 9d60838..9cc5257 100644
--- a/include/linux/ve.h
+++ b/include/linux/ve.h
@@ -268,6 +268,14 @@ static inline struct cgroup *cgroup_get_ve_root(struct cgroup *cgrp)
struct seq_file;
struct kernel_cpustat;
+/*
+ * A cgroup needs to know its owning ve for some operations, but a
+ * cgroup's lifetime is independent of the ve's: in theory a ve can be
+ * destroyed earlier than some of its cgroups.
+ */
+void ve_add_referring_cgroup(struct ve_struct *ve, struct cgroup *cgrp);
+void ve_remove_referring_cgroups(struct ve_struct *ve);
+
#if defined(CONFIG_VE) && defined(CONFIG_CGROUP_SCHED)
int ve_show_cpu_stat(struct ve_struct *ve, struct seq_file *p);
int ve_show_loadavg(struct ve_struct *ve, struct seq_file *p);
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 0b64d88..105536b 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -4318,6 +4318,7 @@ int cgroup_mark_ve_root(struct ve_struct *ve)
mutex_lock(&cgroup_mutex);
for_each_active_root(root) {
cgrp = task_cgroup_from_root(ve->init_task, root);
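+ /* Remember the owning ve; cleared again in cgroup_unbind_roots_from_ve(). */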
+ cgrp->ve_owner = ve;
set_bit(CGRP_VE_ROOT, &cgrp->flags);
err = cgroup_add_file_on_mark_ve(cgrp);
if (err)
@@ -4329,6 +4330,19 @@ int cgroup_mark_ve_root(struct ve_struct *ve)
return err;
}
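+/*
+ * Called on container stop: clears the ve_owner back-pointers so that
+ * a pending release_agent work item cannot dereference a ve that is
+ * about to be destroyed.
+ */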
+void cgroup_unbind_roots_from_ve(struct ve_struct *ve)
+{
+ struct cgroup *cgrp;
+ struct cgroupfs_root *root;
+
+ mutex_lock(&cgroup_mutex);
+ for_each_active_root(root) {
+ cgrp = task_cgroup_from_root(ve->init_task, root);
+ cgrp->ve_owner = NULL;
+ }
+ mutex_unlock(&cgroup_mutex);
+}
+
struct cgroup *cgroup_get_ve_root(struct cgroup *cgrp)
{
struct cgroup *ve_root = NULL;
@@ -5455,6 +5469,7 @@ static void cgroup_release_agent(struct work_struct *work)
raw_spin_lock(&release_list_lock);
while (!list_empty(&release_list)) {
char *argv[3], *envp[3];
+ struct ve_struct *ve;
int i, err;
char *pathbuf = NULL, *agentbuf = NULL;
struct cgroup *root_cgrp;
@@ -5468,7 +5483,20 @@ static void cgroup_release_agent(struct work_struct *work)
goto continue_free;
if (__cgroup_path(cgrp, pathbuf, PAGE_SIZE, true) < 0)
goto continue_free;
+
+ /*
+ * root_cgrp is the relative root for cgrp: for host cgroups
+ * it is root->top_cgroup, for container cgroups it is the
+ * nearest ancestor of cgrp marked as VE_ROOT.
+ */
root_cgrp = cgroup_get_local_root(cgrp);
+
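+ /*
+ * Without a known ve_owner there is no context to run the
+ * release agent in, so skip this notification.
+ */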
+ ve = root_cgrp->ve_owner;
+ if (!ve)
+ goto continue_free;
if (root_cgrp->release_agent_path)
agentbuf = kstrdup(root_cgrp->release_agent_path,
GFP_KERNEL);
@@ -5490,7 +5518,12 @@ static void cgroup_release_agent(struct work_struct *work)
* since the exec could involve hitting disk and hence
* be a slow process */
mutex_unlock(&cgroup_mutex);
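+ /*
+ * Spawn the agent inside the owning ve so that the new task
+ * runs in the container's context rather than in ve0.
+ */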
+#ifdef CONFIG_VE
+ err = call_usermodehelper_fns_ve(ve, argv[0], argv,
+ envp, UMH_WAIT_EXEC, NULL, NULL, NULL);
+#else
err = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_EXEC);
+#endif
if (err < 0)
pr_warn_ratelimited("cgroup release_agent "
"%s %s failed: %d\n",
diff --git a/kernel/ve/ve.c b/kernel/ve/ve.c
index a64b4a7..37353fb 100644
--- a/kernel/ve/ve.c
+++ b/kernel/ve/ve.c
@@ -480,6 +480,7 @@ static void ve_drop_context(struct ve_struct *ve)
static const struct timespec zero_time = { };
extern int cgroup_mark_ve_root(struct ve_struct *ve);
+extern void cgroup_unbind_roots_from_ve(struct ve_struct *ve);
/* under ve->op_sem write-lock */
static int ve_start_container(struct ve_struct *ve)
@@ -588,10 +589,12 @@ void ve_stop_ns(struct pid_namespace *pid_ns)
up_write(&ve->op_sem);
}
void ve_exit_ns(struct pid_namespace *pid_ns)
{
struct ve_struct *ve = current->task_ve;
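+ /* Unbind root cgroups from this ve before it is destroyed. */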
+ cgroup_unbind_roots_from_ve(ve);
/*
* current->cgroups already switched to init_css_set in cgroup_exit(),
* but current->task_ve still points to our exec ve.
--
1.8.3.1