[Devel] [PATCH RHEL9 COMMIT] ve/cgroup: Set release_agent_path for root cgroups separately
Konstantin Khorenko
khorenko at virtuozzo.com
Wed Oct 20 11:43:57 MSK 2021
The commit is pushed to "branch-rh9-5.14.vz9.1.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh9-5.14.0-4.vz9.10.12
------>
commit ee84e05864f9acf498132dbf166e0462d0d5665e
Author: Valeriy Vdovin <valeriy.vdovin at virtuozzo.com>
Date: Wed Oct 20 11:43:57 2021 +0300
ve/cgroup: Set release_agent_path for root cgroups separately
Previous patches already switched release agent binary calling mechanism
to call the binary in ve. Now we need to switch the binary path to
per-ve path. So we store release agent path in ve_ra_data structures in
per-ve ve->ra_data_list.
Short history of the patch:
Signed-off-by: Valeriy Vdovin <valeriy.vdovin at virtuozzo.com>
https://jira.sw.ru/browse/PSBM-83887
(cherry-picked from vz7 commit f1199bd9589b7c0914343dcc72f49ddaa9b98496)
https://jira.sw.ru/browse/PSBM-108270
(cherry picked from vz8 commit 40381d9afaff5618fbe93428f125a7acc2bde56d)
+++
ve/cgroup: Add release_agent to each container root cgroup
(cherry picked from vz8 commit 00bd9c411dba17692408fcdd1769fa90b275313b)
vz9 changes:
- rework and cleanup patch description
- don't remove cgroup_kn_lock_live error checking
- switch to new ve_ra_data_set / ve_ra_data_get_path_locked
- add BUG_ON if cft->file_offset is set as that can lead to
cgroup_add_file(NULL,...) crash
- lock cgroup_mutex as we need it for cgroup_add_file / cgroup_rm_file
and cgroup_is_dead consistency
- fixup hunks from 00bd9c411dba are merged here and there but without excess
cft_q_node related stuff
- if for some reason ve sees host root cgroup don't allow writing to it
and showing it
Note: ve_ra_data_set does allocation so we do not want it under
rcu_read_lock, so we get_ve and release rcu_read_lock. We can get_ve
(not tryget) as we just saw ve_owner so ve is not fully stopped yet and
should be populated.
https://jira.sw.ru/browse/PSBM-134002
Signed-off-by: Pavel Tikhomirov <ptikhomirov at virtuozzo.com>
Reviewed-by: Alexander Mikhalitsyn <alexander.mikhalitsyn at virtuozzo.com>
Reviewed-by: Kirill Tkhai <ktkhai at virtuozzo.com>
---
include/linux/cgroup-defs.h | 3 --
kernel/cgroup/cgroup-internal.h | 1 +
kernel/cgroup/cgroup-v1.c | 105 +++++++++++++++++++++++++++-------------
kernel/cgroup/cgroup.c | 40 ++++++++++++++-
4 files changed, 111 insertions(+), 38 deletions(-)
diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h
index 2035909f7a0a..ef8ebd8a1363 100644
--- a/include/linux/cgroup-defs.h
+++ b/include/linux/cgroup-defs.h
@@ -539,9 +539,6 @@ struct cgroup_root {
/* Hierarchy-specific flags */
unsigned int flags;
- /* The path to use for release notifications. */
- char release_agent_path[PATH_MAX];
-
/* The name for this hierarchy - may be empty */
char name[MAX_CGROUP_ROOT_NAMELEN];
};
diff --git a/kernel/cgroup/cgroup-internal.h b/kernel/cgroup/cgroup-internal.h
index 81bf97510198..c23ea4aa8e19 100644
--- a/kernel/cgroup/cgroup-internal.h
+++ b/kernel/cgroup/cgroup-internal.h
@@ -267,6 +267,7 @@ extern const struct proc_ns_operations cgroupns_operations;
extern struct cftype cgroup1_base_files[];
extern struct kernfs_syscall_ops cgroup1_kf_syscall_ops;
extern const struct fs_parameter_spec cgroup1_fs_parameters[];
+struct cftype *get_cftype_by_name(const char *name);
int proc_cgroupstats_show(struct seq_file *m, void *v);
bool cgroup1_ssid_disabled(int ssid);
diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c
index 5b3e8e79cc32..fe781bda5962 100644
--- a/kernel/cgroup/cgroup-v1.c
+++ b/kernel/cgroup/cgroup-v1.c
@@ -39,9 +39,6 @@ static bool cgroup_no_v1_named;
*/
static struct workqueue_struct *cgroup_pidlist_destroy_wq;
-/* protects cgroup_subsys->release_agent_path */
-static DEFINE_SPINLOCK(release_agent_path_lock);
-
bool cgroup1_ssid_disabled(int ssid)
{
return cgroup_no_v1_mask & (1 << ssid);
@@ -542,28 +539,62 @@ static ssize_t cgroup1_tasks_write(struct kernfs_open_file *of,
static ssize_t cgroup_release_agent_write(struct kernfs_open_file *of,
char *buf, size_t nbytes, loff_t off)
{
+ struct cgroup *root_cgrp;
+ struct ve_struct *ve = NULL;
struct cgroup *cgrp;
-
- BUILD_BUG_ON(sizeof(cgrp->root->release_agent_path) < PATH_MAX);
+ int ret;
cgrp = cgroup_kn_lock_live(of->kn, false);
if (!cgrp)
return -ENODEV;
- spin_lock(&release_agent_path_lock);
- strlcpy(cgrp->root->release_agent_path, strstrip(buf),
- sizeof(cgrp->root->release_agent_path));
- spin_unlock(&release_agent_path_lock);
+
+ rcu_read_lock();
+ root_cgrp = cgroup_ve_root1(cgrp);
+ if (!root_cgrp) {
+ if (ve_is_super(get_exec_env()) && cgrp == &cgrp->root->cgrp)
+ ve = &ve0;
+ } else {
+ if (root_cgrp == cgrp)
+ ve = rcu_dereference(root_cgrp->ve_owner);
+ }
+ get_ve(ve);
+ rcu_read_unlock();
+
+ if (ve) {
+ ret = ve_ra_data_set(ve, cgrp->root, strstrip(buf));
+ } else {
+ /* The ve is stopped or we have a bad cgrp */
+ ret = -ENODEV;
+ }
+
+ put_ve(ve);
cgroup_kn_unlock(of->kn);
- return nbytes;
+ return ret ? : nbytes;
}
static int cgroup_release_agent_show(struct seq_file *seq, void *v)
{
+ const char *release_agent = NULL;
+ struct cgroup *root_cgrp;
struct cgroup *cgrp = seq_css(seq)->cgroup;
+ struct ve_struct *ve = NULL;
+
+ rcu_read_lock();
+ root_cgrp = cgroup_ve_root1(cgrp);
+ if (!root_cgrp) {
+ if (ve_is_super(get_exec_env()) && cgrp == &cgrp->root->cgrp)
+ ve = &ve0;
+ } else {
+ if (root_cgrp == cgrp)
+ ve = rcu_dereference(root_cgrp->ve_owner);
+ }
+
+ if (ve)
+ release_agent = ve_ra_data_get_path_locked(ve, cgrp->root);
+ if (release_agent)
+ seq_puts(seq, release_agent);
+ rcu_read_unlock();
- spin_lock(&release_agent_path_lock);
- seq_puts(seq, cgrp->root->release_agent_path);
- spin_unlock(&release_agent_path_lock);
seq_putc(seq, '\n');
return 0;
}
@@ -661,7 +692,7 @@ struct cftype cgroup1_base_files[] = {
},
{
.name = "release_agent",
- .flags = CFTYPE_ONLY_ON_ROOT,
+ .flags = CFTYPE_ONLY_ON_ROOT | CFTYPE_VE_WRITABLE,
.seq_show = cgroup_release_agent_show,
.write = cgroup_release_agent_write,
.max_write_len = PATH_MAX - 1,
@@ -674,6 +705,17 @@ struct cftype cgroup1_base_files[] = {
{ } /* terminate */
};
+struct cftype *get_cftype_by_name(const char *name)
+{
+ struct cftype *cft;
+
+ for (cft = cgroup1_base_files; cft->name[0] != '\0'; cft++) {
+ if (!strcmp(cft->name, name))
+ return cft;
+ }
+ return NULL;
+}
+
#define _cg_virtualized(x) ((ve_is_super(get_exec_env())) ? (x) : 1)
/* Display information about each subsystem and each hierarchy */
@@ -827,6 +869,7 @@ void cgroup1_release_agent(struct work_struct *work)
spin_lock_irqsave(&ve->release_list_lock, flags);
while (!list_empty(&ve->release_list)) {
struct cgroup *cgrp;
+ const char *release_agent;
cgrp = list_entry(ve->release_list.next,
struct cgroup,
@@ -834,15 +877,14 @@ void cgroup1_release_agent(struct work_struct *work)
list_del_init(&cgrp->release_list);
spin_unlock_irqrestore(&ve->release_list_lock, flags);
- /* snoop agent path and exit early if empty */
- if (!cgrp->root->release_agent_path[0])
- goto continue_locked;
-
- spin_lock(&release_agent_path_lock);
- strlcpy(agentbuf, cgrp->root->release_agent_path, PATH_MAX);
- spin_unlock(&release_agent_path_lock);
- if (!agentbuf[0])
+ rcu_read_lock();
+ release_agent = ve_ra_data_get_path_locked(ve, cgrp->root);
+ if (!release_agent || !release_agent[0]) {
+ rcu_read_unlock();
goto continue_locked;
+ }
+ strlcpy(agentbuf, release_agent, PATH_MAX);
+ rcu_read_unlock();
ret = cgroup_path_ns(cgrp, pathbuf, PATH_MAX, ve_cgroup_ns);
if (ret < 0 || ret >= PATH_MAX)
@@ -914,6 +956,7 @@ static int cgroup1_rename(struct kernfs_node *kn, struct kernfs_node *new_parent
static int cgroup1_show_options(struct seq_file *seq, struct kernfs_root *kf_root)
{
+ const char *release_agent;
struct cgroup_root *root = cgroup_root_from_kf(kf_root);
struct cgroup_subsys *ss;
int ssid;
@@ -928,12 +971,11 @@ static int cgroup1_show_options(struct seq_file *seq, struct kernfs_root *kf_roo
if (root->flags & CGRP_ROOT_CPUSET_V2_MODE)
seq_puts(seq, ",cpuset_v2_mode");
- spin_lock(&release_agent_path_lock);
- if (strlen(root->release_agent_path))
- seq_show_option(seq, "release_agent",
- root->release_agent_path);
- spin_unlock(&release_agent_path_lock);
-
+ rcu_read_lock();
+ release_agent = ve_ra_data_get_path_locked(get_exec_env(), root);
+ if (release_agent && release_agent[0])
+ seq_show_option(seq, "release_agent", release_agent);
+ rcu_read_unlock();
if (test_bit(CGRP_CPUSET_CLONE_CHILDREN, &root->cgrp.flags))
seq_puts(seq, ",clone_children");
if (strlen(root->name))
@@ -1144,11 +1186,8 @@ int cgroup1_reconfigure(struct fs_context *fc)
WARN_ON(rebind_subsystems(&cgrp_dfl_root, removed_mask));
- if (ctx->release_agent) {
- spin_lock(&release_agent_path_lock);
- strcpy(root->release_agent_path, ctx->release_agent);
- spin_unlock(&release_agent_path_lock);
- }
+ if (ctx->release_agent)
+ ret = ve_ra_data_set(get_exec_env(), root, ctx->release_agent);
trace_cgroup_remount(root);
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index 1fcb3041abb7..396c0dc98b64 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -68,6 +68,8 @@
/* let's not notify more than 100 times per second */
#define CGROUP_FILE_NOTIFY_MIN_INTV DIV_ROUND_UP(HZ, 100)
+#define CGROUP_FILENAME_RELEASE_AGENT "release_agent"
+
/*
* cgroup_mutex is the master lock. Any modification to cgroup or its
* hierarchy must be performed while holding it.
@@ -2050,12 +2052,21 @@ static inline bool ve_check_root_cgroups(struct css_set *cset)
return false;
}
+static int cgroup_add_file(struct cgroup_subsys_state *css, struct cgroup *cgrp,
+ struct cftype *cft, bool activate);
+
int cgroup_mark_ve_roots(struct ve_struct *ve)
{
struct cgrp_cset_link *link;
struct css_set *cset;
struct cgroup *cgrp;
+ struct cftype *cft;
+ int err = 0;
+
+ cft = get_cftype_by_name(CGROUP_FILENAME_RELEASE_AGENT);
+ BUG_ON(!cft || cft->file_offset);
+ mutex_lock(&cgroup_mutex);
spin_lock_irq(&css_set_lock);
/*
@@ -2068,6 +2079,7 @@ int cgroup_mark_ve_roots(struct ve_struct *ve)
if (ve_check_root_cgroups(cset)) {
spin_unlock_irq(&css_set_lock);
+ mutex_unlock(&cgroup_mutex);
return -EINVAL;
}
@@ -2079,12 +2091,30 @@ int cgroup_mark_ve_roots(struct ve_struct *ve)
rcu_assign_pointer(cgrp->ve_owner, ve);
set_bit(CGRP_VE_ROOT, &cgrp->flags);
+
+ /*
+ * The cgroupns can hold reference on cgroups which are already
+ * offline with already removed directory on the filesystem. We
+ * should not add files to them.
+ */
+ if (!cgroup_is_dead(cgrp)) {
+ err = cgroup_add_file(NULL, cgrp, cft, true);
+ if (err) {
+ pr_warn("failed to add file to VE_ROOT cgroup,"
+ " err:%d\n", err);
+ break;
+ }
+ }
}
link_ve_root_cpu_cgroup(cset->subsys[cpu_cgrp_id]);
spin_unlock_irq(&css_set_lock);
+ mutex_unlock(&cgroup_mutex);
synchronize_rcu();
- return 0;
+
+ if (err)
+ cgroup_unmark_ve_roots(ve);
+ return err;
}
void cgroup_unmark_ve_roots(struct ve_struct *ve)
@@ -2092,7 +2122,11 @@ void cgroup_unmark_ve_roots(struct ve_struct *ve)
struct cgrp_cset_link *link;
struct css_set *cset;
struct cgroup *cgrp;
+ struct cftype *cft;
+
+ cft = get_cftype_by_name(CGROUP_FILENAME_RELEASE_AGENT);
+ mutex_lock(&cgroup_mutex);
spin_lock_irq(&css_set_lock);
/*
@@ -2109,11 +2143,13 @@ void cgroup_unmark_ve_roots(struct ve_struct *ve)
if (!is_virtualized_cgroup(cgrp))
continue;
+ cgroup_rm_file(cgrp, cft);
rcu_assign_pointer(cgrp->ve_owner, NULL);
clear_bit(CGRP_VE_ROOT, &cgrp->flags);
}
spin_unlock_irq(&css_set_lock);
+ mutex_unlock(&cgroup_mutex);
/* ve_owner == NULL will be visible */
synchronize_rcu();
}
@@ -2196,7 +2232,7 @@ void init_cgroup_root(struct cgroup_fs_context *ctx)
root->flags = ctx->flags;
if (ctx->release_agent)
- strscpy(root->release_agent_path, ctx->release_agent, PATH_MAX);
+ ve_ra_data_set(get_exec_env(), root, ctx->release_agent);
if (ctx->name)
strscpy(root->name, ctx->name, MAX_CGROUP_ROOT_NAMELEN);
if (ctx->cpuset_clone_children)
More information about the Devel
mailing list