[Devel] [PATCH RHEL7 v14 11/12] ve/cgroup: set release_agent_path for root cgroups separately for each ve.
Valeriy Vdovin
valeriy.vdovin at virtuozzo.com
Tue Apr 21 15:53:48 MSK 2020
This is done so that each container can set its own release agent.
Release agent information is now stored in per-cgroup-root data
structure in ve.
https://jira.sw.ru/browse/PSBM-83887
Signed-off-by: Valeriy Vdovin <valeriy.vdovin at virtuozzo.com>
---
include/linux/cgroup.h | 5 +-
include/linux/ve.h | 7 +++
kernel/cgroup.c | 133 ++++++++++++++++++++++++++++++++++++++++---------
kernel/ve/ve.c | 69 ++++++++++++++++++++++++-
4 files changed, 186 insertions(+), 28 deletions(-)
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index d0ce3cc..911dd48 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -394,9 +394,6 @@ struct cgroupfs_root {
/* IDs for cgroups in this hierarchy */
struct ida cgroup_ida;
- /* The path to use for release notifications. */
- char release_agent_path[PATH_MAX];
-
/* The name for this hierarchy - may be empty */
char name[MAX_CGROUP_ROOT_NAMELEN];
};
@@ -639,7 +636,7 @@ int cgroup_task_count(const struct cgroup *cgrp);
void cgroup_release_agent(struct work_struct *work);
#ifdef CONFIG_VE
-void cgroup_mark_ve_roots(struct ve_struct *ve);
+int cgroup_mark_ve_roots(struct ve_struct *ve);
void cgroup_unmark_ve_roots(struct ve_struct *ve);
struct ve_struct *cgroup_get_ve_owner(struct cgroup *cgrp);
#endif
diff --git a/include/linux/ve.h b/include/linux/ve.h
index 94a07df..da719b4 100644
--- a/include/linux/ve.h
+++ b/include/linux/ve.h
@@ -214,6 +214,13 @@ void do_update_load_avg_ve(void);
void ve_add_to_release_list(struct cgroup *cgrp);
void ve_rm_from_release_list(struct cgroup *cgrp);
+
+int ve_set_release_agent_path(struct ve_struct *ve, struct cgroup *cgroot,
+ const char *release_agent);
+
+const char *ve_get_release_agent_path(struct ve_struct *ve,
+ struct cgroup *cgrp_root);
+
extern struct ve_struct *get_ve(struct ve_struct *ve);
extern void put_ve(struct ve_struct *ve);
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index ae417ce..006319f 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -1090,10 +1090,21 @@ static int rebind_subsystems(struct cgroupfs_root *root,
static int cgroup_show_options(struct seq_file *seq, struct dentry *dentry)
{
+ const char *release_agent;
struct cgroupfs_root *root = dentry->d_sb->s_fs_info;
struct cgroup_subsys *ss;
+ struct cgroup *root_cgrp = &root->top_cgroup;
+#ifdef CONFIG_VE
+ struct ve_struct *ve = get_exec_env();
+ if (!ve_is_super(ve)) {
+ mutex_lock(&cgroup_mutex);
+ root_cgrp = task_cgroup_from_root(ve->init_task, root);
+ mutex_unlock(&cgroup_mutex);
+ }
+#endif
mutex_lock(&cgroup_root_mutex);
+
for_each_subsys(root, ss)
seq_printf(seq, ",%s", ss->name);
if (root->flags & CGRP_ROOT_SANE_BEHAVIOR)
@@ -1104,9 +1115,12 @@ static int cgroup_show_options(struct seq_file *seq, struct dentry *dentry)
seq_puts(seq, ",xattr");
if (root->flags & CGRP_ROOT_CPUSET_V2_MODE)
seq_puts(seq, ",cpuset_v2_mode");
- if (strlen(root->release_agent_path))
- seq_show_option(seq, "release_agent",
- root->release_agent_path);
+ rcu_read_lock();
+ release_agent = ve_get_release_agent_path(root_cgrp->ve_owner,
+ root_cgrp);
+ if (release_agent && release_agent[0])
+ seq_show_option(seq, "release_agent", release_agent);
+ rcu_read_unlock();
if (test_bit(CGRP_CPUSET_CLONE_CHILDREN, &root->top_cgroup.flags))
seq_puts(seq, ",clone_children");
if (strlen(root->name))
@@ -1384,8 +1398,13 @@ static int cgroup_remount(struct super_block *sb, int *flags, char *data)
/* re-populate subsystem files */
cgroup_populate_dir(cgrp, false, added_mask);
- if (opts.release_agent)
- strcpy(root->release_agent_path, opts.release_agent);
+ if (opts.release_agent) {
+ struct cgroup *root_cgrp;
+ root_cgrp = cgroup_get_local_root(cgrp);
+ if (root_cgrp->ve_owner)
+ ret = ve_set_release_agent_path(root_cgrp->ve_owner,
+ root_cgrp, opts.release_agent);
+ }
out_unlock:
kfree(opts.release_agent);
kfree(opts.name);
@@ -1546,8 +1565,6 @@ static struct cgroupfs_root *cgroup_root_from_opts(struct cgroup_sb_opts *opts)
root->subsys_mask = opts->subsys_mask;
root->flags = opts->flags;
ida_init(&root->cgroup_ida);
- if (opts->release_agent)
- strcpy(root->release_agent_path, opts->release_agent);
if (opts->name)
strcpy(root->name, opts->name);
if (opts->cpuset_clone_children)
@@ -2313,27 +2330,44 @@ static int cgroup_procs_write(struct cgroup *cgrp, struct cftype *cft, u64 tgid)
static int cgroup_release_agent_write(struct cgroup *cgrp, struct cftype *cft,
const char *buffer)
{
- BUILD_BUG_ON(sizeof(cgrp->root->release_agent_path) < PATH_MAX);
-
+ int ret = 0;
+ struct cgroup *root_cgrp;
if (strlen(buffer) >= PATH_MAX)
return -EINVAL;
if (!cgroup_lock_live_group(cgrp))
return -ENODEV;
- mutex_lock(&cgroup_root_mutex);
- strcpy(cgrp->root->release_agent_path, buffer);
- mutex_unlock(&cgroup_root_mutex);
+ root_cgrp = cgroup_get_local_root(cgrp);
+ BUG_ON(!root_cgrp);
+ if (root_cgrp->ve_owner) {
+ ret = ve_set_release_agent_path(root_cgrp->ve_owner,
+ root_cgrp, buffer);
+ }
+
mutex_unlock(&cgroup_mutex);
- return 0;
+ return ret;
}
static int cgroup_release_agent_show(struct cgroup *cgrp, struct cftype *cft,
struct seq_file *seq)
{
+ const char *release_agent;
+ struct cgroup *root_cgrp;
+
if (!cgroup_lock_live_group(cgrp))
return -ENODEV;
- seq_puts(seq, cgrp->root->release_agent_path);
+
+ root_cgrp = cgroup_get_local_root(cgrp);
+ if (root_cgrp->ve_owner) {
+ rcu_read_lock();
+ release_agent = ve_get_release_agent_path(
+ root_cgrp->ve_owner, root_cgrp);
+
+ if (release_agent)
+ seq_puts(seq, release_agent);
+ rcu_read_unlock();
+ }
seq_putc(seq, '\n');
mutex_unlock(&cgroup_mutex);
return 0;
@@ -4149,7 +4183,7 @@ static struct cftype files[] = {
},
{
.name = "release_agent",
- .flags = CFTYPE_ONLY_ON_ROOT,
+ .flags = CFTYPE_ONLY_ON_ROOT | CFTYPE_VE_WRITABLE,
.read_seq_string = cgroup_release_agent_show,
.write_string = cgroup_release_agent_write,
.max_write_len = PATH_MAX,
@@ -4278,22 +4312,60 @@ static int subgroups_count(struct cgroup *cgroup)
return cgrps_count;
}
#ifdef CONFIG_VE
+/* Find a base cgroup file in files[] by name; NULL when there is none. */
+static struct cftype *get_cftype_by_name(const char *name)
+{
+ struct cftype *cft = files;
+
+ while (cft->name[0] != '\0' && strcmp(cft->name, name))
+ cft++;
+ return cft->name[0] != '\0' ? cft : NULL;
+}
+
+/* Mark ve's init-task cgroups as VE roots and add release_agent to each. */
-void cgroup_mark_ve_roots(struct ve_struct *ve)
+int cgroup_mark_ve_roots(struct ve_struct *ve)
{
- struct cgroup *cgrp;
+ struct cgroup *cgrp, *tmp;
struct cgroupfs_root *root;
+ int err = 0;
+ struct cftype *cft = get_cftype_by_name("release_agent");
+ LIST_HEAD(pending);
+
+ BUG_ON(!cft);
+ mutex_lock(&cgroup_cft_mutex);
mutex_lock(&cgroup_mutex);
for_each_active_root(root) {
cgrp = task_cgroup_from_root(ve->init_task, root);
- cgrp->ve_owner = ve;
+ rcu_assign_pointer(cgrp->ve_owner, ve);
set_bit(CGRP_VE_ROOT, &cgrp->flags);
-
+ dget(cgrp->dentry);
+ list_add_tail(&cgrp->cft_q_node, &pending);
if (test_bit(cpu_cgroup_subsys_id, &root->subsys_mask))
link_ve_root_cpu_cgroup(cgrp);
}
mutex_unlock(&cgroup_mutex);
+ list_for_each_entry_safe(cgrp, tmp, &pending, cft_q_node) {
+ struct inode *inode = cgrp->dentry->d_inode;
+
+ /* 'pending' is on the stack: unlink on the error path too */
+ list_del_init(&cgrp->cft_q_node);
+ if (err) {
+ dput(cgrp->dentry);
+ continue;
+ }
+
+ mutex_lock(&inode->i_mutex);
+ mutex_lock(&cgroup_mutex);
+ if (!cgroup_is_removed(cgrp))
+ err = cgroup_add_file(cgrp, NULL, cft);
+ mutex_unlock(&cgroup_mutex);
+ mutex_unlock(&inode->i_mutex);
+ dput(cgrp->dentry);
+ }
+ mutex_unlock(&cgroup_cft_mutex);
+ return err;
}
void cgroup_unmark_ve_roots(struct ve_struct *ve)
@@ -5429,15 +5501,24 @@ static void check_for_release(struct cgroup *cgrp)
void cgroup_release_agent(struct work_struct *work)
{
struct ve_struct *ve;
+ char *agentbuf;
+
+ agentbuf = kzalloc(PATH_MAX, GFP_KERNEL);
+ if (!agentbuf) {
+ pr_warn("failed to allocate agentbuf\n");
+ return;
+ }
+
ve = container_of(work, struct ve_struct, release_agent_work);
mutex_lock(&cgroup_mutex);
raw_spin_lock(&ve->release_list_lock);
while (!list_empty(&ve->release_list)) {
char *argv[3], *envp[3];
int i, err;
- char *pathbuf = NULL, *agentbuf = NULL;
+ char *pathbuf = NULL;
struct cgroup *cgrp, *root_cgrp;
struct task_struct *ve_task;
+ const char *release_agent;
cgrp = list_entry(ve->release_list.next,
struct cgroup,
@@ -5465,9 +5546,15 @@ void cgroup_release_agent(struct work_struct *work)
rcu_read_unlock();
goto continue_free;
}
+
+ release_agent = ve_get_release_agent_path(ve, root_cgrp);
+
+ *agentbuf = 0;
+ if (release_agent)
+ strcpy(agentbuf, release_agent);
rcu_read_unlock();
- agentbuf = kstrdup(cgrp->root->release_agent_path, GFP_KERNEL);
- if (!agentbuf)
+
+ if (!*agentbuf)
goto continue_free;
i = 0;
@@ -5498,11 +5585,11 @@ void cgroup_release_agent(struct work_struct *work)
mutex_lock(&cgroup_mutex);
continue_free:
kfree(pathbuf);
- kfree(agentbuf);
raw_spin_lock(&ve->release_list_lock);
}
raw_spin_unlock(&ve->release_list_lock);
mutex_unlock(&cgroup_mutex);
+ kfree(agentbuf);
}
static int __init cgroup_disable(char *str)
diff --git a/kernel/ve/ve.c b/kernel/ve/ve.c
index a30343f..5371c10 100644
--- a/kernel/ve/ve.c
+++ b/kernel/ve/ve.c
@@ -51,6 +51,11 @@ struct per_cgroot_data {
* data is related to this cgroup
*/
struct cgroup *cgroot;
+ /*
+ * path to the release agent binary that should
+ * be spawned for all cgroups under this cgroup root
+ */
+ struct cgroup_rcu_string __rcu *release_agent_path;
};
extern struct kmapset_set sysfs_ve_perms_set;
@@ -175,6 +180,59 @@ static inline struct per_cgroot_data *per_cgroot_get_or_create(
return data;
}
+/* Set ve's release agent path for cgroot to a copy of release_agent. */
+int ve_set_release_agent_path(struct ve_struct *ve,
+ struct cgroup *cgroot, const char *release_agent)
+{
+ struct per_cgroot_data *data;
+ struct cgroup_rcu_string *new_path, *old_path;
+
+ /* copy first: both failure exits below happen before any locking */
+ new_path = cgroup_rcu_strdup(release_agent, strlen(release_agent));
+ if (IS_ERR(new_path))
+ return PTR_ERR(new_path);
+
+ data = per_cgroot_get_or_create(ve, cgroot);
+ if (IS_ERR(data)) {
+ kfree(new_path);
+ return PTR_ERR(data);
+ }
+
+ raw_spin_lock(&ve->per_cgroot_list_lock);
+ old_path = rcu_dereference_protected(data->release_agent_path,
+ lockdep_is_held(&ve->per_cgroot_list_lock));
+
+ rcu_assign_pointer(data->release_agent_path, new_path);
+ raw_spin_unlock(&ve->per_cgroot_list_lock);
+ /* RCU readers may still use the old string: free it after a grace period */
+ if (old_path)
+ kfree_rcu(old_path, rcu_head);
+
+ return 0;
+}
+
+const char *ve_get_release_agent_path(struct ve_struct *ve,
+ struct cgroup *cgroot)
+{
+ /* caller must hold rcu_read_lock(); the result is RCU-protected */
+ struct per_cgroot_data *data;
+ struct cgroup_rcu_string *str;
+
+ /* callers pass cgroup->ve_owner, which other call sites treat as maybe NULL */
+ if (!ve)
+ return NULL;
+
+ raw_spin_lock(&ve->per_cgroot_list_lock);
+ data = per_cgroot_data_find_locked(&ve->per_cgroot_list, cgroot);
+ raw_spin_unlock(&ve->per_cgroot_list_lock);
+
+ if (!data)
+ return NULL;
+
+ str = rcu_dereference(data->release_agent_path);
+ return str ? str->val : NULL;
+}
+
struct cgroup_subsys_state *ve_get_init_css(struct ve_struct *ve, int subsys_id)
{
struct cgroup_subsys_state *css, *tmp;
@@ -646,7 +704,9 @@ static int ve_start_container(struct ve_struct *ve)
if (err < 0)
goto err_iterate;
- cgroup_mark_ve_roots(ve);
+ err = cgroup_mark_ve_roots(ve);
+ if (err)
+ goto err_mark_ve;
ve->is_running = 1;
@@ -656,6 +716,8 @@ static int ve_start_container(struct ve_struct *ve)
return 0;
+err_mark_ve:
+ ve_hook_iterate_fini(VE_SS_CHAIN, ve);
err_iterate:
ve_workqueue_stop(ve);
err_workqueue:
@@ -672,7 +734,12 @@ err_list:
static void per_cgroot_free_all_locked(struct list_head *per_cgroot_list)
{
struct per_cgroot_data *data, *saved;
+ struct cgroup_rcu_string *release_agent;
list_for_each_entry_safe(data, saved, per_cgroot_list, list) {
+ release_agent = data->release_agent_path;
+ RCU_INIT_POINTER(data->release_agent_path, NULL);
+ if (release_agent)
+ kfree_rcu(release_agent, rcu_head);
list_del_init(&data->list);
kfree(data);
}
--
1.8.3.1
More information about the Devel
mailing list