[Devel] [PATCH RH9 27/28] ve/pid: Export kernel.pid_max via ve cgroup
Andrey Zhadchenko
andrey.zhadchenko at virtuozzo.com
Thu Oct 14 13:33:35 MSK 2021
From: Pavel Tikhomirov <ptikhomirov at virtuozzo.com>
This member represents the kernel.pid_max sysctl; it is vz-specific but
lives in the pid namespace. To be able to c/r from the libvzctl script it
is better to put pid_max in the ve cgroup; this way we do not need to enter
the container's root pid namespace to get/set this sysctl.
Note: we need to be able to set pid_max on running Container,
as we can't set pid_max before we have ve's pidns.
https://jira.sw.ru/browse/PSBM-48397
Signed-off-by: Pavel Tikhomirov <ptikhomirov at virtuozzo.com>
Acked-by: Cyrill Gorcunov <gorcunov at openvz.org>
Cherry-picked from vz7 commit be980b3141ca ("ve/pid: Export
kernel.pid_max via ve cgroup")
v2 changes:
* vz8 note: read and write handlers do not need to get ve->op_sem,
ve->ve_ns is rcu protected, so rcu_read_(un)lock() is enough.
See ve_drop_context():
rcu_assign_pointer(ve->ve_ns, NULL);
synchronize_rcu();
put_nsproxy(ve_ns);
* Also, the check for ve->is_running is redundant and has been removed.
Regardless of the ve->is_running value (even if it is already 0 and the CT
is being stopped), if we dereference ve->ve_ns under rcu and get !NULL,
we are safe to write the pid_max value.
https://jira.sw.ru/browse/PSBM-102629
Signed-off-by: Konstantin Khorenko <khorenko at virtuozzo.com>
Reviewed-by: Pavel Tikhomirov <ptikhomirov at virtuozzo.com>
(cherry picked from vz8 commit cb704dc9daf17cf5f30e0c8ce37f8e3fcb3fcb69)
Signed-off-by: Andrey Zhadchenko <andrey.zhadchenko at virtuozzo.com>
---
kernel/ve/ve.c | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 50 insertions(+)
diff --git a/kernel/ve/ve.c b/kernel/ve/ve.c
index 2f4b746..e410718 100644
--- a/kernel/ve/ve.c
+++ b/kernel/ve/ve.c
@@ -1144,6 +1144,50 @@ enum {
VE_CF_CLOCK_BOOTBASED,
};
+/* Read handler: pid_max of the VE's pid namespace, or 0 if it has none. */
+static u64 ve_pid_max_read_u64(struct cgroup_subsys_state *css,
+			       struct cftype *cft)
+{
+	struct nsproxy *nsp;
+	u64 limit = 0;
+
+	/* ve->ve_ns is only dereferenced inside the RCU read-side section. */
+	rcu_read_lock();
+	nsp = rcu_dereference(css_to_ve(css)->ve_ns);
+	if (nsp && nsp->pid_ns_for_children)
+		limit = nsp->pid_ns_for_children->pid_max;
+	rcu_read_unlock();
+
+	return limit;
+}
+
+extern int pid_max_min, pid_max_max;
+
+/*
+ * Set pid_max of the VE's pid namespace. Returns 0, -EPERM, -EBUSY (no
+ * pidns) or -EINVAL (val out of range); every path drops the RCU lock.
+ */
+static int ve_pid_max_write_running_u64(struct cgroup_subsys_state *css,
+					struct cftype *cft, u64 val)
+{
+	struct ve_struct *ve = css_to_ve(css);
+	struct nsproxy *ve_ns;
+	int ret = 0;
+
+	if (!ve_is_super(get_exec_env()) && !ve->is_pseudosuper)
+		return -EPERM;
+	rcu_read_lock();
+	ve_ns = rcu_dereference(ve->ve_ns);
+	if (!ve_ns || !ve_ns->pid_ns_for_children)
+		ret = -EBUSY;
+	else if (pid_max_min > val || pid_max_max < val)
+		ret = -EINVAL;
+	else	/* use the rcu_dereference()d pointer, not ve->ve_ns again */
+		ve_ns->pid_ns_for_children->pid_max = val;
+	rcu_read_unlock();
+	return ret;
+}
+
static int ve_ts_read(struct seq_file *sf, void *v)
{
struct ve_struct *ve = css_to_ve(seq_css(sf));
@@ -1481,6 +1525,12 @@ static ssize_t ve_write_ctty(struct kernfs_open_file *of, char *buf,
.private = VE_CF_CLOCK_BOOTBASED,
},
{
+ .name = "pid_max",
+ .flags = CFTYPE_NOT_ON_ROOT,
+ .read_u64 = ve_pid_max_read_u64,
+ .write_u64 = ve_pid_max_write_running_u64,
+ },
+ {
.name = "netns_max_nr",
.flags = CFTYPE_NOT_ON_ROOT,
.read_u64 = ve_netns_max_nr_read,
--
1.8.3.1
More information about the Devel
mailing list