[Devel] [PATCH vz8 v2] ve/pid: Export kernel.pid_max via ve cgroup

Pavel Tikhomirov ptikhomirov at virtuozzo.com
Wed Jun 23 17:19:38 MSK 2021


On 23.06.2021 17:07, Konstantin Khorenko wrote:
> From: Pavel Tikhomirov <ptikhomirov at virtuozzo.com>
> 
> This member represents the kernel.pid_max sysctl; it is vz-specific but
> lives in the pid namespace. To be able to c/r from the libvzctl script it
> is better to put pid_max in the ve cgroup; this way we do not need to enter
> the container's root pid namespace to get/set this sysctl.
> 
> Note: we need to be able to set pid_max on running Container,
> as we can't set pid_max before we have ve's pidns.
> 
> https://jira.sw.ru/browse/PSBM-48397
> 
> Signed-off-by: Pavel Tikhomirov <ptikhomirov at virtuozzo.com>
> Acked-by: Cyrill Gorcunov <gorcunov at openvz.org>
> 
> Cherry-picked from vz7 commit be980b3141ca ("ve/pid: Export
> kernel.pid_max via ve cgroup")
> 
> v2 changes:
> * vz8 note: read and write handlers do not need to get ve->op_sem,
>    ve->ve_ns is rcu protected, so rcu_read_(un)lock() is enough.
> 
>    See ve_drop_context():
>          rcu_assign_pointer(ve->ve_ns, NULL);
>          synchronize_rcu();
>          put_nsproxy(ve_ns);
> 
> * Also, the check for ve->is_running is redundant and has been removed.
>    Regardless of the ve->is_running value (even if it's 0 already and the CT
>    is being stopped), if we dereference ve->ve_ns under rcu and get !NULL,
>    we are safe to write pid_max value.
> 
> https://jira.sw.ru/browse/PSBM-102629

Reviewed-by: Pavel Tikhomirov <ptikhomirov at virtuozzo.com>

> Signed-off-by: Konstantin Khorenko <khorenko at virtuozzo.com>
> ---
>   kernel/ve/ve.c | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++
>   1 file changed, 50 insertions(+)
> 
> diff --git a/kernel/ve/ve.c b/kernel/ve/ve.c
> index 954aa8127d99..9667f9051c02 100644
> --- a/kernel/ve/ve.c
> +++ b/kernel/ve/ve.c
> @@ -1337,6 +1337,50 @@ enum {
>   	VE_CF_CLOCK_BOOTBASED,
>   };
>   
> +/* Report pid_max of the VE's pid namespace; 0 if it has no nsproxy/pidns. */
> +static u64 ve_pid_max_read_u64(struct cgroup_subsys_state *css,
> +			       struct cftype *cft)
> +{
> +	struct nsproxy *nsp;
> +	u64 ret = 0;
> +
> +	/* ve_ns is RCU-protected: hold the read side across the access. */
> +	rcu_read_lock();
> +	nsp = rcu_dereference(css_to_ve(css)->ve_ns);
> +	if (nsp && nsp->pid_ns_for_children)
> +		ret = nsp->pid_ns_for_children->pid_max;
> +	rcu_read_unlock();
> +
> +	return ret;
> +}
> +
> +extern int pid_max_min, pid_max_max;	/* range enforced on pid_max writes */
> +
> +/* Set pid_max for the VE's pidns; returns 0, -EPERM, -EBUSY or -EINVAL. */
> +static int ve_pid_max_write_running_u64(struct cgroup_subsys_state *css,
> +					struct cftype *cft, u64 val)
> +{
> +	struct ve_struct *ve = css_to_ve(css);
> +	struct nsproxy *ve_ns;
> +	int err = 0;
> +
> +	if (!ve_is_super(get_exec_env()) && !ve->is_pseudosuper)
> +		return -EPERM;
> +
> +	/* Single exit below so the RCU read lock is dropped on every path. */
> +	rcu_read_lock();
> +	ve_ns = rcu_dereference(ve->ve_ns);
> +	if (!ve_ns || !ve_ns->pid_ns_for_children)
> +		err = -EBUSY;	/* no nsproxy/pidns to write to */
> +	else if (pid_max_min > val || pid_max_max < val)
> +		err = -EINVAL;	/* outside [pid_max_min, pid_max_max] */
> +	else
> +		ve_ns->pid_ns_for_children->pid_max = val;
> +	rcu_read_unlock();
> +
> +	return err;
> +}
> +
>   static int ve_ts_read(struct seq_file *sf, void *v)
>   {
>   	struct ve_struct *ve = css_to_ve(seq_css(sf));
> @@ -1735,6 +1779,12 @@ static struct cftype ve_cftypes[] = {
>   		.write			= ve_ts_write,
>   		.private		= VE_CF_CLOCK_BOOTBASED,
>   	},
> +	{
> +		.name			= "pid_max",
> +		.flags			= CFTYPE_NOT_ON_ROOT,
> +		.read_u64		= ve_pid_max_read_u64,
> +		.write_u64		= ve_pid_max_write_running_u64,
> +	},
>   	{
>   		.name			= "netns_max_nr",
>   		.flags			= CFTYPE_NOT_ON_ROOT,
> 

-- 
Best regards, Tikhomirov Pavel
Software Developer, Virtuozzo.


More information about the Devel mailing list