[Devel] [PATCH rh7 v2] ve: rework veid handling

Kirill Tkhai ktkhai at odin.com
Thu Sep 24 03:53:16 PDT 2015


On 24.09.2015 13:39, Vladimir Davydov wrote:
> Currently, we reserve veid right on write to ve.veid, returning EEXIST
> if the id is already in use, and free it when the ve cgroup is removed
> (ve_offline). Before commit 33f3496e5d13 ("ms/cgroup: split cgroup
> destruction into two steps") ve_offline() was called synchronously on
> rmdir and everything worked fine, but the above mentioned commit changed
> this so that ve_offline() is now called from a workqueue. As a result,
> Setting the same ve.veid right after rmdir-mkdir sequence is likely to
> fail with EEXIST, because the work function removing the id might still
> be in progress. This leads to `vzctl start` failures if the container
> stopped by itself (e.g. called halt), because vzctl recreates the ve
> cgroup directory on start if it already exists.
> 
> To fix this issue, this patch reworks veid handling as follows. Now it's
> OK to write any allowed id to ve.veid as long as the container is
> stopped (if it's running EBUSY is returned), and no error will be
> returned even if there is a collision. Actual veid reservation happens
> only on container start (i.e. when START is written to ve.state). If
> ve_start_container() finds that the preferred id is already in use, it
> returns EEXIST. Correspondingly, veid is freed on container stop
> (ve_exit_ns), not on cgroup removal. This ensures that once the
> container has stopped, it's OK to reuse its id, no matter if its ve
> cgroup is being destroyed or not.
> 
> https://jira.sw.ru/browse/PSBM-39338
> 
> Signed-off-by: Vladimir Davydov <vdavydov at parallels.com>

Acked-by: Kirill Tkhai <ktkhai at odin.com>

> ---
> Changes in v2:
>  - move veid alloc/remove to ve_list_add/del so that one can't two ves
>    with the same id on the list
>  - return EEXIST instead of ENOSPC on veid collision
> 
>  kernel/ve/ve.c | 67 ++++++++++++++++++++++------------------------------------
>  1 file changed, 25 insertions(+), 42 deletions(-)
> 
> diff --git a/kernel/ve/ve.c b/kernel/ve/ve.c
> index c67ff3f8d19e..aff3b03fcece 100644
> --- a/kernel/ve/ve.c
> +++ b/kernel/ve/ve.c
> @@ -118,29 +118,31 @@ void put_ve(struct ve_struct *ve)
>  }
>  EXPORT_SYMBOL(put_ve);
>  
> -static void ve_list_add(struct ve_struct *ve)
> +static int ve_list_add(struct ve_struct *ve)
>  {
> +	int err;
> +
>  	mutex_lock(&ve_list_lock);
> -	if (idr_replace(&ve_idr, ve, ve->veid) != NULL)
> -		WARN_ON(1);
> +	err = idr_alloc(&ve_idr, ve, ve->veid, ve->veid + 1, GFP_KERNEL);
> +	if (err < 0) {
> +		if (err == -ENOSPC)
> +			err = -EEXIST;
> +		goto out;
> +	}
>  	list_add(&ve->ve_list, &ve_list_head);
>  	nr_ve++;
> +	err = 0;
> +out:
>  	mutex_unlock(&ve_list_lock);
> +	return err;
>  }
>  
> -static void ve_list_del(struct ve_struct *ve, bool free_id)
> +static void ve_list_del(struct ve_struct *ve)
>  {
>  	mutex_lock(&ve_list_lock);
> -	/* Check whether ve linked in list of ve's and unlink ve from list if so */
> -	if (!list_empty(&ve->ve_list)) {
> -		/* Hide ve from finding by veid */
> -		if (idr_replace(&ve_idr, NULL, ve->veid) != ve)
> -			WARN_ON(1);
> -		list_del_init(&ve->ve_list);
> -		nr_ve--;
> -	}
> -	if (free_id && ve->veid)
> -		idr_remove(&ve_idr, ve->veid);
> +	idr_remove(&ve_idr, ve->veid);
> +	list_del_init(&ve->ve_list);
> +	nr_ve--;
>  	mutex_unlock(&ve_list_lock);
>  }
>  
> @@ -436,7 +438,10 @@ int ve_start_container(struct ve_struct *ve)
>  	ve->start_jiffies = get_jiffies_64();
>  
>  	ve_grab_context(ve);
> -	ve_list_add(ve);
> +
> +	err = ve_list_add(ve);
> +	if (err)
> +		goto err_list;
>  
>  	err = ve_start_kthread(ve);
>  	if (err)
> @@ -475,7 +480,8 @@ err_legacy_pty:
>  err_umh:
>  	ve_stop_kthread(ve);
>  err_kthread:
> -	ve_list_del(ve, false);
> +	ve_list_del(ve);
> +err_list:
>  	ve_drop_context(ve);
>  	return err;
>  }
> @@ -538,7 +544,7 @@ void ve_exit_ns(struct pid_namespace *pid_ns)
>  	down_write(&ve->op_sem);
>  	ve_hook_iterate_fini(VE_SS_CHAIN, ve);
>  
> -	ve_list_del(ve, false);
> +	ve_list_del(ve);
>  	ve_drop_context(ve);
>  	up_write(&ve->op_sem);
>  
> @@ -644,13 +650,6 @@ err_ve:
>  	return ERR_PTR(err);
>  }
>  
> -static void ve_offline(struct cgroup *cg)
> -{
> -	struct ve_struct *ve = cgroup_ve(cg);
> -
> -	ve_list_del(ve, true);
> -}
> -
>  static void ve_devmnt_free(struct ve_devmnt *devmnt)
>  {
>  	if (!devmnt)
> @@ -839,32 +838,17 @@ static u64 ve_id_read(struct cgroup *cg, struct cftype *cft)
>  static int ve_id_write(struct cgroup *cg, struct cftype *cft, u64 value)
>  {
>  	struct ve_struct *ve = cgroup_ve(cg);
> -	int veid;
>  	int err = 0;
>  
>  	if (value <= 0 || value > INT_MAX)
>  		return -EINVAL;
>  
>  	down_write(&ve->op_sem);
> -	if (ve->veid) {
> +	if (ve->is_running || ve->ve_ns) {
>  		if (ve->veid != value)
>  			err = -EBUSY;
> -		goto out;
> -	}
> -
> -	mutex_lock(&ve_list_lock);
> -	/* we forbid to start a container without veid (see ve_start_container)
> -	 * so the ve cannot be on the list */
> -	BUG_ON(!list_empty(&ve->ve_list));
> -	veid = idr_alloc(&ve_idr, NULL, value, value + 1, GFP_KERNEL);
> -	if (veid < 0) {
> -		err = veid;
> -		if (err == -ENOSPC)
> -			err = -EEXIST;
>  	} else
> -		ve->veid = veid;
> -	mutex_unlock(&ve_list_lock);
> -out:
> +		ve->veid = value;
>  	up_write(&ve->op_sem);
>  	return err;
>  }
> @@ -1207,7 +1191,6 @@ struct cgroup_subsys ve_subsys = {
>  	.name		= "ve",
>  	.subsys_id	= ve_subsys_id,
>  	.css_alloc	= ve_create,
> -	.css_offline	= ve_offline,
>  	.css_free	= ve_destroy,
>  	.can_attach	= ve_can_attach,
>  	.attach		= ve_attach,
> 



More information about the Devel mailing list