[Devel] [PATCH RHEL7 COMMIT] ve: introduce ve.veid for setting id from userspace

Konstantin Khorenko khorenko at virtuozzo.com
Mon Jun 22 09:12:59 PDT 2015


The commit is pushed to "branch-rh7-3.10.0-123.1.2-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh7-3.10.0-123.1.2.vz7.5.16
------>
commit 1cb2c254d2eb100fc4b6b82063b349a78420ae2b
Author: Vladimir Davydov <vdavydov at parallels.com>
Date:   Mon Jun 22 20:12:59 2015 +0400

    ve: introduce ve.veid for setting id from userspace
    
    Currently, we neither want nor have enough time to replace some legacy
    ioctls (such as those for obtaining net stats) with a cgroup-based
    interface. To make these ioctls work with UUID-named containers, we
    assign a numeric id to each container by writing to ve.veid. It is
    completely up to userspace how these ids are generated, but they
    must meet the following requirements:
    
    1) The id must be in the range (0, INT_MAX].
    
    2) All ids must be unique within the same node. This is enforced by the
    kernel, which returns EEXIST if the id is already taken.
    
    3) The id must not change across container restarts, i.e. userspace
    should map containers to ids in a persistent manner and always assign
    the same id to the same container. This is needed because venet stats
    are detached from ves and can outlive them. This is not enforced by the
    kernel.
    
    4) The id must be set only once and before the container is started. If
    it is not set (equals 0), start will fail with ENOENT. If the id has
    already been set, a write to ve.veid will fail with EBUSY unless the new
    id coincides with the current id.
    
    This patch introduces the ve.veid file. It also removes the obsolete
    ve.legacy_veid file.
    
    https://jira.sw.ru/browse/PSBM-34422
    
    Signed-off-by: Vladimir Davydov <vdavydov at parallels.com>
---
 kernel/ve/ve.c | 97 ++++++++++++++++++++++++++++------------------------------
 1 file changed, 47 insertions(+), 50 deletions(-)

diff --git a/kernel/ve/ve.c b/kernel/ve/ve.c
index 1710dcd..ff5a293 100644
--- a/kernel/ve/ve.c
+++ b/kernel/ve/ve.c
@@ -97,7 +97,6 @@ int vz_compat;
 EXPORT_SYMBOL(vz_compat);
 
 static DEFINE_IDR(ve_idr);
-#define VE_ID_START	(INT_MAX/2)
 
 static int __init vz_compat_setup(char *arg)
 {
@@ -131,7 +130,7 @@ static void ve_list_add(struct ve_struct *ve)
 	mutex_unlock(&ve_list_lock);
 }
 
-static void ve_list_del(struct ve_struct *ve)
+static void ve_list_del(struct ve_struct *ve, bool free_id)
 {
 	mutex_lock(&ve_list_lock);
 	/* Check whether ve linked in list of ve's and unlink ve from list if so */
@@ -142,25 +141,8 @@ static void ve_list_del(struct ve_struct *ve)
 		list_del_init(&ve->ve_list);
 		nr_ve--;
 	}
-	mutex_unlock(&ve_list_lock);
-}
-
-static long veid_alloc(long req_veid)
-{
-	long veid;
-	mutex_lock(&ve_list_lock);
-	veid = idr_alloc(&ve_idr, NULL,
-		       req_veid ? req_veid : VE_ID_START,
-		       req_veid ? req_veid + 1 : 0,
-		       GFP_KERNEL);
-	mutex_unlock(&ve_list_lock);
-	return veid;
-}
-
-static void veid_free(long veid)
-{
-	mutex_lock(&ve_list_lock);
-	idr_remove(&ve_idr, veid);
+	if (free_id && ve->veid)
+		idr_remove(&ve_idr, ve->veid);
 	mutex_unlock(&ve_list_lock);
 }
 
@@ -489,6 +471,9 @@ int ve_start_container(struct ve_struct *ve)
 	struct task_struct *tsk = current;
 	int err;
 
+	if (!ve->veid)
+		return -ENOENT;
+
 	if (ve->is_running || ve->ve_ns)
 		return -EBUSY;
 
@@ -559,7 +544,7 @@ err_dev:
 err_umh:
 	ve_stop_kthread(ve);
 err_kthread:
-	ve_list_del(ve);
+	ve_list_del(ve, false);
 	ve_drop_context(ve);
 	return err;
 }
@@ -622,7 +607,7 @@ void ve_exit_ns(struct pid_namespace *pid_ns)
 	down_write(&ve->op_sem);
 	ve_hook_iterate_fini(VE_SS_CHAIN, ve);
 
-	ve_list_del(ve);
+	ve_list_del(ve, false);
 	ve_drop_context(ve);
 	up_write(&ve->op_sem);
 
@@ -634,7 +619,6 @@ void ve_exit_ns(struct pid_namespace *pid_ns)
 static struct cgroup_subsys_state *ve_create(struct cgroup *cg)
 {
 	struct ve_struct *ve = &ve0;
-	long id;
 	int err;
 
 	if (!cg->parent)
@@ -644,26 +628,11 @@ static struct cgroup_subsys_state *ve_create(struct cgroup *cg)
 	if (cgroup_ve(cg->parent) != ve)
 		return ERR_PTR(-ENOTDIR);
 
-	/*
-	 * If the cgroup has a numeric name, allocate ID to match it. This is
-	 * required for compatibility with the old interface where VEs do not
-	 * have names and are identified only by VE ID.
-	 */
-	if (kstrtol(cg->dentry->d_name.name, 10, &id) ||
-	    id < 0 || id >= INT_MAX)
-		id = 0;
-	id = veid_alloc(id);
-	if (id < 0) {
-		err = id;
-		goto err_id;
-	}
-
 	err = -ENOMEM;
 	ve = kmem_cache_zalloc(ve_cachep, GFP_KERNEL);
 	if (!ve)
 		goto err_ve;
 
-	ve->veid = id;
 	ve->ve_name = kstrdup(cg->dentry->d_name.name, GFP_KERNEL);
 	if (!ve->ve_name)
 		goto err_name;
@@ -696,8 +665,6 @@ err_lat:
 err_name:
 	kmem_cache_free(ve_cachep, ve);
 err_ve:
-	veid_free(id);
-err_id:
 	return ERR_PTR(err);
 }
 
@@ -705,8 +672,7 @@ static void ve_offline(struct cgroup *cg)
 {
 	struct ve_struct *ve = cgroup_ve(cg);
 
-	ve_list_del(ve);
-	veid_free(ve->veid);
+	ve_list_del(ve, true);
 }
 
 static void ve_devmnt_free(struct ve_devmnt *devmnt)
@@ -886,10 +852,42 @@ out_unlock:
 	return ret;
 }
 
-static int ve_legacy_veid_read(struct cgroup *cg, struct cftype *cft,
-		struct seq_file *m)
+static u64 ve_id_read(struct cgroup *cg, struct cftype *cft)
 {
-	return seq_printf(m, "%u\n", cgroup_ve(cg)->veid);
+	return cgroup_ve(cg)->veid;
+}
+
+static int ve_id_write(struct cgroup *cg, struct cftype *cft, u64 value)
+{
+	struct ve_struct *ve = cgroup_ve(cg);
+	int veid;
+	int err = 0;
+
+	if (value <= 0 || value > INT_MAX)
+		return -EINVAL;
+
+	down_write(&ve->op_sem);
+	if (ve->veid) {
+		if (ve->veid != value)
+			err = -EBUSY;
+		goto out;
+	}
+
+	mutex_lock(&ve_list_lock);
+	/* we forbid to start a container without veid (see ve_start_container)
+	 * so the ve cannot be on the list */
+	BUG_ON(!list_empty(&ve->ve_list));
+	veid = idr_alloc(&ve_idr, NULL, value, value + 1, GFP_KERNEL);
+	if (veid < 0) {
+		err = veid;
+		if (err == -ENOSPC)
+			err = -EEXIST;
+	} else
+		ve->veid = veid;
+	mutex_unlock(&ve_list_lock);
+out:
+	up_write(&ve->op_sem);
+	return err;
 }
 
 /*
@@ -1063,7 +1061,6 @@ up_opsem:
 
 enum {
 	VE_CF_STATE,
-	VE_CF_LEGACY_VEID,
 	VE_CF_FEATURES,
 	VE_CF_IPTABLES_MASK,
 };
@@ -1126,10 +1123,10 @@ static struct cftype ve_cftypes[] = {
 		.private		= VE_CF_STATE,
 	},
 	{
-		.name			= "legacy_veid",
+		.name			= "veid",
 		.flags			= CFTYPE_NOT_ON_ROOT,
-		.read_seq_string	= ve_legacy_veid_read,
-		.private		= VE_CF_LEGACY_VEID,
+		.read_u64		= ve_id_read,
+		.write_u64		= ve_id_write,
 	},
 	{
 		.name			= "features",



More information about the Devel mailing list