[Devel] [PATCH rh7] cgroup/ve: fix possible circular locking dependency

Andrey Ryabinin aryabinin at virtuozzo.com
Wed Feb 8 08:47:30 PST 2017


cgroup_add_ve_root_files() takes ->i_mutex with cgroup_mutex
held which is unsafe because it violates lock order in the rest
of cgroup code:

 ======================================================
 [ INFO: possible circular locking dependency detected ]
 3.10.0-514.6.1.vz7.28.3.debug #1 Not tainted
 -------------------------------------------------------
 vzctl/4073 is trying to acquire lock:
  (&sb->s_type->i_mutex_key#7){+.+.+.}, at: [<ffffffff8137f5b9>] cgroup_mark_ve_root+0x129/0x2c0

is already holding lock:
  (cgroup_mutex){+.+.+.}, at: [<ffffffff8137f4c1>] cgroup_mark_ve_root+0x31/0x2c0
which lock already depends on the new lock.
the existing dependency chain (in reverse order) is:

-> #1 (cgroup_mutex){+.+.+.}:
        [<ffffffff813280eb>] __lock_acquire+0x75b/0x12c0
        [<ffffffff8132971a>] lock_acquire+0x10a/0x400
        [<ffffffff824ef6de>] mutex_lock_nested+0x12e/0xbc0
        [<ffffffff8137c527>] cgroup_mount+0x897/0xe90
        [<ffffffff81654dde>] mount_fs+0x3e/0x240
        [<ffffffff816b187b>] vfs_kern_mount+0x6b/0x260
        [<ffffffff816b1aab>] kern_mount_data+0x3b/0xa0
        [<ffffffff813809c8>] cgroup_kernel_mount+0x18/0x20
        [<ffffffff83194822>] ub_init_cgroup+0xd7/0x1a4
        [<ffffffff81002150>] do_one_initcall+0x120/0x350
        [<ffffffff8313da86>] kernel_init_freeable+0x4cd/0x59a
        [<ffffffff824c96de>] kernel_init+0xe/0xf0
        [<ffffffff82516158>] ret_from_fork+0x58/0x90

-> #0 (&sb->s_type->i_mutex_key#7){+.+.+.}:
        [<ffffffff81323d9a>] validate_chain.isra.44+0x1f7a/0x2ef0
        [<ffffffff813280eb>] __lock_acquire+0x75b/0x12c0
        [<ffffffff8132971a>] lock_acquire+0x10a/0x400
        [<ffffffff824ef6de>] mutex_lock_nested+0x12e/0xbc0
        [<ffffffff8137f5b9>] cgroup_mark_ve_root+0x129/0x2c0
        [<ffffffff81310648>] ve_start_container+0x978/0xac0
        [<ffffffff81311c80>] ve_state_write+0x320/0x8e0
        [<ffffffff81372a0d>] cgroup_file_write+0x5fd/0xb90
        [<ffffffff816498f2>] vfs_write+0x1e2/0x650
        [<ffffffff8164c1e8>] SyS_write+0x178/0x270
        [<ffffffff82516209>] system_call_fastpath+0x16/0x1

  Possible unsafe locking scenario:
        CPU0                    CPU1
        ----                    ----
   lock(cgroup_mutex);
                                lock(&sb->s_type->i_mutex_key#7);
                                lock(cgroup_mutex);
   lock(&sb->s_type->i_mutex_key#7);

Instead of adding 'cgroup.subgroups_limit' files only on VE_ROOT cgroups let's
add them on every cgroup, but allow writing to it only in VE_ROOT cgroup.
This is much more simpler and allow us to kill a lot of bogus code.

https://jira.sw.ru/browse/PSBM-59122
Signed-off-by: Andrey Ryabinin <aryabinin at virtuozzo.com>
---
 include/linux/cgroup.h |  1 -
 kernel/cgroup.c        | 34 ++++------------------------------
 2 files changed, 4 insertions(+), 31 deletions(-)

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index d99251b..ea02186 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -446,7 +446,6 @@ struct cgroup_map_cb {
 #define CFTYPE_NOT_ON_ROOT	(1U << 1)	/* don't create on root cg */
 #define CFTYPE_INSANE		(1U << 2)	/* don't create if sane_behavior */
 #define CFTYPE_VE_WRITABLE	(1U << 15)	/* allow write from CT */
-#define CFTYPE_ONLY_ON_VE_ROOT	(1U << 16)	/* only create on CT's root cg */
 
 #define MAX_CFTYPE_NAME		64
 
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index e8ec5f2..ae9ef0d 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -2807,9 +2807,6 @@ static int cgroup_addrm_files(struct cgroup *cgrp, struct cgroup_subsys *subsys,
 			continue;
 		if ((cft->flags & CFTYPE_ONLY_ON_ROOT) && cgrp->parent)
 			continue;
-		if ((cft->flags & CFTYPE_ONLY_ON_VE_ROOT) &&
-				!test_bit(CGRP_VE_ROOT, &cgrp->flags))
-			continue;
 
 		if (is_add) {
 			err = cgroup_add_file(cgrp, subsys, cft);
@@ -4049,6 +4046,9 @@ static int cgroup_write_subgroups_limit(struct cgroup *cgrp,
 					struct cftype *cft,
 					u64 val)
 {
+	if (!test_bit(CGRP_VE_ROOT, &cgrp->flags))
+		return -EACCES;
+
 	cgrp->subgroups_limit = val;
 	return 0;
 }
@@ -4105,7 +4105,6 @@ static struct cftype files[] = {
 	},
 	{
 		.name = "cgroup.subgroups_limit",
-		.flags = CFTYPE_ONLY_ON_VE_ROOT,
 		.read_u64 = cgroup_read_subgroups_limit,
 		.write_u64 = cgroup_write_subgroups_limit,
 		.mode = S_IRUGO | S_IWUSR,
@@ -4239,30 +4238,6 @@ static int subgroups_count(struct cgroup *cgroup)
 }
 
 #ifdef CONFIG_VE
-static void cgroup_add_ve_root_files(struct cgroup *cgrp,
-				struct cgroup_subsys *subsys,
-				struct cftype cfts[])
-{
-	struct cftype *cft;
-
-	if (!test_bit(CGRP_VE_ROOT, &cgrp->flags))
-		return;
-
-	mutex_lock(&cgrp->dentry->d_inode->i_mutex);
-	for (cft = cfts; cft->name[0] != '\0'; cft++) {
-		int err;
-
-		if (!(cft->flags & CFTYPE_ONLY_ON_VE_ROOT))
-			continue;
-
-		err = cgroup_add_file(cgrp, subsys, cft);
-		if (err)
-			pr_warn("cgroup_add_ve_root_files: failed to add %s, err=%d\n",
-				cft->name, err);
-	}
-	mutex_unlock(&cgrp->dentry->d_inode->i_mutex);
-}
-
 void cgroup_mark_ve_root(struct ve_struct *ve)
 {
 	struct cgroup *cgrp;
@@ -4271,8 +4246,7 @@ void cgroup_mark_ve_root(struct ve_struct *ve)
 	mutex_lock(&cgroup_mutex);
 	for_each_active_root(root) {
 		cgrp = task_cgroup_from_root(ve->init_task, root);
-		if (!test_and_set_bit(CGRP_VE_ROOT, &cgrp->flags))
-			cgroup_add_ve_root_files(cgrp, NULL, files);
+		set_bit(CGRP_VE_ROOT, &cgrp->flags);
 	}
 	mutex_unlock(&cgroup_mutex);
 }
-- 
2.10.2



More information about the Devel mailing list