[Devel] [PATCH RH7] cgroup: debug cgroup mutex lock/unlock
Pavel Tikhomirov
ptikhomirov at virtuozzo.com
Wed Mar 30 18:14:37 MSK 2022
/proc/sys/kernel/cgroup_mutex_debug - to enable debug
/proc/sys/kernel/cgroup_mutex_threshold - to set threshold in ns
This is a debug patch for:
https://jira.sw.ru/browse/PSBM-139206
Signed-off-by: Pavel Tikhomirov <ptikhomirov at virtuozzo.com>
---
include/linux/cgroup.h | 2 +
kernel/cgroup.c | 122 +++++++++++++++++++++++++++++++++++++++++
kernel/sysctl.c | 17 ++++++
3 files changed, 141 insertions(+)
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 6ed84d9dfc17..83afa4a72f49 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -808,6 +808,8 @@ extern struct mutex cgroup_mutex;
#define task_css_set_check(task, __c) \
rcu_dereference((task)->cgroups)
#endif
+extern int sysctl_cgroup_mutex_debug;
+extern u64 sysctl_cgroup_mutex_threshold;
/**
* task_subsys_state_check - obtain css for (task, subsys) w/ extra access conds
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 846e22644474..9c409b4329bd 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -64,6 +64,7 @@
#include <linux/stacktrace.h>
#include <linux/exportfs.h>
#include <linux/random.h>
+#include <linux/sched.h>
#include <linux/atomic.h>
@@ -90,6 +91,13 @@ EXPORT_SYMBOL_GPL(cgroup_mutex); /* only for task_subsys_state_check() */
static DEFINE_MUTEX(cgroup_mutex);
#endif
+int sysctl_cgroup_mutex_debug;
+u64 sysctl_cgroup_mutex_threshold;
+
+struct task_struct *cgroup_mutex_owner;
+u64 cgroup_mutex_taketime;
+int lock_id;
+
static DEFINE_MUTEX(cgroup_root_mutex);
/*
@@ -254,6 +262,61 @@ static inline struct cftype *__d_cft(struct dentry *dentry)
return __d_cfe(dentry)->type;
}
+/*
+ * Record that @current has just taken cgroup_mutex at call site @id.
+ *
+ * Must be called immediately after mutex_lock(&cgroup_mutex): the
+ * bookkeeping globals (lock_id, cgroup_mutex_owner,
+ * cgroup_mutex_taketime) are only ever accessed with the mutex held,
+ * which is what serializes them.
+ *
+ * NOTE(review): if sysctl_cgroup_mutex_debug is toggled while the
+ * mutex is held, lock/unlock bookkeeping goes out of sync for one
+ * cycle and the warnings below can fire spuriously — presumably
+ * acceptable for a debug-only facility; confirm.
+ */
+static void debug_cgroup_mutex_lock(int id)
+{
+ if (!sysctl_cgroup_mutex_debug)
+ return;
+
+ /* Stale state here means a previous unlock was missed (or debug toggled). */
+ if (unlikely(lock_id != 0))
+ pr_warn_ratelimited("cgmtxlck[%d]: old_id=%d, should be 0\n", id, lock_id);
+ if (unlikely(cgroup_mutex_owner != NULL))
+ pr_warn_ratelimited("cgmtxlck[%d]: old_tsk=%p, should be NULL\n", id, cgroup_mutex_owner);
+ if (unlikely(cgroup_mutex_taketime != 0))
+ pr_warn_ratelimited("cgmtxlck[%d]: old_time=%llu, should be 0\n", id, cgroup_mutex_taketime);
+
+ /* Stamp the new owner; consumed and cleared by debug_cgroup_mutex_unlock(). */
+ lock_id = id;
+ cgroup_mutex_owner = current;
+ cgroup_mutex_taketime = ktime_get_mono_fast_ns();
+}
+
+/*
+ * Check and clear the bookkeeping set by debug_cgroup_mutex_lock(),
+ * reporting (via trace_printk) holds longer than
+ * sysctl_cgroup_mutex_threshold ns.
+ *
+ * Must be called immediately before mutex_unlock(&cgroup_mutex), i.e.
+ * still under the mutex, so the globals stay serialized. @id is a
+ * per-call-site label used only for diagnostics; it is not required to
+ * match the id passed at lock time (lock and unlock sites may pair
+ * many-to-one, e.g. cgroup_lock_live_group() callers).
+ */
+static void debug_cgroup_mutex_unlock(int id)
+{
+ u64 now, delta;
+
+ if (!sysctl_cgroup_mutex_debug)
+ return;
+
+ /* Inconsistent state: warn once (ratelimited) and reset via cleanup. */
+ if (unlikely(lock_id == 0)) {
+ pr_warn_ratelimited("cgmtxunlck[%d]: unlock with zero lock_id\n", id);
+ goto cleanup;
+ }
+
+ if (unlikely(cgroup_mutex_owner != current)) {
+ pr_warn_ratelimited("cgmtxunlck[%d]: task locked %p != task unlocked %p\n", id, cgroup_mutex_owner, current);
+ goto cleanup;
+ }
+
+ if (unlikely(cgroup_mutex_taketime == 0)) {
+ pr_warn_ratelimited("cgmtxunlck[%d]: unlock with zero taketime\n", id);
+ goto cleanup;
+ }
+
+ now = ktime_get_mono_fast_ns();
+ if (cgroup_mutex_taketime > now)
+ /*
+ * ktime_get_mono_fast_ns() is NMI-safe but only "mostly"
+ * monotonic; clamp so delta cannot underflow if the clock
+ * was observed going backwards.
+ */
+ now = cgroup_mutex_taketime;
+
+ delta = now - cgroup_mutex_taketime;
+
+ /* Long hold: record to the trace buffer, not dmesg, to limit noise. */
+ if (delta > sysctl_cgroup_mutex_threshold)
+ trace_printk("cgmtxunlck[%d]: long mutex hold for %llu==(%llu-%llu) \"%s\":%d [%d]\n",
+ id, delta, now, cgroup_mutex_taketime, current->comm, task_pid_nr(current), lock_id);
+cleanup:
+ lock_id = 0;
+ cgroup_mutex_owner = NULL;
+ cgroup_mutex_taketime = 0;
+}
+
/**
* cgroup_lock_live_group - take cgroup_mutex and check that cgrp is alive.
* @cgrp: the cgroup to be checked for liveness
@@ -264,7 +327,9 @@ static inline struct cftype *__d_cft(struct dentry *dentry)
static bool cgroup_lock_live_group(struct cgroup *cgrp)
{
mutex_lock(&cgroup_mutex);
+ debug_cgroup_mutex_lock(1);
if (cgroup_is_removed(cgrp)) {
+ debug_cgroup_mutex_unlock(1);
mutex_unlock(&cgroup_mutex);
return false;
}
@@ -890,6 +955,7 @@ static void cgroup_free_fn(struct work_struct *work)
struct cgroup_subsys *ss;
mutex_lock(&cgroup_mutex);
+ debug_cgroup_mutex_lock(2);
/*
* Release the subsystem state objects.
*/
@@ -897,6 +963,7 @@ static void cgroup_free_fn(struct work_struct *work)
ss->css_free(cgrp);
cgrp->root->number_of_cgroups--;
+ debug_cgroup_mutex_unlock(2);
mutex_unlock(&cgroup_mutex);
/*
@@ -1413,6 +1480,7 @@ static int cgroup_remount(struct super_block *sb, int *flags, char *data)
mutex_lock(&cgrp->dentry->d_inode->i_mutex);
mutex_lock(&cgroup_mutex);
+ debug_cgroup_mutex_lock(3);
mutex_lock(&cgroup_root_mutex);
/* See what subsystems are wanted */
@@ -1464,6 +1532,7 @@ static int cgroup_remount(struct super_block *sb, int *flags, char *data)
kfree(opts.release_agent);
kfree(opts.name);
mutex_unlock(&cgroup_root_mutex);
+ debug_cgroup_mutex_unlock(3);
mutex_unlock(&cgroup_mutex);
mutex_unlock(&cgrp->dentry->d_inode->i_mutex);
return ret;
@@ -1871,7 +1940,9 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
/* First find the desired set of subsystems */
if (!(flags & MS_KERNMOUNT)) {
mutex_lock(&cgroup_mutex);
+ debug_cgroup_mutex_lock(4);
ret = parse_cgroupfs_options(data, &opts);
+ debug_cgroup_mutex_unlock(4);
mutex_unlock(&cgroup_mutex);
} else {
opts = *(struct cgroup_sb_opts *)data;
@@ -1921,6 +1992,7 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
mutex_lock(&inode->i_mutex);
mutex_lock(&cgroup_mutex);
+ debug_cgroup_mutex_lock(5);
mutex_lock(&cgroup_root_mutex);
/* Check for name clashes with existing mounts */
@@ -1982,6 +2054,7 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
revert_creds(cred);
mutex_unlock(&cgroup_root_mutex);
+ debug_cgroup_mutex_unlock(5);
mutex_unlock(&cgroup_mutex);
mutex_unlock(&inode->i_mutex);
} else {
@@ -2011,6 +2084,7 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
unlock_drop:
mutex_unlock(&cgroup_root_mutex);
+ debug_cgroup_mutex_unlock(6);
mutex_unlock(&cgroup_mutex);
mutex_unlock(&inode->i_mutex);
drop_new_super:
@@ -2036,6 +2110,7 @@ static void cgroup_kill_sb(struct super_block *sb) {
BUG_ON(!list_empty(&cgrp->children));
mutex_lock(&cgroup_mutex);
+ debug_cgroup_mutex_lock(7);
mutex_lock(&cgroup_root_mutex);
/* Rebind all subsystems back to the default hierarchy */
@@ -2063,6 +2138,7 @@ static void cgroup_kill_sb(struct super_block *sb) {
}
mutex_unlock(&cgroup_root_mutex);
+ debug_cgroup_mutex_unlock(7);
mutex_unlock(&cgroup_mutex);
simple_xattrs_free(&cgrp->xattrs);
@@ -2508,6 +2584,7 @@ static int attach_task_by_pid(struct cgroup *cgrp, u64 pid, bool threadgroup)
put_task_struct(tsk);
out_unlock_cgroup:
+ debug_cgroup_mutex_unlock(8);
mutex_unlock(&cgroup_mutex);
return ret;
}
@@ -2523,6 +2600,7 @@ int cgroup_attach_task_all(struct task_struct *from, struct task_struct *tsk)
int retval = 0;
mutex_lock(&cgroup_mutex);
+ debug_cgroup_mutex_lock(9);
for_each_active_root(root) {
struct cgroup *from_cg = task_cgroup_from_root(from, root);
@@ -2530,6 +2608,7 @@ int cgroup_attach_task_all(struct task_struct *from, struct task_struct *tsk)
if (retval)
break;
}
+ debug_cgroup_mutex_unlock(9);
mutex_unlock(&cgroup_mutex);
return retval;
@@ -2580,6 +2659,7 @@ static int cgroup_release_agent_write(struct cgroup *cgrp, struct cftype *cft,
ret = -ENODEV;
out:
+ debug_cgroup_mutex_unlock(10);
mutex_unlock(&cgroup_mutex);
return ret;
}
@@ -2603,6 +2683,7 @@ static int cgroup_release_agent_show(struct cgroup *cgrp, struct cftype *cft,
rcu_read_unlock();
}
seq_putc(seq, '\n');
+ debug_cgroup_mutex_unlock(11);
mutex_unlock(&cgroup_mutex);
return 0;
}
@@ -3154,6 +3235,7 @@ static void cgroup_cfts_prepare(void)
*/
mutex_lock(&cgroup_cft_mutex);
mutex_lock(&cgroup_mutex);
+ debug_cgroup_mutex_lock(12);
}
static void cgroup_cfts_commit(struct cgroup_subsys *ss,
@@ -3175,6 +3257,7 @@ static void cgroup_cfts_commit(struct cgroup_subsys *ss,
sb = NULL;
}
+ debug_cgroup_mutex_unlock(12);
mutex_unlock(&cgroup_mutex);
/*
@@ -3186,8 +3269,10 @@ static void cgroup_cfts_commit(struct cgroup_subsys *ss,
mutex_lock(&inode->i_mutex);
mutex_lock(&cgroup_mutex);
+ debug_cgroup_mutex_lock(13);
if (!cgroup_is_removed(cgrp))
cgroup_addrm_files(cgrp, ss, cfts, is_add);
+ debug_cgroup_mutex_unlock(13);
mutex_unlock(&cgroup_mutex);
mutex_unlock(&inode->i_mutex);
@@ -3668,7 +3753,9 @@ static void cgroup_transfer_one_task(struct task_struct *task,
struct cgroup *new_cgroup = scan->data;
mutex_lock(&cgroup_mutex);
+ debug_cgroup_mutex_lock(14);
cgroup_attach_task(new_cgroup, task, false);
+ debug_cgroup_mutex_unlock(14);
mutex_unlock(&cgroup_mutex);
}
@@ -4681,7 +4768,9 @@ struct ve_struct *get_curr_ve(void)
* cgroup_attach_task() protects us from it.
*/
mutex_lock(&cgroup_mutex);
+ debug_cgroup_mutex_lock(15);
ve = get_ve(current->task_ve);
+ debug_cgroup_mutex_unlock(15);
mutex_unlock(&cgroup_mutex);
return ve;
@@ -4700,6 +4789,7 @@ int cgroup_mark_ve_roots(struct ve_struct *ve)
mutex_lock(&cgroup_cft_mutex);
mutex_lock(&cgroup_mutex);
+ debug_cgroup_mutex_lock(16);
/*
* Return early if we know that this procedure will fail due to
@@ -4708,6 +4798,7 @@ int cgroup_mark_ve_roots(struct ve_struct *ve)
* without adding itself to all virtualized subgroups (+systemd) first.
*/
if (css_has_host_cgroups(ve->root_css_set)) {
+ debug_cgroup_mutex_unlock(16);
mutex_unlock(&cgroup_mutex);
mutex_unlock(&cgroup_cft_mutex);
return -EINVAL;
@@ -4734,6 +4825,7 @@ int cgroup_mark_ve_roots(struct ve_struct *ve)
if (test_bit(cpu_cgroup_subsys_id, &root->subsys_mask))
link_ve_root_cpu_cgroup(cgrp);
}
+ debug_cgroup_mutex_unlock(17);
mutex_unlock(&cgroup_mutex);
synchronize_rcu();
list_for_each_entry_safe(cgrp, tmp, &pending, cft_q_node) {
@@ -4747,8 +4839,10 @@ int cgroup_mark_ve_roots(struct ve_struct *ve)
mutex_lock(&inode->i_mutex);
mutex_lock(&cgroup_mutex);
+ debug_cgroup_mutex_lock(18);
if (!cgroup_is_removed(cgrp))
err = cgroup_add_file(cgrp, NULL, cft);
+ debug_cgroup_mutex_unlock(18);
mutex_unlock(&cgroup_mutex);
mutex_unlock(&inode->i_mutex);
@@ -4773,6 +4867,7 @@ void cgroup_unmark_ve_roots(struct ve_struct *ve)
mutex_lock(&cgroup_cft_mutex);
mutex_lock(&cgroup_mutex);
+ debug_cgroup_mutex_lock(19);
for_each_active_root(root) {
cgrp = css_cgroup_from_root(ve->root_css_set, root);
@@ -4786,17 +4881,20 @@ void cgroup_unmark_ve_roots(struct ve_struct *ve)
dget(cgrp->dentry);
list_add_tail(&cgrp->cft_q_node, &pending);
}
+ debug_cgroup_mutex_unlock(19);
mutex_unlock(&cgroup_mutex);
list_for_each_entry_safe(cgrp, tmp, &pending, cft_q_node) {
struct inode *inode = cgrp->dentry->d_inode;
mutex_lock(&inode->i_mutex);
mutex_lock(&cgroup_mutex);
+ debug_cgroup_mutex_lock(20);
cgroup_rm_file(cgrp, cft);
dput(cgrp->dentry);
BUG_ON(!rcu_dereference_protected(cgrp->ve_owner,
lockdep_is_held(&cgroup_mutex)));
rcu_assign_pointer(cgrp->ve_owner, NULL);
clear_bit(CGRP_VE_ROOT, &cgrp->flags);
+ debug_cgroup_mutex_unlock(20);
mutex_unlock(&cgroup_mutex);
mutex_unlock(&inode->i_mutex);
}
@@ -4964,6 +5062,7 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
refcount_inc(&cgrp->online_cnt);
refcount_inc(&parent->online_cnt);
+ debug_cgroup_mutex_unlock(21);
mutex_unlock(&cgroup_mutex);
mutex_unlock(&cgrp->dentry->d_inode->i_mutex);
@@ -4978,6 +5077,7 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
ss->css_free(cgrp);
}
}
+ debug_cgroup_mutex_unlock(22);
mutex_unlock(&cgroup_mutex);
/* Release the reference count that we took on the superblock */
deactivate_super(sb);
@@ -4991,6 +5091,7 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
err_destroy:
cgroup_destroy_locked(cgrp);
+ debug_cgroup_mutex_unlock(23);
mutex_unlock(&cgroup_mutex);
mutex_unlock(&dentry->d_inode->i_mutex);
return err;
@@ -5150,6 +5251,7 @@ static void cgroup_offline_fn(struct work_struct *work)
struct cgroup_subsys *ss;
mutex_lock(&cgroup_mutex);
+ debug_cgroup_mutex_lock(24);
do {
struct cgroup *parent = cgrp->parent;
@@ -5183,6 +5285,7 @@ static void cgroup_offline_fn(struct work_struct *work)
cgrp = parent;
} while (cgrp && refcount_dec_and_test(&cgrp->online_cnt));
+ debug_cgroup_mutex_unlock(24);
mutex_unlock(&cgroup_mutex);
}
@@ -5191,7 +5294,9 @@ static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry)
int ret;
mutex_lock(&cgroup_mutex);
+ debug_cgroup_mutex_lock(25);
ret = cgroup_destroy_locked(dentry->d_fsdata);
+ debug_cgroup_mutex_unlock(25);
mutex_unlock(&cgroup_mutex);
return ret;
@@ -5218,6 +5323,7 @@ static void __init cgroup_init_subsys(struct cgroup_subsys *ss)
printk(KERN_INFO "Initializing cgroup subsys %s\n", ss->name);
mutex_lock(&cgroup_mutex);
+ debug_cgroup_mutex_lock(26);
/* init base cftset */
cgroup_init_cftsets(ss);
@@ -5245,6 +5351,7 @@ static void __init cgroup_init_subsys(struct cgroup_subsys *ss)
BUG_ON(online_css(ss, dummytop));
+ debug_cgroup_mutex_unlock(26);
mutex_unlock(&cgroup_mutex);
/* this function shouldn't be used with modular subsystems, since they
@@ -5297,6 +5404,7 @@ int __init_or_module cgroup_load_subsys(struct cgroup_subsys *ss)
cgroup_init_cftsets(ss);
mutex_lock(&cgroup_mutex);
+ debug_cgroup_mutex_lock(27);
subsys[ss->subsys_id] = ss;
/*
@@ -5308,6 +5416,7 @@ int __init_or_module cgroup_load_subsys(struct cgroup_subsys *ss)
if (IS_ERR(css)) {
/* failure case - need to deassign the subsys[] slot. */
subsys[ss->subsys_id] = NULL;
+ debug_cgroup_mutex_unlock(27);
mutex_unlock(&cgroup_mutex);
return PTR_ERR(css);
}
@@ -5346,10 +5455,12 @@ int __init_or_module cgroup_load_subsys(struct cgroup_subsys *ss)
goto err_unload;
/* success! */
+ debug_cgroup_mutex_unlock(28);
mutex_unlock(&cgroup_mutex);
return 0;
err_unload:
+ debug_cgroup_mutex_unlock(29);
mutex_unlock(&cgroup_mutex);
/* @ss can't be mounted here as try_module_get() would fail */
cgroup_unload_subsys(ss);
@@ -5379,6 +5490,7 @@ void cgroup_unload_subsys(struct cgroup_subsys *ss)
BUG_ON(ss->root != &rootnode);
mutex_lock(&cgroup_mutex);
+ debug_cgroup_mutex_lock(30);
offline_css(ss, dummytop);
@@ -5412,6 +5524,7 @@ void cgroup_unload_subsys(struct cgroup_subsys *ss)
ss->css_free(dummytop);
dummytop->subsys[ss->subsys_id] = NULL;
+ debug_cgroup_mutex_unlock(30);
mutex_unlock(&cgroup_mutex);
}
EXPORT_SYMBOL_GPL(cgroup_unload_subsys);
@@ -5579,6 +5692,7 @@ int proc_cgroup_show(struct seq_file *m, void *v)
retval = 0;
mutex_lock(&cgroup_mutex);
+ debug_cgroup_mutex_lock(31);
for_each_active_root(root) {
struct cgroup_subsys *ss;
@@ -5603,6 +5717,7 @@ int proc_cgroup_show(struct seq_file *m, void *v)
}
out_unlock:
+ debug_cgroup_mutex_unlock(31);
mutex_unlock(&cgroup_mutex);
put_task_struct(tsk);
out_free:
@@ -5625,6 +5740,7 @@ static int proc_cgroupstats_show(struct seq_file *m, void *v)
* subsys/hierarchy state.
*/
mutex_lock(&cgroup_mutex);
+ debug_cgroup_mutex_lock(32);
for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
struct cgroup_subsys *ss = subsys[i];
int num;
@@ -5639,6 +5755,7 @@ static int proc_cgroupstats_show(struct seq_file *m, void *v)
ss->name, ss->root->hierarchy_id,
num, !ss->disabled);
}
+ debug_cgroup_mutex_unlock(32);
mutex_unlock(&cgroup_mutex);
return 0;
}
@@ -5939,6 +6056,7 @@ void cgroup_release_agent(struct work_struct *work)
ve = container_of(work, struct ve_struct, release_agent_work);
mutex_lock(&cgroup_mutex);
+ debug_cgroup_mutex_lock(33);
raw_spin_lock(&ve->release_list_lock);
while (!list_empty(&ve->release_list)) {
char *argv[3], *envp[3];
@@ -5999,6 +6117,7 @@ void cgroup_release_agent(struct work_struct *work)
/* Drop the lock while we invoke the usermode helper,
* since the exec could involve hitting disk and hence
* be a slow process */
+ debug_cgroup_mutex_unlock(33);
mutex_unlock(&cgroup_mutex);
err = call_usermodehelper_fns_ve(ve, argv[0], argv,
@@ -6011,11 +6130,13 @@ void cgroup_release_agent(struct work_struct *work)
agentbuf, pathbuf, err);
mutex_lock(&cgroup_mutex);
+ debug_cgroup_mutex_lock(34);
continue_free:
kfree(pathbuf);
raw_spin_lock(&ve->release_list_lock);
}
raw_spin_unlock(&ve->release_list_lock);
+ debug_cgroup_mutex_unlock(34);
mutex_unlock(&cgroup_mutex);
kfree(agentbuf);
}
@@ -6292,6 +6413,7 @@ int cgroup_kernel_attach(struct cgroup *cgrp, struct task_struct *tsk)
if (!cgroup_lock_live_group(cgrp))
return -ENODEV;
ret = cgroup_attach_task(cgrp, tsk, true);
+ debug_cgroup_mutex_unlock(35);
mutex_unlock(&cgroup_mutex);
return ret;
}
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 6e2645dae41b..68c702356492 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -65,6 +65,7 @@
#include <linux/kexec.h>
#include <linux/mount.h>
#include <linux/ve.h>
+#include <linux/cgroup.h>
#include "../lib/kstrtox.h"
@@ -1278,6 +1279,22 @@ static struct ctl_table kern_table[] = {
.extra2 = &one,
},
#endif
+ /* Toggle for the cgroup_mutex hold-time debug instrumentation (0/1). */
+ {
+ .procname = "cgroup_mutex_debug",
+ .data = &sysctl_cgroup_mutex_debug,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &zero,
+ .extra2 = &one,
+ },
+ /* Hold-time reporting threshold, in nanoseconds. */
+ {
+ .procname = "cgroup_mutex_threshold",
+ .data = &sysctl_cgroup_mutex_threshold,
+ .maxlen = sizeof(sysctl_cgroup_mutex_threshold),
+ .mode = 0644,
+ /*
+ * NOTE(review): .data is u64 but proc_doulongvec_minmax operates
+ * on unsigned long entries — correct on 64-bit targets only; on
+ * 32-bit it would treat this as two ulong values. Confirm the
+ * target kernels are 64-bit only, or switch the variable to
+ * unsigned long.
+ */
+ .proc_handler = proc_doulongvec_minmax,
+ },
{ }
};
--
2.35.1
More information about the Devel
mailing list