[Devel] [PATCH v2 RH7] cgroup_freezer: print information about unfreezable process

Pavel Tikhomirov ptikhomirov at virtuozzo.com
Fri Nov 25 15:44:24 MSK 2022


Add a sysctl kernel.freeze_cgroup_timeout (in jiffies, default value 30 * HZ).

If one writes FROZEN to the freezer.state file and, after
kernel.freeze_cgroup_timeout has elapsed, still reads FREEZING from it
(meaning that the kernel has still not managed to freeze all of the
cgroup's processes), print a warning with information about the problem, e.g.:

[ 7196.621368] Freeze of /test took 0 sec, due to unfreezable process 13732:bash, stack:
[ 7196.621396] [<ffffffffa2df9556>] retint_careful+0x14/0x32
[ 7196.621431] [<ffffffffffffffff>] 0xffffffffffffffff

The output includes:
- path to problematic freezer cgroup
- timeout in seconds
- unfreezable process pid, comm and stack

https://jira.sw.ru/browse/PSBM-142970

Signed-off-by: Pavel Tikhomirov <ptikhomirov at virtuozzo.com>
---
v2: fix pointer print formatting %pS -> %pB
---
 include/linux/sysctl.h  |  2 ++
 kernel/cgroup_freezer.c | 55 ++++++++++++++++++++++++++++++++++++++---
 kernel/sysctl.c         | 10 ++++++++
 3 files changed, 64 insertions(+), 3 deletions(-)

diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index f28d9fb58c03..798b0465cb93 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -189,6 +189,8 @@ struct ctl_path {
 extern int ve_allow_module_load;
 extern int __read_mostly lazytime_default;
 extern int trusted_exec;
+#define DEFAULT_FREEZE_TIMEOUT (30 * HZ)
+extern int sysctl_freeze_timeout;
 
 #ifdef CONFIG_SYSCTL
 
diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c
index f31d68f55db0..bb5380b89d4f 100644
--- a/kernel/cgroup_freezer.c
+++ b/kernel/cgroup_freezer.c
@@ -21,6 +21,10 @@
 #include <linux/uaccess.h>
 #include <linux/freezer.h>
 #include <linux/seq_file.h>
+#include <linux/jiffies.h>
+#include <linux/ratelimit.h>
+#include <linux/stacktrace.h>
+#include <linux/sysctl.h>
 
 /*
  * A cgroup is freezing if any FREEZING flags are set.  FREEZING_SELF is
@@ -43,6 +47,7 @@ struct freezer {
 	struct cgroup_subsys_state	css;
 	unsigned int			state;
 	spinlock_t			lock;
+	unsigned long			freeze_jiffies;
 };
 
 static inline struct freezer *cgroup_freezer(struct cgroup *cgroup)
@@ -242,6 +247,44 @@ static void freezer_fork(struct task_struct *task, void *private)
 	rcu_read_unlock();
 }
 
+#define MAX_STACK_TRACE_DEPTH   64
+/* Warn (ratelimited) about the task keeping @cgroup from freezing in time. */
+static void check_freezer_timeout(struct cgroup *cgroup, struct task_struct *task)
+{
+	static DEFINE_RATELIMIT_STATE(freeze_timeout_rs, DEFAULT_FREEZE_TIMEOUT, 1);
+	int timeout = READ_ONCE(sysctl_freeze_timeout);	/* in jiffies */
+	struct freezer *freezer = cgroup_freezer(cgroup);
+	unsigned long entries[MAX_STACK_TRACE_DEPTH];
+	static char freezer_cg_name[PATH_MAX];
+	struct stack_trace trace;
+	pid_t pid;
+	int i;
+
+	/* freeze_jiffies is 0 when not freezing; time_before() is wrap-safe. */
+	if (!freezer->freeze_jiffies ||
+	    time_before(jiffies, freezer->freeze_jiffies + timeout))
+		return;
+	/* The ratelimit state is static, i.e. shared by all freezer cgroups. */
+	if (!__ratelimit(&freeze_timeout_rs))
+		return;
+
+	if (cgroup_path(cgroup, freezer_cg_name, PATH_MAX) < 0)
+		return;
+
+	pid = task_pid_nr_ns(task, &init_pid_ns);
+	/* The seconds printed are the configured timeout, not elapsed time. */
+	printk(KERN_WARNING "Freeze of %s took %d sec, due to unfreezable process %d:%s, stack:\n",
+	       freezer_cg_name, timeout/HZ, pid, task->comm);
+
+	memset(&trace, 0, sizeof(trace));
+	trace.max_entries = MAX_STACK_TRACE_DEPTH;
+	trace.entries = entries;
+	save_stack_trace_tsk(task, &trace);
+
+	for (i = 0; i < trace.nr_entries; i++)
+		printk(KERN_WARNING "[<%pK>] %pB\n", (void *)entries[i], (void *)entries[i]);
+}
+
 /**
  * update_if_frozen - update whether a cgroup finished freezing
  * @cgroup: cgroup of interest
@@ -293,8 +336,10 @@ static void update_if_frozen(struct cgroup *cgroup)
 			 * completion.  Consider it frozen in addition to
 			 * the usual frozen condition.
 			 */
-			if (!frozen(task) && !freezer_should_skip(task))
+			if (!frozen(task) && !freezer_should_skip(task)) {
+				check_freezer_timeout(cgroup, task);
 				goto out_iter_end;
+			}
 		}
 	}
 
@@ -367,8 +412,10 @@ static void freezer_apply_state(struct freezer *freezer, bool freeze,
 		return;
 
 	if (freeze) {
-		if (!(freezer->state & CGROUP_FREEZING))
+		if (!(freezer->state & CGROUP_FREEZING)) {
 			atomic_inc(&system_freezing_cnt);
+			freezer->freeze_jiffies = get_jiffies_64();
+		}
 		freezer->state |= state;
 		freeze_cgroup(freezer);
 	} else {
@@ -377,8 +424,10 @@ static void freezer_apply_state(struct freezer *freezer, bool freeze,
 		freezer->state &= ~state;
 
 		if (!(freezer->state & CGROUP_FREEZING)) {
-			if (was_freezing)
+			if (was_freezing) {
+				freezer->freeze_jiffies = 0;
 				atomic_dec(&system_freezing_cnt);
+			}
 			freezer->state &= ~CGROUP_FROZEN;
 			unfreeze_cgroup(freezer);
 		}
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 6ccebbfaf9c8..b8dd96172edf 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -130,6 +130,8 @@ static int __init set_trusted_exec(char *str)
 }
 __setup("trusted_exec", set_trusted_exec);
 
+int sysctl_freeze_timeout = DEFAULT_FREEZE_TIMEOUT;
+
 /* Constants used for minimum and  maximum */
 #ifdef CONFIG_LOCKUP_DETECTOR
 static int sixty = 60;
@@ -1281,6 +1283,14 @@ static struct ctl_table kern_table[] = {
 		.extra2         = &one,
 	},
 #endif
+	{
+		.procname	= "freeze_cgroup_timeout",
+		.data		= &sysctl_freeze_timeout,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= &zero,
+	},
 	{ }
 };
 
-- 
2.37.3



More information about the Devel mailing list