[Devel] [PATCH RHEL7 COMMIT] cgroup_freezer: print information about unfreezable process

Konstantin Khorenko khorenko at virtuozzo.com
Tue Nov 29 19:34:18 MSK 2022


The commit is pushed to "branch-rh7-3.10.0-1160.80.1.vz7.190.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh7-3.10.0-1160.80.1.vz7.190.1
------>
commit 62fbd153cd9a6e2ee8d29da05a1feb981f12c6d0
Author: Pavel Tikhomirov <ptikhomirov at virtuozzo.com>
Date:   Tue Nov 29 18:56:39 2022 +0300

    cgroup_freezer: print information about unfreezable process
    
    Add a sysctl kernel.freeze_cgroup_timeout (in jiffies, default 30 * HZ).
    
    If one writes FROZEN to the freezer.state file and, after
    kernel.freeze_cgroup_timeout has elapsed, still reads FREEZING from it
    (meaning the kernel has still not managed to freeze all processes of
    the cgroup), print a warning with information about the problem, e.g.:
    
    [ 7196.621368] Freeze of /test took 0 sec, due to unfreezable process 13732:bash, stack:
    [ 7196.621396] [<ffffffffa2df9556>] retint_careful+0x14/0x32
    [ 7196.621431] [<ffffffffffffffff>] 0xffffffffffffffff
    
    The output includes:
     - path to problematic freezer cgroup
     - timeout in seconds
     - unfreezable process pid, comm and stack
    
    https://jira.sw.ru/browse/PSBM-142970
    
    Signed-off-by: Pavel Tikhomirov <ptikhomirov at virtuozzo.com>
---
 include/linux/sysctl.h  |  2 ++
 kernel/cgroup_freezer.c | 72 ++++++++++++++++++++++++++++++++++++++++++++++---
 kernel/sysctl.c         | 10 +++++++
 3 files changed, 81 insertions(+), 3 deletions(-)

diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index f28d9fb58c03..798b0465cb93 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -189,6 +189,8 @@ struct ctl_path {
 extern int ve_allow_module_load;
 extern int __read_mostly lazytime_default;
 extern int trusted_exec;
+#define DEFAULT_FREEZE_TIMEOUT (30 * HZ)
+extern int sysctl_freeze_timeout;
 
 #ifdef CONFIG_SYSCTL
 
diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c
index f31d68f55db0..d4747ff98090 100644
--- a/kernel/cgroup_freezer.c
+++ b/kernel/cgroup_freezer.c
@@ -21,6 +21,10 @@
 #include <linux/uaccess.h>
 #include <linux/freezer.h>
 #include <linux/seq_file.h>
+#include <linux/jiffies.h>
+#include <linux/ratelimit.h>
+#include <linux/stacktrace.h>
+#include <linux/sysctl.h>
 
 /*
  * A cgroup is freezing if any FREEZING flags are set.  FREEZING_SELF is
@@ -43,6 +47,7 @@ struct freezer {
 	struct cgroup_subsys_state	css;
 	unsigned int			state;
 	spinlock_t			lock;
+	unsigned long			freeze_jiffies;
 };
 
 static inline struct freezer *cgroup_freezer(struct cgroup *cgroup)
@@ -242,6 +247,61 @@ static void freezer_fork(struct task_struct *task, void *private)
 	rcu_read_unlock();
 }
 
+#define MAX_STACK_TRACE_DEPTH   64
+
+static void check_freezer_timeout(struct cgroup *cgroup,
+		                  struct task_struct *task)
+{
+	static DEFINE_RATELIMIT_STATE(freeze_timeout_rs,
+				      DEFAULT_FREEZE_TIMEOUT, 1);
+	int __freeze_timeout = READ_ONCE(sysctl_freeze_timeout);
+	struct freezer *freezer = cgroup_freezer(cgroup);
+	struct stack_trace trace;
+	unsigned long *entries;
+	char *freezer_cg_name;
+	pid_t tgid;
+	int i;
+
+	if (!freezer->freeze_jiffies ||
+	    freezer->freeze_jiffies + __freeze_timeout > get_jiffies_64())
+		return;
+
+	if (!__ratelimit(&freeze_timeout_rs))
+		return;
+
+	freezer_cg_name = kmalloc(PATH_MAX, GFP_KERNEL);
+	if (!freezer_cg_name)
+		return;
+
+	if (cgroup_path(cgroup, freezer_cg_name, PATH_MAX) < 0)
+		goto free_cg_name;
+
+	tgid = task_pid_nr_ns(task, &init_pid_ns);
+
+	printk(KERN_WARNING "Freeze of %s took %d sec, "
+	       "due to unfreezable process %d:%s, stack:\n",
+	       freezer_cg_name, __freeze_timeout/HZ, tgid, task->comm);
+
+	entries = kmalloc(MAX_STACK_TRACE_DEPTH * sizeof(*entries),
+			  GFP_KERNEL);
+	if (!entries)
+		goto free_cg_name;
+
+	memset(&trace, 0, sizeof(trace));
+	trace.max_entries = MAX_STACK_TRACE_DEPTH;
+	trace.entries = entries;
+	save_stack_trace_tsk(task, &trace);
+
+	for (i = 0; i < trace.nr_entries; i++) {
+		printk(KERN_WARNING "[<%pK>] %pB\n",
+		       (void *)entries[i], (void *)entries[i]);
+	}
+
+	kfree(entries);
+free_cg_name:
+	kfree(freezer_cg_name);
+}
+
 /**
  * update_if_frozen - update whether a cgroup finished freezing
  * @cgroup: cgroup of interest
@@ -293,8 +353,10 @@ static void update_if_frozen(struct cgroup *cgroup)
 			 * completion.  Consider it frozen in addition to
 			 * the usual frozen condition.
 			 */
-			if (!frozen(task) && !freezer_should_skip(task))
+			if (!frozen(task) && !freezer_should_skip(task)) {
+				check_freezer_timeout(cgroup, task);
 				goto out_iter_end;
+			}
 		}
 	}
 
@@ -367,8 +429,10 @@ static void freezer_apply_state(struct freezer *freezer, bool freeze,
 		return;
 
 	if (freeze) {
-		if (!(freezer->state & CGROUP_FREEZING))
+		if (!(freezer->state & CGROUP_FREEZING)) {
 			atomic_inc(&system_freezing_cnt);
+			freezer->freeze_jiffies = get_jiffies_64();
+		}
 		freezer->state |= state;
 		freeze_cgroup(freezer);
 	} else {
@@ -377,8 +441,10 @@ static void freezer_apply_state(struct freezer *freezer, bool freeze,
 		freezer->state &= ~state;
 
 		if (!(freezer->state & CGROUP_FREEZING)) {
-			if (was_freezing)
+			if (was_freezing) {
+				freezer->freeze_jiffies = 0;
 				atomic_dec(&system_freezing_cnt);
+			}
 			freezer->state &= ~CGROUP_FROZEN;
 			unfreeze_cgroup(freezer);
 		}
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 6ccebbfaf9c8..b8dd96172edf 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -130,6 +130,8 @@ static int __init set_trusted_exec(char *str)
 }
 __setup("trusted_exec", set_trusted_exec);
 
+int sysctl_freeze_timeout = DEFAULT_FREEZE_TIMEOUT;
+
 /* Constants used for minimum and  maximum */
 #ifdef CONFIG_LOCKUP_DETECTOR
 static int sixty = 60;
@@ -1281,6 +1283,14 @@ static struct ctl_table kern_table[] = {
 		.extra2         = &one,
 	},
 #endif
+	{
+		.procname	= "freeze_cgroup_timeout",
+		.data		= &sysctl_freeze_timeout,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= &zero,
+	},
 	{ }
 };
 


More information about the Devel mailing list