[Devel] [PATCH vz9 1/9] ms/NFS/CIFS/SUNRPC: don't allow to freeze execution

Nikita Yushchenko nikita.yushchenko at virtuozzo.com
Fri Oct 1 09:32:47 MSK 2021


From: Stanislav Kinsburskiy <skinsbursky at virtuozzo.com>

Here is a deadlock scheme for 2 processes in one freezer cgroup, which is
freezing:

CPU 0                                   CPU 1
--------                                --------
do_last
inode_lock(dir->d_inode)
vfs_create
nfs_create
...
__rpc_execute
rpc_wait_bit_killable
__refrigerator
				    do_last
				    inode_lock(dir->d_inode)

So, the problem is that one process takes directory inode mutex, executes
creation request and goes to refrigerator.
Another one waits till directory lock is released, remains "thawed" and
thus
freezer cgroup state never becomes "FROZEN".

Notes:
1) Interesting, that this is not a pure deadlock: one can thaw cgroup and
then
freeze it again.
2) The issue was introduced by commit
d310310cbff18ec385c6ab4d58f33b100192a96a.
3) This patch is not aimed to fix the issue, but to show the problem root.
Look like this problem might be applicable to other hunks from the commit,
mentioned above.

Reverts upstream commit: d310310cbff18ec385c6ab4d58f33b100192a96a
Upstream discussion: https://lkml.org/lkml/2016/8/3/317

https://jira.sw.ru/browse/PSBM-50671
https://jira.sw.ru/browse/PSBM-54822

Signed-off-by: Stanislav Kinsburskiy <skinsbursky at virtuozzo.com>

(cherry-picked from vz8 commit ff469e206c40 ("ms/NFS/CIFS/SUNRPC: don't
allow to freeze execution"))

Signed-off-by: Nikita Yushchenko <nikita.yushchenko at virtuozzo.com>
---
 fs/cifs/inode.c         |  3 +--
 fs/nfs/callback.c       |  9 ++-------
 fs/nfs/inode.c          |  3 +--
 fs/nfs/nfs3proc.c       |  3 +--
 fs/nfs/nfs4proc.c       |  4 ++--
 fs/nfs/proc.c           |  1 -
 fs/nfs/write.c          |  1 -
 include/linux/freezer.h | 23 -----------------------
 net/sunrpc/sched.c      |  3 +--
 net/sunrpc/svc_xprt.c   |  9 ---------
 net/sunrpc/svcsock.c    |  1 -
 11 files changed, 8 insertions(+), 52 deletions(-)

diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 65f8a70cece3..cffb5e403010 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -10,7 +10,6 @@
 #include <linux/stat.h>
 #include <linux/slab.h>
 #include <linux/pagemap.h>
-#include <linux/freezer.h>
 #include <linux/sched/signal.h>
 #include <linux/wait_bit.h>
 #include <linux/fiemap.h>
@@ -2286,7 +2285,7 @@ cifs_invalidate_mapping(struct inode *inode)
 static int
 cifs_wait_bit_killable(struct wait_bit_key *key, int mode)
 {
-	freezable_schedule_unsafe();
+	schedule();
 	if (signal_pending_state(mode, current))
 		return -ERESTARTSYS;
 	return 0;
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index 7817ad94a6ba..bc413fba7b85 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -16,7 +16,6 @@
 #include <linux/nfs_fs.h>
 #include <linux/errno.h>
 #include <linux/mutex.h>
-#include <linux/freezer.h>
 #include <linux/kthread.h>
 #include <linux/sunrpc/svcauth_gss.h>
 #include <linux/sunrpc/bc_xprt.h>
@@ -78,9 +77,7 @@ nfs4_callback_svc(void *vrqstp)
 	int err;
 	struct svc_rqst *rqstp = vrqstp;
 
-	set_freezable();
-
-	while (!kthread_freezable_should_stop(NULL)) {
+	while (!kthread_should_stop()) {
 
 		if (signal_pending(current))
 			flush_signals(current);
@@ -110,9 +107,7 @@ nfs41_callback_svc(void *vrqstp)
 	int error;
 	DEFINE_WAIT(wq);
 
-	set_freezable();
-
-	while (!kthread_freezable_should_stop(NULL)) {
+	while (!kthread_should_stop()) {
 
 		if (signal_pending(current))
 			flush_signals(current);
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 853213b3a209..5e6e293a1d3d 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -38,7 +38,6 @@
 #include <linux/nfs_xdr.h>
 #include <linux/slab.h>
 #include <linux/compat.h>
-#include <linux/freezer.h>
 #include <linux/uaccess.h>
 #include <linux/iversion.h>
 
@@ -74,7 +73,7 @@ nfs_fattr_to_ino_t(struct nfs_fattr *fattr)
 
 static int nfs_wait_killable(int mode)
 {
-	freezable_schedule_unsafe();
+	schedule();
 	if (signal_pending_state(mode, current))
 		return -ERESTARTSYS;
 	return 0;
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index 2299446b3b89..474302ffb1f7 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -18,7 +18,6 @@
 #include <linux/nfs_page.h>
 #include <linux/lockd/bind.h>
 #include <linux/nfs_mount.h>
-#include <linux/freezer.h>
 #include <linux/xattr.h>
 
 #include "iostat.h"
@@ -36,7 +35,7 @@ nfs3_rpc_wrapper(struct rpc_clnt *clnt, struct rpc_message *msg, int flags)
 		res = rpc_call_sync(clnt, msg, flags);
 		if (res != -EJUKEBOX)
 			break;
-		freezable_schedule_timeout_killable_unsafe(NFS_JUKEBOX_RETRY_TIME);
+		schedule_timeout_killable(NFS_JUKEBOX_RETRY_TIME);
 		res = -ERESTARTSYS;
 	} while (!fatal_signal_pending(current));
 	return res;
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index e1214bb6b7ee..7e3444f9a435 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -411,7 +411,7 @@ static int nfs4_delay_killable(long *timeout)
 {
 	might_sleep();
 
-	freezable_schedule_timeout_killable_unsafe(
+	schedule_timeout_killable(
 		nfs4_update_delay(timeout));
 	if (!__fatal_signal_pending(current))
 		return 0;
@@ -7320,7 +7320,7 @@ nfs4_retry_setlk_simple(struct nfs4_state *state, int cmd,
 		status = nfs4_proc_setlk(state, cmd, request);
 		if ((status != -EAGAIN) || IS_SETLK(cmd))
 			break;
-		freezable_schedule_timeout_interruptible(timeout);
+		schedule_timeout_interruptible(timeout);
 		timeout *= 2;
 		timeout = min_t(unsigned long, NFS4_LOCK_MAXTIMEOUT, timeout);
 		status = -ERESTARTSYS;
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index ea19dbf12301..cd8a045c6b7a 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -42,7 +42,6 @@
 #include <linux/nfs_fs.h>
 #include <linux/nfs_page.h>
 #include <linux/lockd/bind.h>
-#include <linux/freezer.h>
 #include "internal.h"
 
 #define NFSDBG_FACILITY		NFSDBG_PROC
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index eae9bf114041..c175f4294e67 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -22,7 +22,6 @@
 #include <linux/nfs_page.h>
 #include <linux/backing-dev.h>
 #include <linux/export.h>
-#include <linux/freezer.h>
 #include <linux/wait.h>
 #include <linux/iversion.h>
 
diff --git a/include/linux/freezer.h b/include/linux/freezer.h
index 0621c5f86c39..1c3c8352ccb2 100644
--- a/include/linux/freezer.h
+++ b/include/linux/freezer.h
@@ -173,14 +173,6 @@ static inline void freezable_schedule(void)
 	freezer_count();
 }
 
-/* DO NOT ADD ANY NEW CALLERS OF THIS FUNCTION */
-static inline void freezable_schedule_unsafe(void)
-{
-	freezer_do_not_count();
-	schedule();
-	freezer_count_unsafe();
-}
-
 /*
  * Like schedule_timeout(), but should not block the freezer.  Do not
  * call this with locks held.
@@ -228,16 +220,6 @@ static inline long freezable_schedule_timeout_killable(long timeout)
 	return __retval;
 }
 
-/* DO NOT ADD ANY NEW CALLERS OF THIS FUNCTION */
-static inline long freezable_schedule_timeout_killable_unsafe(long timeout)
-{
-	long __retval;
-	freezer_do_not_count();
-	__retval = schedule_timeout_killable(timeout);
-	freezer_count_unsafe();
-	return __retval;
-}
-
 /*
  * Like schedule_hrtimeout_range(), but should not block the freezer.  Do not
  * call this with locks held.
@@ -288,8 +270,6 @@ static inline void set_freezable(void) {}
 
 #define freezable_schedule()  schedule()
 
-#define freezable_schedule_unsafe()  schedule()
-
 #define freezable_schedule_timeout(timeout)  schedule_timeout(timeout)
 
 #define freezable_schedule_timeout_interruptible(timeout)		\
@@ -301,9 +281,6 @@ static inline void set_freezable(void) {}
 #define freezable_schedule_timeout_killable(timeout)			\
 	schedule_timeout_killable(timeout)
 
-#define freezable_schedule_timeout_killable_unsafe(timeout)		\
-	schedule_timeout_killable(timeout)
-
 #define freezable_schedule_hrtimeout_range(expires, delta, mode)	\
 	schedule_hrtimeout_range(expires, delta, mode)
 
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index feaf6e98084d..a468a26f8110 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -19,7 +19,6 @@
 #include <linux/smp.h>
 #include <linux/spinlock.h>
 #include <linux/mutex.h>
-#include <linux/freezer.h>
 #include <linux/sched/mm.h>
 
 #include <linux/sunrpc/clnt.h>
@@ -268,7 +267,7 @@ EXPORT_SYMBOL_GPL(rpc_destroy_wait_queue);
 
 static int rpc_wait_bit_killable(struct wait_bit_key *key, int mode)
 {
-	freezable_schedule_unsafe();
+	schedule();
 	if (signal_pending_state(mode, current))
 		return -ERESTARTSYS;
 	return 0;
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index dbb41821b1b8..9c8065538131 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -7,7 +7,6 @@
 
 #include <linux/sched.h>
 #include <linux/errno.h>
-#include <linux/freezer.h>
 #include <linux/kthread.h>
 #include <linux/slab.h>
 #include <net/sock.h>
@@ -717,10 +716,6 @@ rqst_should_sleep(struct svc_rqst *rqstp)
 	if (signalled() || kthread_should_stop())
 		return false;
 
-	/* are we freezing? */
-	if (freezing(current))
-		return false;
-
 	return true;
 }
 
@@ -751,8 +746,6 @@ static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout)
 	else
 		__set_current_state(TASK_RUNNING);
 
-	try_to_freeze();
-
 	set_bit(RQ_BUSY, &rqstp->rq_flags);
 	smp_mb__after_atomic();
 	rqstp->rq_xprt = svc_xprt_dequeue(pool);
@@ -857,8 +850,6 @@ int svc_recv(struct svc_rqst *rqstp, long timeout)
 	if (err)
 		goto out;
 
-	try_to_freeze();
-	cond_resched();
 	err = -EINTR;
 	if (signalled() || kthread_should_stop())
 		goto out;
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 478f857cdaed..04db4b313be3 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -35,7 +35,6 @@
 #include <linux/netdevice.h>
 #include <linux/skbuff.h>
 #include <linux/file.h>
-#include <linux/freezer.h>
 #include <net/sock.h>
 #include <net/checksum.h>
 #include <net/ip.h>
-- 
2.30.2



More information about the Devel mailing list