[Devel] [PATCH RHEL7 COMMIT] fuse kio: Dereference sk_user_data under rcu

Konstantin Khorenko khorenko at virtuozzo.com
Tue Oct 16 18:09:44 MSK 2018


The commit is pushed to "branch-rh7-3.10.0-862.14.4.vz7.72.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh7-3.10.0-862.14.4.vz7.72.9
------>
commit 79c2293509a5ea2eadc7eb1c37d6c25bc9125c4d
Author: Kirill Tkhai <ktkhai at virtuozzo.com>
Date:   Tue Oct 16 18:09:42 2018 +0300

    fuse kio: Dereference sk_user_data under rcu
    
    We need to synchronize sio destruction with socket callbacks
    to prevent them touch sio freed memory. Use RCU to do that.
    Note, that pcs_sock_ioconn_destruct() will become used in next
    patch.
    
    v2: Use rcu callback instead of work func
    
    Signed-off-by: Kirill Tkhai <ktkhai at virtuozzo.com>
    Reviewed-by: Pavel Butsykin <pbutsykin at virtuozzo.com>
    
    =====================
    Patchset description:
    
    Order pcs_rpc and pcs_sockio destruction and close leaked socket
    
    https://pmc.acronis.com/browse/VSTOR-15305
    
    Ploop can asynchronously unmap regions by sending IOCB_CMD_UNMAP_ITER, but this
    command isn't quite correctly interpreted in Fuse. Moreover, in Fast-path mode,
    fallocate(FALLOC_FL_PUNCH_HOLE|FALLOC_FL_ZERO_RANGE) falls to fuse user daemon
    and it can lead to data corruption.
    
    Let's fix it.
    
    Kirill Tkhai (9):
          fuse kio: Use __maybe_unused
          fuse kio: Use sio eof instead of parent to determ abort
          fuse kio: Reorder callback assignment
          fuse kio: Add pcs_cleanup_wq
          fuse kio: Destroy rpc in work func
          fuse kio: Introduce pcs_sk_kick_queue()
          fuse kio: Dereference sk_user_data under rcu
          fuse kio: Fix rpc socket leak on rpc_abort()
          fuse kio: Hold pcs_rpc counter till sio may be freed
---
 fs/fuse/kio/pcs/pcs_sock_io.c | 73 +++++++++++++++++++++++++++++++------------
 fs/fuse/kio/pcs/pcs_sock_io.h |  1 +
 2 files changed, 54 insertions(+), 20 deletions(-)

diff --git a/fs/fuse/kio/pcs/pcs_sock_io.c b/fs/fuse/kio/pcs/pcs_sock_io.c
index d99046f05834..bcf4bf6745a8 100644
--- a/fs/fuse/kio/pcs/pcs_sock_io.c
+++ b/fs/fuse/kio/pcs/pcs_sock_io.c
@@ -11,12 +11,6 @@
 #include "log.h"
 
 
-static inline struct pcs_rpc * sock_to_rpc(struct sock *sk)
-{
-
-	return ((struct pcs_sockio *)sk->sk_user_data)->parent;
-}
-
 static void sio_msg_sent(struct pcs_msg * msg)
 {
 	msg->stage = PCS_MSG_STAGE_SENT;
@@ -191,6 +185,7 @@ static int do_sock_recv(struct socket *sock, void *buf, size_t len)
 
 	struct kvec iov = {buf, len};
 	struct msghdr msg = { .msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL };
+	struct pcs_sockio __maybe_unused *sio;
 	int ret = -EIO;
 
 	if (pcs_should_fail_sock_io())
@@ -198,8 +193,17 @@ static int do_sock_recv(struct socket *sock, void *buf, size_t len)
 
 	ret =  kernel_recvmsg(sock, &msg, &iov, 1, len, msg.msg_flags);
 out:
-	TRACE("RET: "PEER_FMT" len:%ld ret:%d\n", PEER_ARGS(sock_to_rpc(sock->sk)),
-	      len, ret);
+
+#ifdef CONFIG_FUSE_KIO_DEBUG
+	rcu_read_lock();
+	sio = rcu_dereference_sk_user_data(sock->sk);
+	if (sio) {
+		TRACE("RET: "PEER_FMT" len:%ld ret:%d\n", PEER_ARGS(sio->parent),
+		      len, ret);
+	}
+	rcu_read_unlock();
+#endif /* CONFIG_FUSE_KIO_DEBUG */
+
 	return ret;
 }
 
@@ -469,7 +473,7 @@ static void pcs_restore_sockets(struct pcs_ioconn *ioconn)
 	sk = ioconn->socket->sk;
 
 	write_lock_bh(&sk->sk_callback_lock);
-	sk->sk_user_data =    ioconn->orig.user_data;
+	rcu_assign_sk_user_data(sk, ioconn->orig.user_data);
 	sk->sk_data_ready =   ioconn->orig.data_ready;
 	sk->sk_write_space =  ioconn->orig.write_space;
 	sk->sk_error_report = ioconn->orig.error_report;
@@ -480,6 +484,14 @@ static void pcs_restore_sockets(struct pcs_ioconn *ioconn)
 	sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
 }
 
+static void sio_destroy_rcu(struct rcu_head *head)
+{
+	struct pcs_sockio *sio = container_of(head, struct pcs_sockio, rcu);
+
+	memset(sio, 0xFF, sizeof(*sio));
+	kfree(sio);
+}
+
 void pcs_sock_ioconn_destruct(struct pcs_ioconn *ioconn)
 {
 	struct pcs_sockio * sio = sio_from_ioconn(ioconn);
@@ -490,20 +502,24 @@ void pcs_sock_ioconn_destruct(struct pcs_ioconn *ioconn)
 
 	pcs_ioconn_close(ioconn);
 
-	memset(sio, 0xFF, sizeof(*sio));
-	kfree(sio);
+	/* Wait pending socket callbacks, e.g., sk_data_ready() */
+	call_rcu(&sio->rcu, sio_destroy_rcu);
 }
 
 static void pcs_sk_kick_queue(struct sock *sk)
 {
 	struct pcs_sockio *sio;
 
-	sio = sk->sk_user_data;;
+	smp_rmb(); /* Pairs with smp_wmb() in pcs_sockio_init() */
+
+	rcu_read_lock();
+	sio = rcu_dereference_sk_user_data(sk);
 	if (sio) {
 		struct pcs_rpc *ep = sio->parent;
 		TRACE(PEER_FMT" queue\n", PEER_ARGS(ep));
 		pcs_rpc_kick_queue(ep);
 	}
+	rcu_read_unlock();
 }
 
 static void pcs_sk_data_ready(struct sock *sk, int count)
@@ -518,14 +534,24 @@ static void pcs_sk_write_space(struct sock *sk)
 /* TODO this call back does not look correct, sane locking/error handling is required */
 static void pcs_sk_error_report(struct sock *sk)
 {
-	struct pcs_sockio * sio = sio_from_ioconn(sk->sk_user_data);
+	struct pcs_sockio *sio;
 
-	if (test_bit(PCS_IOCONN_BF_DEAD, &sio->ioconn.flags) ||
-		test_bit(PCS_IOCONN_BF_ERROR, &sio->ioconn.flags))
-		return;
+	smp_rmb(); /* Pairs with smp_wmb() in pcs_sockio_init() */
 
-	set_bit(PCS_IOCONN_BF_ERROR, &sio->ioconn.flags);
-	pcs_rpc_kick_queue(sio->parent);
+	rcu_read_lock();
+	sio = rcu_dereference_sk_user_data(sk);
+	if (sio) {
+		struct pcs_rpc *ep = sio->parent;
+
+		if (test_bit(PCS_IOCONN_BF_DEAD, &sio->ioconn.flags) ||
+		    test_bit(PCS_IOCONN_BF_ERROR, &sio->ioconn.flags))
+			goto unlock;
+
+		set_bit(PCS_IOCONN_BF_ERROR, &sio->ioconn.flags);
+		pcs_rpc_kick_queue(ep);
+	}
+unlock:
+	rcu_read_unlock();
 }
 
 struct pcs_sockio * pcs_sockio_init(struct socket *sock,
@@ -549,7 +575,13 @@ struct pcs_sockio * pcs_sockio_init(struct socket *sock,
 	sk = sock->sk;
 	write_lock_bh(&sk->sk_callback_lock);
 
-	/* Backup original callbaks */
+	/*
+	 * Backup original callbaks.
+	 * TCP and unix sockets do not have sk_user_data set.
+	 * So we avoid taking sk_callback_lock in callbacks,
+	 * since this seems to be able to result in performance.
+	 */
+	WARN_ON_ONCE(sk->sk_user_data);
 	sio->ioconn.orig.user_data = sk->sk_user_data;
 	sio->ioconn.orig.data_ready = sk->sk_data_ready;
 	sio->ioconn.orig.write_space = sk->sk_write_space;
@@ -564,7 +596,8 @@ struct pcs_sockio * pcs_sockio_init(struct socket *sock,
 	sio->ioconn.socket = sock;
 	sio->ioconn.destruct = pcs_sock_ioconn_destruct;
 
-	sk->sk_user_data = sio;
+	rcu_assign_sk_user_data(sk, sio);
+	smp_wmb(); /* Pairs with smp_rmb() in callbacks */
 	sk->sk_data_ready = pcs_sk_data_ready;
 	sk->sk_write_space = pcs_sk_write_space;
 	sk->sk_error_report = pcs_sk_error_report;
diff --git a/fs/fuse/kio/pcs/pcs_sock_io.h b/fs/fuse/kio/pcs/pcs_sock_io.h
index e2fceee46383..30771170b84c 100644
--- a/fs/fuse/kio/pcs/pcs_sock_io.h
+++ b/fs/fuse/kio/pcs/pcs_sock_io.h
@@ -137,6 +137,7 @@ struct pcs_sockio
 	/* eof() handler could be called twice: once on graceful socket shutdown and from sio_abort() */
 	void			(*eof)(struct pcs_sockio *);
 	void			(*write_wakeup)(struct pcs_sockio *);
+	struct rcu_head		rcu;
 
 	char			_inline_buffer[0];
 };



More information about the Devel mailing list