[Devel] [PATCH RHEL7 COMMIT] fuse kio: Dereference sk_user_data under rcu
Konstantin Khorenko
khorenko at virtuozzo.com
Tue Oct 16 18:09:44 MSK 2018
The commit is pushed to "branch-rh7-3.10.0-862.14.4.vz7.72.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh7-3.10.0-862.14.4.vz7.72.9
------>
commit 79c2293509a5ea2eadc7eb1c37d6c25bc9125c4d
Author: Kirill Tkhai <ktkhai at virtuozzo.com>
Date: Tue Oct 16 18:09:42 2018 +0300
fuse kio: Dereference sk_user_data under rcu
We need to synchronize sio destruction with socket callbacks
to prevent them touch sio freed memory. Use RCU to do that.
Note, that pcs_sock_ioconn_destruct() will become used in next
patch.
v2: Use rcu callback instead of work func
Signed-off-by: Kirill Tkhai <ktkhai at virtuozzo.com>
Reviewed-by: Pavel Butsykin <pbutsykin at virtuozzo.com>
=====================
Patchset description:
Order pcs_rpc and pcs_sockio destruction and close leaked socket
https://pmc.acronis.com/browse/VSTOR-15305
Ploop can asynchronously unmap regions by sending IOCB_CMD_UNMAP_ITER, but this
command isn't quite correctly interpreted in Fuse. Moreover, in Fast-path mode,
fallocate(FALLOC_FL_PUNCH_HOLE|FALLOC_FL_ZERO_RANGE) falls to fuse user daemon
and it can lead to data corruption.
Let's fix it.
Kirill Tkhai (9):
fuse kio: Use __maybe_unused
fuse kio: Use sio eof instead of parent to determ abort
fuse kio: Reorder callback assignment
fuse kio: Add pcs_cleanup_wq
fuse kio: Destroy rpc in work func
fuse kio: Introduce pcs_sk_kick_queue()
fuse kio: Dereference sk_user_data under rcu
fuse kio: Fix rpc socket leak on rpc_abort()
fuse kio: Hold pcs_rpc counter till sio may be freed
---
fs/fuse/kio/pcs/pcs_sock_io.c | 73 +++++++++++++++++++++++++++++++------------
fs/fuse/kio/pcs/pcs_sock_io.h | 1 +
2 files changed, 54 insertions(+), 20 deletions(-)
diff --git a/fs/fuse/kio/pcs/pcs_sock_io.c b/fs/fuse/kio/pcs/pcs_sock_io.c
index d99046f05834..bcf4bf6745a8 100644
--- a/fs/fuse/kio/pcs/pcs_sock_io.c
+++ b/fs/fuse/kio/pcs/pcs_sock_io.c
@@ -11,12 +11,6 @@
#include "log.h"
-static inline struct pcs_rpc * sock_to_rpc(struct sock *sk)
-{
-
- return ((struct pcs_sockio *)sk->sk_user_data)->parent;
-}
-
static void sio_msg_sent(struct pcs_msg * msg)
{
msg->stage = PCS_MSG_STAGE_SENT;
@@ -191,6 +185,7 @@ static int do_sock_recv(struct socket *sock, void *buf, size_t len)
struct kvec iov = {buf, len};
struct msghdr msg = { .msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL };
+ struct pcs_sockio __maybe_unused *sio;
int ret = -EIO;
if (pcs_should_fail_sock_io())
@@ -198,8 +193,17 @@ static int do_sock_recv(struct socket *sock, void *buf, size_t len)
ret = kernel_recvmsg(sock, &msg, &iov, 1, len, msg.msg_flags);
out:
- TRACE("RET: "PEER_FMT" len:%ld ret:%d\n", PEER_ARGS(sock_to_rpc(sock->sk)),
- len, ret);
+
+#ifdef CONFIG_FUSE_KIO_DEBUG
+ rcu_read_lock();
+ sio = rcu_dereference_sk_user_data(sock->sk);
+ if (sio) {
+ TRACE("RET: "PEER_FMT" len:%ld ret:%d\n", PEER_ARGS(sio->parent),
+ len, ret);
+ }
+ rcu_read_unlock();
+#endif /* CONFIG_FUSE_KIO_DEBUG */
+
return ret;
}
@@ -469,7 +473,7 @@ static void pcs_restore_sockets(struct pcs_ioconn *ioconn)
sk = ioconn->socket->sk;
write_lock_bh(&sk->sk_callback_lock);
- sk->sk_user_data = ioconn->orig.user_data;
+ rcu_assign_sk_user_data(sk, ioconn->orig.user_data);
sk->sk_data_ready = ioconn->orig.data_ready;
sk->sk_write_space = ioconn->orig.write_space;
sk->sk_error_report = ioconn->orig.error_report;
@@ -480,6 +484,14 @@ static void pcs_restore_sockets(struct pcs_ioconn *ioconn)
sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
}
+static void sio_destroy_rcu(struct rcu_head *head)
+{
+ struct pcs_sockio *sio = container_of(head, struct pcs_sockio, rcu);
+
+ memset(sio, 0xFF, sizeof(*sio));
+ kfree(sio);
+}
+
void pcs_sock_ioconn_destruct(struct pcs_ioconn *ioconn)
{
struct pcs_sockio * sio = sio_from_ioconn(ioconn);
@@ -490,20 +502,24 @@ void pcs_sock_ioconn_destruct(struct pcs_ioconn *ioconn)
pcs_ioconn_close(ioconn);
- memset(sio, 0xFF, sizeof(*sio));
- kfree(sio);
+ /* Wait pending socket callbacks, e.g., sk_data_ready() */
+ call_rcu(&sio->rcu, sio_destroy_rcu);
}
static void pcs_sk_kick_queue(struct sock *sk)
{
struct pcs_sockio *sio;
- sio = sk->sk_user_data;;
+ smp_rmb(); /* Pairs with smp_wmb() in pcs_sockio_init() */
+
+ rcu_read_lock();
+ sio = rcu_dereference_sk_user_data(sk);
if (sio) {
struct pcs_rpc *ep = sio->parent;
TRACE(PEER_FMT" queue\n", PEER_ARGS(ep));
pcs_rpc_kick_queue(ep);
}
+ rcu_read_unlock();
}
static void pcs_sk_data_ready(struct sock *sk, int count)
@@ -518,14 +534,24 @@ static void pcs_sk_write_space(struct sock *sk)
/* TODO this call back does not look correct, sane locking/error handling is required */
static void pcs_sk_error_report(struct sock *sk)
{
- struct pcs_sockio * sio = sio_from_ioconn(sk->sk_user_data);
+ struct pcs_sockio *sio;
- if (test_bit(PCS_IOCONN_BF_DEAD, &sio->ioconn.flags) ||
- test_bit(PCS_IOCONN_BF_ERROR, &sio->ioconn.flags))
- return;
+ smp_rmb(); /* Pairs with smp_wmb() in pcs_sockio_init() */
- set_bit(PCS_IOCONN_BF_ERROR, &sio->ioconn.flags);
- pcs_rpc_kick_queue(sio->parent);
+ rcu_read_lock();
+ sio = rcu_dereference_sk_user_data(sk);
+ if (sio) {
+ struct pcs_rpc *ep = sio->parent;
+
+ if (test_bit(PCS_IOCONN_BF_DEAD, &sio->ioconn.flags) ||
+ test_bit(PCS_IOCONN_BF_ERROR, &sio->ioconn.flags))
+ goto unlock;
+
+ set_bit(PCS_IOCONN_BF_ERROR, &sio->ioconn.flags);
+ pcs_rpc_kick_queue(ep);
+ }
+unlock:
+ rcu_read_unlock();
}
struct pcs_sockio * pcs_sockio_init(struct socket *sock,
@@ -549,7 +575,13 @@ struct pcs_sockio * pcs_sockio_init(struct socket *sock,
sk = sock->sk;
write_lock_bh(&sk->sk_callback_lock);
- /* Backup original callbaks */
+ /*
+ * Backup original callbaks.
+ * TCP and unix sockets do not have sk_user_data set.
+ * So we avoid taking sk_callback_lock in callbacks,
+ * since this seems to be able to result in performance.
+ */
+ WARN_ON_ONCE(sk->sk_user_data);
sio->ioconn.orig.user_data = sk->sk_user_data;
sio->ioconn.orig.data_ready = sk->sk_data_ready;
sio->ioconn.orig.write_space = sk->sk_write_space;
@@ -564,7 +596,8 @@ struct pcs_sockio * pcs_sockio_init(struct socket *sock,
sio->ioconn.socket = sock;
sio->ioconn.destruct = pcs_sock_ioconn_destruct;
- sk->sk_user_data = sio;
+ rcu_assign_sk_user_data(sk, sio);
+ smp_wmb(); /* Pairs with smp_rmb() in callbacks */
sk->sk_data_ready = pcs_sk_data_ready;
sk->sk_write_space = pcs_sk_write_space;
sk->sk_error_report = pcs_sk_error_report;
diff --git a/fs/fuse/kio/pcs/pcs_sock_io.h b/fs/fuse/kio/pcs/pcs_sock_io.h
index e2fceee46383..30771170b84c 100644
--- a/fs/fuse/kio/pcs/pcs_sock_io.h
+++ b/fs/fuse/kio/pcs/pcs_sock_io.h
@@ -137,6 +137,7 @@ struct pcs_sockio
/* eof() handler could be called twice: once on graceful socket shutdown and from sio_abort() */
void (*eof)(struct pcs_sockio *);
void (*write_wakeup)(struct pcs_sockio *);
+ struct rcu_head rcu;
char _inline_buffer[0];
};
More information about the Devel
mailing list