[Devel] [PATCH RHEL9 COMMIT] fs/fuse kio: destroy rdma_cm_id immediately in case cm fails during connection establishment

Konstantin Khorenko khorenko at virtuozzo.com
Tue Jan 2 16:35:04 MSK 2024


The commit is pushed to "branch-rh9-5.14.0-284.25.1.vz9.27.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh9-5.14.0-284.25.1.vz9.27.5
------>
commit 47f1df754353d2e6ae0402d2e15b58351fa6e18f
Author: Liu Kui <kui.liu at acronis.com>
Date:   Tue Jan 2 16:53:08 2024 +0800

    fs/fuse kio: destroy rdma_cm_id immediately in case cm fails during connection establishment
    
    Previously, if cm fails after the rio has been created, the rdma_cm_id
    would not be destroyed immediately. However the cm_id->context could
    still point to rc->id which would no longer be valid. This delay creates
    a window during which cm_id->context holds an illegal pointer. If an RDMA
    cm event arrives during this window, an illegal pointer dereference will
    happen, thus crashing the system.
    
    https://pmc.acronis.work/browse/VSTOR-79838
    
    Signed-off-by: Liu Kui <kui.liu at acronis.com>
    
    Feature: vStorage
---
 fs/fuse/kio/pcs/pcs_rdma_conn.c | 16 +++++++++-------
 fs/fuse/kio/pcs/pcs_rdma_io.c   | 29 ++++++++++++++++++++++-------
 fs/fuse/kio/pcs/pcs_rdma_io.h   |  2 +-
 3 files changed, 32 insertions(+), 15 deletions(-)

diff --git a/fs/fuse/kio/pcs/pcs_rdma_conn.c b/fs/fuse/kio/pcs/pcs_rdma_conn.c
index 96fc2b1ed281..846106a59d50 100644
--- a/fs/fuse/kio/pcs/pcs_rdma_conn.c
+++ b/fs/fuse/kio/pcs/pcs_rdma_conn.c
@@ -77,7 +77,6 @@ static int pcs_rdma_cm_event_handler(struct rdma_cm_id *cmid,
 				complete(&rc->cm_done);
 				break;
 			}
-			rc->cmid = NULL;
 
 			conn_param_init(&conn_param, &rc->rio->conn_req, cmid);
 			if (rdma_connect_locked(cmid, &conn_param)) {
@@ -87,10 +86,6 @@ static int pcs_rdma_cm_event_handler(struct rdma_cm_id *cmid,
 			break;
 		case RDMA_CM_EVENT_ESTABLISHED:
 			cmid->context = &rc->rio->id;
-			if (pcs_rdma_established(rc->rio)) {
-				TRACE("pcs_rdma_established failed, rio: 0x%p\n", rc->rio);
-				rc->cm_event = RDMA_CM_EVENT_REJECTED;
-			}
 			complete(&rc->cm_done);
 			break;
 		case RDMA_CM_EVENT_REJECTED:
@@ -166,6 +161,14 @@ void pcs_rdmaconnect_start(struct pcs_rpc *ep)
 		ep->flags |= PCS_RPC_F_PEER_ID;
 
 	ep->state = PCS_RPC_AUTH;
+
+	/* setup rxs */
+	if (pcs_rdma_setup_rxs((rc.rio))) {
+		TRACE("pcs_rdma_setup_rxs failed, rio: 0x%p\n", rc.rio);
+		pcs_rpc_report_error(ep, PCS_RPC_ERR_CONNECT_ERROR);
+		goto fail;
+	}
+
 	ret = rpc_client_start_auth(ep, PCS_AUTH_DIGEST,
 				    cc_from_rpc(ep->eng)->cluster_name);
 	if (ret < 0) {
@@ -186,8 +189,7 @@ void pcs_rdmaconnect_start(struct pcs_rpc *ep)
 fail_cm:
 	if (rc.rio)
 		pcs_rdma_destroy(rc.rio);
-	if (rc.cmid)
-		rdma_destroy_id(rc.cmid);
+	rdma_destroy_id(rc.cmid);
 fail:
 	pcs_rpc_reset(ep);
 	return;
diff --git a/fs/fuse/kio/pcs/pcs_rdma_io.c b/fs/fuse/kio/pcs/pcs_rdma_io.c
index 62d138c8b611..622ce72c5a85 100644
--- a/fs/fuse/kio/pcs/pcs_rdma_io.c
+++ b/fs/fuse/kio/pcs/pcs_rdma_io.c
@@ -1322,7 +1322,7 @@ struct pcs_rdmaio* pcs_rdma_create(int hdr_size, struct rdma_cm_id *cmid,
 	return NULL;
 }
 
-int pcs_rdma_established(struct pcs_rdmaio *rio)
+int pcs_rdma_setup_rxs(struct pcs_rdmaio *rio)
 {
 	struct rio_rx *rx;
 
@@ -1444,20 +1444,35 @@ static void rio_destroy(struct work_struct *work)
 
 static DECLARE_WORK(rio_destroy_work, rio_destroy);
 
+/*
+ * This is only used in case cm error happens during establishing
+ * a connection. We need to destroy the rio immediately such that
+ * the rdma_cm_id can be destroyed immediately afterwards.
+ */
 void pcs_rdma_destroy(struct pcs_rdmaio *rio)
 {
-	struct pcs_netio *netio = &rio->netio;
-	struct pcs_rpc *ep = netio->parent;
+	struct pcs_rpc *ep = rio->netio.parent;
+	int i;
 
 	TRACE("rio: 0x%p\n", rio);
 
 	BUG_ON(!mutex_is_locked(&ep->mutex));
 
-	netio->eof = NULL;
-	rio_abort(rio, PCS_ERR_NET_ABORT);
+	rio->rio_state = RIO_STATE_ABORTED;
+	rio->rio_error = PCS_ERR_NET_ABORT;
 
-	if (llist_add(&rio->destroy_node, &rio_destroy_list))
-		queue_work(pcs_cleanup_wq, &rio_destroy_work);
+	rdma_disconnect(rio->cmid);
+
+	rdma_destroy_qp(rio->cmid);
+	ib_destroy_cq(rio->cq);
+
+	pcs_rdma_device_destroy(rio->dev);
+	for (i = 0; i < rio->recv_queue_depth; i++)
+		rio_fini_rx(rio->rx_descs + i, rio->cmid->device);
+	kfree(rio->rx_descs);
+
+	pcs_rpc_put(ep);
+	kfree(rio);
 }
 
 void pcs_rdma_ioconn_destruct(struct pcs_ioconn *ioconn)
diff --git a/fs/fuse/kio/pcs/pcs_rdma_io.h b/fs/fuse/kio/pcs/pcs_rdma_io.h
index 18962208e4a2..87cae3764f23 100644
--- a/fs/fuse/kio/pcs/pcs_rdma_io.h
+++ b/fs/fuse/kio/pcs/pcs_rdma_io.h
@@ -113,7 +113,7 @@ struct pcs_rdmaio
 
 struct pcs_rdmaio* pcs_rdma_create(int hdr_size, struct rdma_cm_id *cmid,
 		int queue_depth, struct pcs_rpc *ep);
-int pcs_rdma_established(struct pcs_rdmaio *rio);
+int pcs_rdma_setup_rxs(struct pcs_rdmaio *rio);
 void pcs_rdma_destroy(struct pcs_rdmaio *rio);
 void pcs_rdma_ioconn_destruct(struct pcs_ioconn *ioconn);
 


More information about the Devel mailing list