[Devel] [PATCH RHEL9 COMMIT] fuse: pcs: rpc timeout was incoherent

Konstantin Khorenko khorenko at virtuozzo.com
Wed Nov 1 22:48:16 MSK 2023


The commit is pushed to "branch-rh9-5.14.0-284.25.1.vz9.30.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh9-5.14.0-284.25.1.vz9.30.8
------>
commit e8e06f1b730e1d1112914d8aa17fb71650c45cba
Author: Alexey Kuznetsov <kuznet at virtuozzo.com>
Date:   Fri Oct 6 18:43:55 2023 +0800

    fuse: pcs: rpc timeout was incoherent
    
    The code was ported from user space incorrectly, without understanding
    how it actually works. This can result in a lockup of a failing connection.
    
    We have two timeouts. The first is a per-message timeout: when it fires we
    cancel the timed-out request, but assume the failure comes from the
    semantics of the request itself, e.g. a CS needs to talk to another CS or
    to the MDS and that communication fails, which obviously does not mean
    _this_ connection is bad. The second is a longer per-socket timeout, which
    aborts the whole connection. After the per-message timeout the request is
    removed from the queues and the socket starts to look idle, but it would
    be a huge mistake to cancel the per-socket timer. If we do (and we really
    did before this patch), then when we have a failing socket we never sense
    the failure!
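
    To illustrate, the arming logic after this patch boils down to "keep the
    earliest of the still-pending deadlines, where 0 means no deadline". A
    minimal sketch of that selection, assuming the kernel jiffies helpers (the
    helper name below is hypothetical, not the exact code in
    update_xmit_timeout()):

        #include <linux/jiffies.h>

        /* Return the earlier of two jiffies deadlines; 0 means "not set". */
        static unsigned long earliest_deadline(unsigned long a, unsigned long b)
        {
                if (!a)
                        return b;
                if (!b)
                        return a;
                return time_before(a, b) ? a : b;
        }

    update_xmit_timeout() then effectively arms the timer with the earliest of
    the preserved per-socket kill_deadline, the transport (netio) deadline and
    the oldest pending message deadline, and cancels it only when all three
    are unset.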
    
    https://pmc.acronis.work/browse/VSTOR-54040
    
    Signed-off-by: Alexey Kuznetsov <kuznet at acronis.com>
    
    Feature: vStorage
---
 fs/fuse/kio/pcs/pcs_rpc.c     | 46 ++++++++++++++++++++++++++++++++++---------
 fs/fuse/kio/pcs/pcs_rpc.h     |  1 +
 fs/fuse/kio/pcs/pcs_sock_io.c |  2 +-
 3 files changed, 39 insertions(+), 10 deletions(-)

diff --git a/fs/fuse/kio/pcs/pcs_rpc.c b/fs/fuse/kio/pcs/pcs_rpc.c
index 4f49b4eb60dd..8f545f952e1e 100644
--- a/fs/fuse/kio/pcs/pcs_rpc.c
+++ b/fs/fuse/kio/pcs/pcs_rpc.c
@@ -458,6 +458,7 @@ void pcs_rpc_enable(struct pcs_rpc * ep, int error)
 	TRACE("ep(%p)->state: WORK\n", ep);
 	ep->state = PCS_RPC_WORK;
 	ep->retries = 0;
+	ep->kill_deadline = 0;
 	queue_work(cc->wq, &ep->work);
 }
 
@@ -472,12 +473,16 @@ static void handle_response(struct pcs_rpc * ep, struct pcs_msg * msg)
 	 * for itself.
 	 */
 	pcs_msg_io_start(msg, pcs_free_msg);
+
+	ep->kill_deadline = 0;
+
 	req = pcs_rpc_lookup_xid(ep, &h->xid);
 	if (req == NULL)
 		goto drop;
 
 	pcs_msg_del_calendar(req);
 	list_del(&req->list);
+
 	if (h->type == PCS_RPC_ERROR_RESP) {
 		struct pcs_rpc_error_resp * eh = (struct pcs_rpc_error_resp *)msg->_inline_buffer;
 
@@ -535,6 +540,7 @@ static void handle_keep_waiting(struct pcs_rpc * ep, struct pcs_msg * msg)
 	if (req->stage == PCS_MSG_STAGE_WAIT) {
 		req->start_time = jiffies;
 		list_move_tail(&req->list, &ep->pending_queue);
+		ep->kill_deadline = 0;
 	}
 }
 
@@ -808,6 +814,11 @@ static void calendar_work(struct work_struct *w)
 			msg->stage = PCS_MSG_STAGE_SENT;
 			pcs_set_rpc_error(&msg->error, PCS_ERR_WRITE_TIMEOUT, msg->rpc);
 		}
+		if (timer_pending(&ep->timer_work.timer)) {
+			unsigned long tmo = ep->timer_work.timer.expires;
+			if (!ep->kill_deadline || time_before(tmo, ep->kill_deadline))
+				ep->kill_deadline = tmo;
+		}
 		BUG_ON(!hlist_unhashed(&msg->kill_link));
 		msg->done(msg);
 		count++;
@@ -834,26 +845,32 @@ static void update_xmit_timeout(struct pcs_rpc *ep)
 	struct pcs_netio *netio = (struct pcs_netio *)ep->conn;
 	struct pcs_cluster_core *cc = cc_from_rpc(ep->eng);
 	struct pcs_msg * msg;
-	unsigned long timeout = 0;
+	unsigned long timeout = ep->kill_deadline;
 	unsigned long tx;
 
 	BUG_ON(ep->state != PCS_RPC_WORK);
 
-	if (list_empty(&ep->pending_queue) && !netio->tops->next_timeout(netio)) {
+	tx = netio->tops->next_timeout(netio);
+
+	if (list_empty(&ep->pending_queue) && !tx && !timeout) {
 		if (timer_pending(&ep->timer_work.timer))
 			cancel_delayed_work(&ep->timer_work);
 		return;
 	}
+
+	if (tx) {
+		if (!timeout || time_before(tx, timeout))
+			timeout = tx;
+	}
+
 	if (!list_empty(&ep->pending_queue)) {
 		msg = list_first_entry(&ep->pending_queue, struct pcs_msg, list);
 
-		timeout = msg->start_time + ep->params.response_timeout;
-	}
-	if (netio->tops->next_timeout(netio)) {
-		tx = netio->tops->next_timeout(netio);
-		if (time_after(tx, timeout))
+		tx = msg->start_time + ep->params.response_timeout;
+		if (!timeout || time_before(tx, timeout))
 			timeout = tx;
 	}
+
 	if (time_is_before_jiffies(timeout))
 		timeout = 0;
 	else
@@ -1291,11 +1308,22 @@ static void timer_work(struct work_struct *w)
 		break;
 
 	case PCS_RPC_WORK: {
-		int err = list_empty(&ep->pending_queue) ? PCS_ERR_RESPONSE_TIMEOUT : PCS_ERR_WRITE_TIMEOUT;
+		int err = PCS_ERR_RESPONSE_TIMEOUT;
+
+		/* We do not really know which timeout expired, and it is not
+		 * very essential; just make a guess.
+		 */
+		if (list_empty(&ep->pending_queue)) {
+			struct pcs_netio *netio = (struct pcs_netio *)ep->conn;
+			unsigned long tx = netio->tops->next_timeout(netio);
+
+			if (tx && !time_is_after_jiffies(tx))
+				err = PCS_ERR_WRITE_TIMEOUT;
+		}
 
 		pcs_rpc_report_error(ep, PCS_RPC_ERR_RESPONSE_TOUT);
 		FUSE_KTRACE(cc_from_rpc(ep->eng)->fc, "rpc timer expired, killing connection to " PEER_FMT ", %d",
-		      PEER_ARGS(ep), err);
+			    PEER_ARGS(ep), err);
 		rpc_abort(ep, 0, err);
 		break;
 	}
diff --git a/fs/fuse/kio/pcs/pcs_rpc.h b/fs/fuse/kio/pcs/pcs_rpc.h
index 6e9f75145bcc..2ff4494a7cd4 100644
--- a/fs/fuse/kio/pcs/pcs_rpc.h
+++ b/fs/fuse/kio/pcs/pcs_rpc.h
@@ -110,6 +110,7 @@ struct pcs_rpc
 	char			peer_build_version[MAX_BUILD_VERSION_LENGTH+1];
 	struct work_struct	work;
 	struct delayed_work	timer_work;
+	unsigned long		kill_deadline;
 	PCS_NET_ADDR_T		addr;
 /* TODO Reanable local sockets */
 #if 0
diff --git a/fs/fuse/kio/pcs/pcs_sock_io.c b/fs/fuse/kio/pcs/pcs_sock_io.c
index 710940eafa34..07b17ed5e848 100644
--- a/fs/fuse/kio/pcs/pcs_sock_io.c
+++ b/fs/fuse/kio/pcs/pcs_sock_io.c
@@ -286,7 +286,7 @@ static void pcs_sockio_recv(struct pcs_sockio *sio)
 				TRACE(PEER_FMT" msg:%p read_off:%d iov_size:%ld\n", PEER_ARGS(ep), msg, sio->read_offset,
 				      iov_iter_count(it));
 			} else {
-				if (n == -EAGAIN || n == 0)
+				if (n == -EAGAIN)
 					return;
 
 				sio_abort(sio, PCS_ERR_NET_ABORT);

