[Devel] [PATCH VZ9 16/20] fuse: pcs: rpc timeout was incoherent

Alexey Kuznetsov kuznet at virtuozzo.com
Fri Oct 6 13:43:55 MSK 2023


The code from user space was ported incorrectly without understanding
how this actually works. This can result in lockup of failing connection.

We have two timeouts - per-message timeout, when we cancel timed out request,
but assume this is because of semantics of the request, f.e. CS needs to talk
to another CS or to MDS, and that communitacion fails, which obviously does
not mean _this_ connection is bad. And longer per-socket timeout, which aborts
the whole connection. After per-message timeout request is removed from queues
ans socket starts to look idle, but it would be huge mistake to cancel per-socket
timer. If we do (and we really did before this patch), when we have failing socket,
we never sense the failure!

Signed-off-by: Alexey Kuznetsov <kuznet at acronis.com>
---
 fs/fuse/kio/pcs/pcs_rpc.c     | 45 ++++++++++++++++++++++++++++++++++---------
 fs/fuse/kio/pcs/pcs_rpc.h     |  1 +
 fs/fuse/kio/pcs/pcs_sock_io.c |  2 +-
 3 files changed, 38 insertions(+), 10 deletions(-)

diff --git a/fs/fuse/kio/pcs/pcs_rpc.c b/fs/fuse/kio/pcs/pcs_rpc.c
index 4f49b4e..8631f8e 100644
--- a/fs/fuse/kio/pcs/pcs_rpc.c
+++ b/fs/fuse/kio/pcs/pcs_rpc.c
@@ -472,12 +472,16 @@ static void handle_response(struct pcs_rpc * ep, struct pcs_msg * msg)
 	 * for itself.
 	 */
 	pcs_msg_io_start(msg, pcs_free_msg);
+
+	ep->kill_deadline = 0;
+
 	req = pcs_rpc_lookup_xid(ep, &h->xid);
 	if (req == NULL)
 		goto drop;
 
 	pcs_msg_del_calendar(req);
 	list_del(&req->list);
+
 	if (h->type == PCS_RPC_ERROR_RESP) {
 		struct pcs_rpc_error_resp * eh = (struct pcs_rpc_error_resp *)msg->_inline_buffer;
 
@@ -535,6 +539,7 @@ static void handle_keep_waiting(struct pcs_rpc * ep, struct pcs_msg * msg)
 	if (req->stage == PCS_MSG_STAGE_WAIT) {
 		req->start_time = jiffies;
 		list_move_tail(&req->list, &ep->pending_queue);
+		ep->kill_deadline = 0;
 	}
 }
 
@@ -808,6 +813,11 @@ static void calendar_work(struct work_struct *w)
 			msg->stage = PCS_MSG_STAGE_SENT;
 			pcs_set_rpc_error(&msg->error, PCS_ERR_WRITE_TIMEOUT, msg->rpc);
 		}
+		if (timer_pending(&ep->timer_work.timer)) {
+			unsigned long tmo = ep->timer_work.timer.expires;
+			if (!ep->kill_deadline || time_before(tmo, ep->kill_deadline))
+				ep->kill_deadline = tmo;
+		}
 		BUG_ON(!hlist_unhashed(&msg->kill_link));
 		msg->done(msg);
 		count++;
@@ -834,26 +844,32 @@ static void update_xmit_timeout(struct pcs_rpc *ep)
 	struct pcs_netio *netio = (struct pcs_netio *)ep->conn;
 	struct pcs_cluster_core *cc = cc_from_rpc(ep->eng);
 	struct pcs_msg * msg;
-	unsigned long timeout = 0;
+	unsigned long timeout = ep->kill_deadline;
 	unsigned long tx;
 
 	BUG_ON(ep->state != PCS_RPC_WORK);
 
-	if (list_empty(&ep->pending_queue) && !netio->tops->next_timeout(netio)) {
+	tx = netio->tops->next_timeout(netio);
+
+	if (list_empty(&ep->pending_queue) && !tx && !timeout) {
 		if (timer_pending(&ep->timer_work.timer))
 			cancel_delayed_work(&ep->timer_work);
 		return;
 	}
+
+	if (tx) {
+		if (!timeout || time_before(tx, timeout))
+			timeout = tx;
+	}
+
 	if (!list_empty(&ep->pending_queue)) {
 		msg = list_first_entry(&ep->pending_queue, struct pcs_msg, list);
 
-		timeout = msg->start_time + ep->params.response_timeout;
-	}
-	if (netio->tops->next_timeout(netio)) {
-		tx = netio->tops->next_timeout(netio);
-		if (time_after(tx, timeout))
+		tx = msg->start_time + ep->params.response_timeout;
+		if (!timeout || time_before(tx, timeout))
 			timeout = tx;
 	}
+
 	if (time_is_before_jiffies(timeout))
 		timeout = 0;
 	else
@@ -1291,11 +1307,22 @@ static void timer_work(struct work_struct *w)
 		break;
 
 	case PCS_RPC_WORK: {
-		int err = list_empty(&ep->pending_queue) ? PCS_ERR_RESPONSE_TIMEOUT : PCS_ERR_WRITE_TIMEOUT;
+		int err = PCS_ERR_RESPONSE_TIMEOUT                        ;
+
+		/* We do not really know what timeout expired and this is not very essential,
+		 * just make a guess.
+		 */
+		if (list_empty(&ep->pending_queue)) {
+			struct pcs_netio *netio = (struct pcs_netio *)ep->conn;
+			unsigned long tx = netio->tops->next_timeout(netio);
+
+			if (tx && !time_is_after_jiffies(tx))
+				err = PCS_ERR_WRITE_TIMEOUT;
+		}
 
 		pcs_rpc_report_error(ep, PCS_RPC_ERR_RESPONSE_TOUT);
 		FUSE_KTRACE(cc_from_rpc(ep->eng)->fc, "rpc timer expired, killing connection to " PEER_FMT ", %d",
-		      PEER_ARGS(ep), err);
+			    PEER_ARGS(ep), err);
 		rpc_abort(ep, 0, err);
 		break;
 	}
diff --git a/fs/fuse/kio/pcs/pcs_rpc.h b/fs/fuse/kio/pcs/pcs_rpc.h
index 6e9f751..2ff4494 100644
--- a/fs/fuse/kio/pcs/pcs_rpc.h
+++ b/fs/fuse/kio/pcs/pcs_rpc.h
@@ -110,6 +110,7 @@ struct pcs_rpc
 	char			peer_build_version[MAX_BUILD_VERSION_LENGTH+1];
 	struct work_struct	work;
 	struct delayed_work	timer_work;
+	unsigned long		kill_deadline;
 	PCS_NET_ADDR_T		addr;
 /* TODO Reanable local sockets */
 #if 0
diff --git a/fs/fuse/kio/pcs/pcs_sock_io.c b/fs/fuse/kio/pcs/pcs_sock_io.c
index 710940e..07b17ed 100644
--- a/fs/fuse/kio/pcs/pcs_sock_io.c
+++ b/fs/fuse/kio/pcs/pcs_sock_io.c
@@ -286,7 +286,7 @@ static void pcs_sockio_recv(struct pcs_sockio *sio)
 				TRACE(PEER_FMT" msg:%p read_off:%d iov_size:%ld\n", PEER_ARGS(ep), msg, sio->read_offset,
 				      iov_iter_count(it));
 			} else {
-				if (n == -EAGAIN || n == 0)
+				if (n == -EAGAIN)
 					return;
 
 				sio_abort(sio, PCS_ERR_NET_ABORT);
-- 
1.8.3.1



More information about the Devel mailing list