[Devel] [PATCH RHEL7 COMMIT] fs/fuse kio: relax congestion avoidance limits (backport from usermode)

Konstantin Khorenko khorenko at virtuozzo.com
Mon Apr 15 15:55:13 MSK 2019


The commit is pushed to "branch-rh7-3.10.0-957.10.1.vz7.94.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh7-3.10.0-957.10.1.vz7.94.13
------>
commit d943b6f98c259324dc2e447714c8745b1675fc64
Author: Pavel Butsykin <pbutsykin at virtuozzo.com>
Date:   Mon Apr 15 15:55:11 2019 +0300

    fs/fuse kio: relax congestion avoidance limits (backport from usermode)
    
    Investigation of the VZ US-QA cluster shows that congestion window
    reduction after idle periods makes the window reopen too slowly
    once data starts to flow again.
    
    So, introduce ssthresh to let the window open faster after
    idle periods.
    
    Maybe even this is not enough and the window should be opened
    even more aggressively. Further observations will show.
    
    Signed-off-by: Pavel Butsykin <pbutsykin at virtuozzo.com>
    
    =====================
    Patchset description:
    
    KIO performance fixes
    
    This patchset aims to fix the performance issue with single-threaded
    sequential async reads.
    
    https://pmc.acronis.com/browse/VSTOR-11050
    
    Acked-by: Alexey Kuznetsov <kuznet at virtuozzo.com>
    
    Pavel Butsykin (4):
      fs/fuse kio: fix a typo in worth_to_grow()
      fs/fuse kio: relax congestion avoidance limits (backport from usermode)
      fs/fuse kio: add missed sock write in pcs_sock_sendmsg()
      fs/fuse kio: export io_locality
---
 fs/fuse/kio/pcs/pcs_cs.c  | 26 +++++++++++++++++++++-----
 fs/fuse/kio/pcs/pcs_cs.h  |  1 +
 fs/fuse/kio/pcs/pcs_map.c |  7 ++++++-
 3 files changed, 28 insertions(+), 6 deletions(-)

diff --git a/fs/fuse/kio/pcs/pcs_cs.c b/fs/fuse/kio/pcs/pcs_cs.c
index 48e3b3cedc4e..dc7d01387beb 100644
--- a/fs/fuse/kio/pcs/pcs_cs.c
+++ b/fs/fuse/kio/pcs/pcs_cs.c
@@ -62,6 +62,7 @@ struct pcs_cs *pcs_cs_alloc(struct pcs_cs_set *css,
 	cs->in_flight = 0;
 	cs->cwnd = PCS_CS_INIT_CWND;
 	cs->eff_cwnd = PCS_CS_INIT_CWND;
+	cs->ssthresh = PCS_CS_INIT_CWND;
 	cs->cwr_state = 0;
 	atomic_set(&cs->latency_avg, 0);
 	cs->net_latency_avg = 0;
@@ -607,6 +608,10 @@ static void handle_congestion(struct pcs_cs *cs, struct pcs_rpc_hdr *h)
 		 * to half of min(in_flight, cwnd) and enter congestion reduction state,
 		 * where we ignore further congestion notifications until window is reduced
 		 */
+		if (who->cwnd >= PCS_CS_INIT_CWND)
+			who->ssthresh = who->cwnd;
+		else
+			who->ssthresh = PCS_CS_INIT_CWND;
 		if (who->in_flight < who->cwnd)
 			who->cwnd = who->in_flight;
 		who->cwnd /= 2;
@@ -663,8 +668,12 @@ static void cs_keep_waiting(struct pcs_rpc *ep, struct pcs_msg *req, struct pcs_
 		}
 
 		if (!who->cwr_state) {
-			FUSE_KDTRACE(cc_from_csset(cs->css)->fc, "Congestion window on CS" NODE_FMT " reducing %d/%d/%d", NODE_ARGS(h->xid.origin),
-				     who->in_flight, who->eff_cwnd, who->cwnd);
+			FUSE_KTRACE(cc_from_csset(cs->css)->fc, "Congestion window on CS" NODE_FMT " reducing %d/%d/%d", NODE_ARGS(h->xid.origin),
+				    who->in_flight, who->eff_cwnd, who->cwnd);
+			if (who->cwnd >= PCS_CS_INIT_CWND)
+				who->ssthresh = who->cwnd;
+			else
+				who->ssthresh = PCS_CS_INIT_CWND;
 			if (who->in_flight < who->cwnd)
 				who->cwnd = who->in_flight;
 			who->cwnd /= 2;
@@ -903,9 +912,14 @@ unsigned int cs_get_avg_in_flight(struct pcs_cs *cs)
 				cs->in_flight_avg >>= interval;
 			}
 			if (cs->cwnd > PCS_CS_INIT_CWND) {
-				cs->cwnd = PCS_CS_INIT_CWND;
-				if (cs->eff_cwnd > PCS_CS_INIT_CWND)
-					cs->eff_cwnd = PCS_CS_INIT_CWND;
+				unsigned int cwnd = PCS_CS_INIT_CWND;
+				TRACE("Congestion window on CS#" NODE_FMT " was not used, shrink %u -> %u", NODE_ARGS(cs->id),
+					cs->cwnd, cwnd);
+				if (cs->cwnd > cs->ssthresh)
+					cs->ssthresh = cs->cwnd;
+				cs->cwnd = cwnd;
+				if (cs->eff_cwnd > cwnd)
+					cs->eff_cwnd = cwnd;
 			}
 		}
 	}
@@ -966,6 +980,8 @@ void cs_cwnd_use_or_lose(struct pcs_cs *cs)
 
 			FUSE_KTRACE(cc_from_csset(cs->css)->fc, "Congestion window on CS#" NODE_FMT " was not used, shrink %u -> %u", NODE_ARGS(cs->id),
 				    cs->cwnd, cwnd);
+			if (cs->cwnd > cs->ssthresh)
+				cs->ssthresh = cs->cwnd;
 			cs->cwnd = cwnd;
 			if (cs->eff_cwnd > cwnd)
 				cs->eff_cwnd = cwnd;
diff --git a/fs/fuse/kio/pcs/pcs_cs.h b/fs/fuse/kio/pcs/pcs_cs.h
index 1fb40936d046..513d53539211 100644
--- a/fs/fuse/kio/pcs/pcs_cs.h
+++ b/fs/fuse/kio/pcs/pcs_cs.h
@@ -52,6 +52,7 @@ struct pcs_cs {
 	unsigned int		in_flight;
 	unsigned int		eff_cwnd;
 	unsigned int		cwnd;
+	unsigned int            ssthresh;
 	int			cwr_state;
 	atomic_t		latency_avg;
 	unsigned int		net_latency_avg;
diff --git a/fs/fuse/kio/pcs/pcs_map.c b/fs/fuse/kio/pcs/pcs_map.c
index df71406bc85e..26b70df26b05 100644
--- a/fs/fuse/kio/pcs/pcs_map.c
+++ b/fs/fuse/kio/pcs/pcs_map.c
@@ -1415,6 +1415,11 @@ static void pcs_cs_deaccount(struct pcs_int_request *ireq, struct pcs_cs * cs, i
 		if (cs->last_latency > iolat_cutoff && ireq->type != PCS_IREQ_FLUSH) {
 			unsigned int clamp;
 
+			if (cs->cwnd >= PCS_CS_INIT_CWND)
+				cs->ssthresh = cs->cwnd;
+			else
+				cs->ssthresh = PCS_CS_INIT_CWND;
+
 			clamp = PCS_CS_INIT_CWND;
 			if (cs->last_latency > iolat_cutoff*8)
 				clamp = PCS_CS_INIT_CWND/8;
@@ -1430,7 +1435,7 @@ static void pcs_cs_deaccount(struct pcs_int_request *ireq, struct pcs_cs * cs, i
 		} else if (cs->in_flight >= cs->cwnd && !cs->cwr_state && worth_to_grow(ireq, cs)) {
 			unsigned int cwnd;
 
-			if (cs->cwnd < PCS_CS_INIT_CWND)
+			if (cs->cwnd <= cs->ssthresh)
 				cwnd = cs->cwnd + cost;
 			else
 				cwnd = cs->cwnd + 0x100000000ULL/cs->cwnd;



More information about the Devel mailing list