[Devel] [PATCH RHEL8 COMMIT] fs/fuse kio: fix problem with simultaneous map resolving

Konstantin Khorenko khorenko at virtuozzo.com
Fri Apr 23 11:54:59 MSK 2021


The commit is pushed to "branch-rh8-4.18.0-240.1.1.vz8.5.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh8-4.18.0-240.1.1.vz8.5.19
------>
commit a6f98963b04851c958e7c81b287677667c20b924
Author: Ildar Ismagilov <ildar.ismagilov at virtuozzo.com>
Date:   Fri Apr 23 11:54:59 2021 +0300

    fs/fuse kio: fix problem with simultaneous map resolving
    
    Simultaneous map resolving is possible, because while the map
    is being resolved it may go into an error state (explicit
    assignment m->state = PCS_MAP_ERROR), and after that we can try
    to resolve it again. For example, while the map state is being
    updated from RO to RW, a new READ request may call __map_error()
    due to the fact that all CS are blacklisted.
    This may cause kernel panic:
    
    kernel BUG at fs/fuse/kio/pcs/pcs_fuse_kdirect.c:543!
    Call Trace:
      pcs_map_queue_resolve+0x12e/0x370 [fuse_kio_pcs]
      map_submit+0x228/0x4b0 [fuse_kio_pcs]
      pcs_cs_wakeup+0x114/0x280 [fuse_kio_pcs]
      pcs_deaccount_ireq+0x37f/0x4c0 [fuse_kio_pcs]
      map_notify_soft_error+0xdb/0x440 [fuse_kio_pcs]
      pcs_sreq_complete+0x1f9/0x270 [fuse_kio_pcs]
      cs_response_done+0x1cb/0x2c0 [fuse_kio_pcs]
      cs_sent+0x2d/0x40 [fuse_kio_pcs]
      rpc_abort+0x2d3/0x420 [fuse_kio_pcs]
      pcs_rpc_reset+0x1c/0x40 [fuse_kio_pcs]
      pcs_rdmaconnect_start+0x106/0x3a0 [fuse_kio_pcs]
      cs_connect+0x126/0x290 [fuse_kio_pcs]
      pcs_rpc_connect+0x40/0x70 [fuse_kio_pcs]
      pcs_rpc_send+0x97/0x1c0 [fuse_kio_pcs]
      rpc_queue_work+0x12c/0x380 [fuse_kio_pcs]
      process_one_work+0x185/0x440
      worker_thread+0x126/0x3c0
      kthread+0xd1/0xe0
      ret_from_fork_nospec_begin+0x7/0x21
    
    To resolve this problem, we postpone all requests that try to
    move the map into an error state until the state of the map is
    resolved.
    
    https://pmc.acronis.com/browse/VSTOR-39656
    
    Signed-off-by: Ildar Ismagilov <ildar.ismagilov at virtuozzo.com>
    
    Reviewed-by: Alexey Kuznetsov <kuznet at acronis.com>
---
 fs/fuse/kio/pcs/pcs_map.c | 34 +++++++++++++++++++++++++---------
 1 file changed, 25 insertions(+), 9 deletions(-)

diff --git a/fs/fuse/kio/pcs/pcs_map.c b/fs/fuse/kio/pcs/pcs_map.c
index 1a9f1e9dbc41..4819fff742fa 100644
--- a/fs/fuse/kio/pcs/pcs_map.c
+++ b/fs/fuse/kio/pcs/pcs_map.c
@@ -741,12 +741,6 @@ static inline void map_remote_error_nolock(struct pcs_map_entry *m , int error,
 {
 	__map_error(m, 1 , error, offender);
 }
-static void map_remote_error(struct pcs_map_entry *m , int error, u64 offender)
-{
-	spin_lock(&m->lock);
-	map_remote_error_nolock(m, error, offender);
-	spin_unlock(&m->lock);
-}
 
 void pcs_map_notify_addr_change(struct pcs_cs * cs)
 {
@@ -1097,6 +1091,8 @@ void pcs_map_complete(struct pcs_map_entry *m, struct pcs_ioc_getmap *omap)
 		   m->state = PCS_MAP_ERROR;
 		   If m->state becomes atomic bit fields this will be impossible.
 		 */
+		TRACE("skip getmap resp: m:%p, state:%x resp{ st:%d, err:%d, v:" VER_FMT "}\n",
+		      m, m->state, omap->state, omap->error.value, VER_ARGS(omap->version));
 		spin_unlock(&m->lock);
 		goto out_ignore;
 	}
@@ -1902,6 +1898,23 @@ pcs_ireq_split(struct pcs_int_request *ireq, unsigned int iochunk, int noalign)
 	return sreq;
 }
 
+static inline bool ireq_remote_error(struct pcs_int_request *ireq,
+				     struct pcs_map_entry *m,
+				     int error, u64 offender)
+{
+	spin_lock(&m->lock);
+	if (m->state & PCS_MAP_RESOLVING) {
+		/* Defer request until the map is resolved */
+		list_add_tail(&ireq->list, &m->queue);
+		spin_unlock(&m->lock);
+		return false;
+	}
+	map_remote_error_nolock(m, error, offender);
+	spin_unlock(&m->lock);
+
+	return true;
+}
+
 static int pcs_cslist_submit_read(struct pcs_int_request *ireq, struct pcs_cs_list * csl)
 {
 	struct pcs_cluster_core *cc = ireq->cc;
@@ -1947,7 +1960,8 @@ static int pcs_cslist_submit_read(struct pcs_int_request *ireq, struct pcs_cs_li
 			 * and let MDS to figure what heppened with the rest.
 			 */
 			cs = csl->cs[0].cslink.cs;
-			map_remote_error(ireq->iochunk.map, cs->blacklist_reason, cs->id.val);
+			if (!ireq_remote_error(ireq, ireq->iochunk.map, cs->blacklist_reason, cs->id.val))
+				return 0;
 
 			FUSE_KTRACE(ireq->cc->fc, "Read from " MAP_FMT " blocked by blacklist error %d, CS" NODE_FMT,
 			      MAP_ARGS(ireq->iochunk.map), cs->blacklist_reason, NODE_ARGS(cs->id));
@@ -2112,7 +2126,8 @@ static int pcs_cslist_submit_write(struct pcs_int_request *ireq, struct pcs_cs_l
 	for (i = 0; i < csl->nsrv; i++) {
 		cs = csl->cs[i].cslink.cs;
 		if (cs_is_blacklisted(cs)) {
-			map_remote_error(ireq->iochunk.map, cs->blacklist_reason, cs->id.val);
+			if (!ireq_remote_error(ireq, ireq->iochunk.map, cs->blacklist_reason, cs->id.val))
+				return 0;
 			FUSE_KTRACE(cc_from_csset(cs->css)->fc, "Write to " MAP_FMT " blocked by blacklist error %d, CS" NODE_FMT,
 			      MAP_ARGS(ireq->iochunk.map), cs->blacklist_reason, NODE_ARGS(cs->id));
 			spin_lock(&ireq->completion_data.child_lock);
@@ -2219,7 +2234,8 @@ static int pcs_cslist_submit_flush(struct pcs_int_request *ireq, struct pcs_cs_l
 		cs = csl->cs[i].cslink.cs;
 
 		if (cs_is_blacklisted(cs)) {
-			map_remote_error(ireq->flushreq.map, cs->blacklist_reason, cs->id.val);
+			if (!ireq_remote_error(ireq, ireq->flushreq.map, cs->blacklist_reason, cs->id.val))
+				return 0;
 			FUSE_KTRACE(cc_from_csset(cs->css)->fc, "Flush to " MAP_FMT " blocked by blacklist error %d, CS" NODE_FMT,
 			      MAP_ARGS(ireq->flushreq.map), cs->blacklist_reason, NODE_ARGS(cs->id));
 			spin_lock(&ireq->completion_data.child_lock);


More information about the Devel mailing list