[Devel] [PATCH VZ9] fs/fuse kio: race condition dereferencing struct cs

Alexey Kuznetsov kuznet at virtuozzo.com
Thu Oct 17 14:44:19 MSK 2024


Subject: fs/fuse kio: race condition dereferencing struct cs

Original author had a wrong idea about handling lists,
which was difficult to notice. Unfortunately, the place
is rarely used and we crashed there only now.

After csset lock released the first item fetched from
head of private list could change its state, whitelisted
(a it happens in the bug) or even moved back to blacklist.

https://virtuozzo.atlassian.net/browse/VSTOR-91475

Affects: #VSTOR-91475

Signed-off-by: Alexey Kuznetsov <kuznet at virtuozzo.com>
---
 fs/fuse/kio/pcs/pcs_cs.c | 43 +++++++++++++++++++++++++++++++------------
 1 file changed, 31 insertions(+), 12 deletions(-)

diff --git a/fs/fuse/kio/pcs/pcs_cs.c b/fs/fuse/kio/pcs/pcs_cs.c
index ed3de75..b2ad511 100644
--- a/fs/fuse/kio/pcs/pcs_cs.c
+++ b/fs/fuse/kio/pcs/pcs_cs.c
@@ -987,6 +987,7 @@ static void pcs_cs_isolate(struct pcs_cs *cs, struct list_head *dispose)
 	hlist_del_rcu(&cs->hlist);
 	list_del(&cs->lru_link);
 	list_del(&cs->bl_link);
+	clear_bit(CS_SF_BLACKLISTED, &cs->state);
 	cs->css->ncs--;
 
 	if (list_empty(&cs->css->bl_list))
@@ -1267,49 +1268,67 @@ static void bl_timer_work(struct work_struct *w)
 	struct pcs_cs_set *css = container_of(w, struct pcs_cs_set, bl_work.work);
 	struct pcs_cluster_core *cc = cc_from_csset(css);
 	LIST_HEAD(local_lst);
-	LIST_HEAD(to_blacklist);
 	LIST_HEAD(to_resubmit);
 
+	rcu_read_lock();
 	spin_lock(&css->lock);
 	list_splice_tail_init(&css->bl_list, &local_lst);
-	spin_unlock(&css->lock);
-	if (list_empty(&local_lst))
-		return;
 
 	while (!list_empty(&local_lst)) {
 		struct pcs_cs *cs;
 		struct pcs_msg *msg;
 
 		cs = list_first_entry(&local_lst, struct pcs_cs, bl_link);
+		spin_unlock(&css->lock);
 
+		/* cs can be removed from the list after csset unlock and even isolated,
+		 * but it cannot be destroyed due to rcu. So, we can take cs lock
+		 * and ignore found cs if it is removed from blacklist.
+		 */
 		spin_lock(&cs->lock);
-		BUG_ON(cs->is_dead);
-		list_move(&cs->bl_link, &to_blacklist);
-		if (cs->use_count) {
+		spin_lock(&css->lock);
+		if (list_empty(&local_lst) ||
+		    cs != list_first_entry(&local_lst, struct pcs_cs, bl_link)) {
 			spin_unlock(&cs->lock);
 			continue;
 		}
+
+		BUG_ON(!test_bit(CS_SF_BLACKLISTED, &cs->state) || cs->is_dead);
+
+		if (list_empty(&css->bl_list))
+			mod_delayed_work(cc->wq, &css->bl_work, PCS_CS_BLACKLIST_TIMER);
+		list_move(&cs->bl_link, &css->bl_list);
+		spin_unlock(&css->lock);
+
+		if (cs->use_count)
+			goto next_unlock;
+
 		if (!cs->nmaps) {
 			pcs_cs_isolate(cs, &to_resubmit);
 			spin_unlock(&cs->lock);
 			pcs_cs_destroy(cs);
-			continue;
+			goto next;
 		}
 		cs->use_count++;
 		spin_unlock(&cs->lock);
+		rcu_read_unlock();
+
 		msg = cs_prep_probe(cs);
 		if (msg)
 			pcs_rpc_call(cs->rpc, msg);
+
+		rcu_read_lock();
 		spin_lock(&cs->lock);
 		if (!msg)
 			cs->use_count--;
+
+next_unlock:
 		spin_unlock(&cs->lock);
+next:
+		spin_lock(&css->lock);
 	}
-	spin_lock(&css->lock);
-	list_splice(&to_blacklist, &css->bl_list);
-	if (list_empty(&css->bl_list))
-		mod_delayed_work(cc->wq, &css->bl_work, PCS_CS_BLACKLIST_TIMER);
 	spin_unlock(&css->lock);
+	rcu_read_unlock();
 
 	pcs_cc_requeue(cc, &to_resubmit);
 }
-- 
1.8.3.1



More information about the Devel mailing list