[Devel] [PATCH rh8 1/7] ploop: Add timeout to ploop_inflight_bios_ref_switch()

Kirill Tkhai ktkhai at virtuozzo.com
Tue Mar 10 15:16:12 MSK 2020


We can imagine a situation where a single request on the underlying
bdev is hung, while other requests can still be served (say, we
recently had a live fuse mount where one request was lost because of
a bug in the userspace driver implementation).

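In this case we should not wait for it forever. Add a timeout to stop
waiting after 1 minute. The timeout (and killable wait) is used only
when the caller passes killable=true; all callers converted in this
patch pass false and keep the previous uninterruptible wait.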

Signed-off-by: Kirill Tkhai <ktkhai at virtuozzo.com>
---
 drivers/md/dm-ploop-cmd.c |   56 +++++++++++++++++++++++++++++++++++++--------
 drivers/md/dm-ploop-map.c |    2 +-
 drivers/md/dm-ploop.h     |    4 ++-
 3 files changed, 50 insertions(+), 12 deletions(-)

diff --git a/drivers/md/dm-ploop-cmd.c b/drivers/md/dm-ploop-cmd.c
index 30cb4f7fbfc4..0d245ef505aa 100644
--- a/drivers/md/dm-ploop-cmd.c
+++ b/drivers/md/dm-ploop-cmd.c
@@ -64,6 +64,24 @@ static void ploop_advance_bat_and_holes(struct ploop *ploop,
 	write_unlock_irq(&ploop->bat_rwlock);
 }
 
+static int wait_for_completion_maybe_killable(struct completion *comp,
+					      bool killable)
+{
+	int ret = 0;
+
+	if (killable) {
+		ret = wait_for_completion_killable_timeout(comp, PLOOP_INFLIGHT_TIMEOUT);
+		if (!ret)
+			ret = -ETIMEDOUT;
+		else if (ret > 0)
+			ret = 0;
+	} else {
+		wait_for_completion(comp);
+	}
+
+	return ret;
+}
+
 /*
  * Switch index of ploop->inflight_bios_ref[] and wait till inflight
  * bios are completed. This waits for completion of simple submitted
@@ -73,12 +91,24 @@ static void ploop_advance_bat_and_holes(struct ploop *ploop,
  * weaker, than "dmsetup suspend".
  * It is called from kwork only, so this can't be executed in parallel.
  */
-void ploop_inflight_bios_ref_switch(struct ploop *ploop)
+int ploop_inflight_bios_ref_switch(struct ploop *ploop, bool killable)
 {
+	struct completion *comp = &ploop->inflight_bios_ref_comp;
 	unsigned int index = ploop->inflight_bios_ref_index;
+	int ret;
 
 	WARN_ON_ONCE(!(current->flags & PF_WQ_WORKER));
-	init_completion(&ploop->inflight_bios_ref_comp);
+
+	if (ploop->inflight_ref_comp_pending) {
+		/* Previous completion was interrupted */
+		ret = wait_for_completion_maybe_killable(comp, killable);
+		if (ret)
+			return ret;
+		ploop->inflight_ref_comp_pending = false;
+		percpu_ref_reinit(&ploop->inflight_bios_ref[!index]);
+	}
+
+	init_completion(comp);
 
 	write_lock_irq(&ploop->bat_rwlock);
 	ploop->inflight_bios_ref_index = !index;
@@ -86,8 +116,14 @@ void ploop_inflight_bios_ref_switch(struct ploop *ploop)
 
 	percpu_ref_kill(&ploop->inflight_bios_ref[index]);
 
-	wait_for_completion(&ploop->inflight_bios_ref_comp);
+	ret = wait_for_completion_maybe_killable(comp, killable);
+	if (ret) {
+		ploop->inflight_ref_comp_pending = true;
+		return ret;
+	}
+
 	percpu_ref_reinit(&ploop->inflight_bios_ref[index]);
+	return 0;
 }
 
 /* Find existing BAT cluster pointing to dst_cluster */
@@ -216,7 +252,7 @@ static int ploop_grow_relocate_cluster(struct ploop *ploop,
 
 	/* Redirect bios to kwork and wait inflights, which may use @cluster */
 	force_defer_bio_count_inc(ploop);
-	ploop_inflight_bios_ref_switch(ploop);
+	ploop_inflight_bios_ref_switch(ploop, false);
 
 	/* Read full cluster sync */
 	ret = ploop_read_cluster_sync(ploop, bio, dst_cluster);
@@ -729,7 +765,7 @@ static void process_merge_latest_snapshot_cmd(struct ploop *ploop,
 		write_lock_irq(&ploop->bat_rwlock);
 		file = ploop->deltas[--ploop->nr_deltas].file;
 		write_unlock_irq(&ploop->bat_rwlock);
-		ploop_inflight_bios_ref_switch(ploop);
+		ploop_inflight_bios_ref_switch(ploop, false);
 		fput(file);
 	}
 	complete(&cmd->comp); /* Last touch of cmd memory */
@@ -814,7 +850,7 @@ static void process_notify_delta_merged(struct ploop *ploop,
 	ploop->deltas[--ploop->nr_deltas].file = NULL;
 	write_unlock_irq(&ploop->bat_rwlock);
 
-	ploop_inflight_bios_ref_switch(ploop);
+	ploop_inflight_bios_ref_switch(ploop, false);
 	fput(file);
 
 	cmd->retval = 0;
@@ -851,7 +887,7 @@ static void process_update_delta_index(struct ploop *ploop,
 unlock:
 	write_unlock_irq(&ploop->bat_rwlock);
 	if (!ret)
-		ploop_inflight_bios_ref_switch(ploop);
+		ploop_inflight_bios_ref_switch(ploop, false);
 
 	cmd->retval = ret;
 	complete(&cmd->comp); /* Last touch of cmd memory */
@@ -975,7 +1011,7 @@ static void process_switch_top_delta(struct ploop *ploop, struct ploop_cmd *cmd)
 	unsigned int i, size, bat_clusters, level = ploop->nr_deltas;
 
 	force_defer_bio_count_inc(ploop);
-	ploop_inflight_bios_ref_switch(ploop);
+	ploop_inflight_bios_ref_switch(ploop, false);
 
 	/* If you add more two-stages-actions, you must cancel them here too */
 	cancel_discard_bios(ploop);
@@ -1085,7 +1121,7 @@ static void process_flip_upper_deltas(struct ploop *ploop, struct ploop_cmd *cmd
 	swap(ploop->deltas[level].file, cmd->flip_upper_deltas.file);
 	write_unlock_irq(&ploop->bat_rwlock);
 	/* Device is suspended, but anyway... */
-	ploop_inflight_bios_ref_switch(ploop);
+	ploop_inflight_bios_ref_switch(ploop, false);
 
 	cmd->retval = 0;
 	complete(&cmd->comp); /* Last touch of cmd memory */
@@ -1106,7 +1142,7 @@ static void process_tracking_start(struct ploop *ploop, struct ploop_cmd *cmd)
 	 * Here we care about ploop_map() sees ploop->tracking_bitmap,
 	 * since the rest of submitting are made from *this* kwork.
 	 */
-	ploop_inflight_bios_ref_switch(ploop);
+	ploop_inflight_bios_ref_switch(ploop, false);
 
 	write_lock_irq(&ploop->bat_rwlock);
 	for_each_clear_bit(i, ploop->holes_bitmap, ploop->hb_nr)
diff --git a/drivers/md/dm-ploop-map.c b/drivers/md/dm-ploop-map.c
index 4ca0f8e83400..d7cc5f078e14 100644
--- a/drivers/md/dm-ploop-map.c
+++ b/drivers/md/dm-ploop-map.c
@@ -437,7 +437,7 @@ static void handle_discard_bio(struct ploop *ploop, struct bio *bio,
 		 */
 		ploop->force_link_inflight_bios = true;
 		force_defer_bio_count_inc(ploop);
-		ploop_inflight_bios_ref_switch(ploop);
+		ploop_inflight_bios_ref_switch(ploop, false);
 	}
 
 	spin_lock_irqsave(&ploop->deferred_lock, flags);
diff --git a/drivers/md/dm-ploop.h b/drivers/md/dm-ploop.h
index 1dc29e4f5345..3b0a96da837f 100644
--- a/drivers/md/dm-ploop.h
+++ b/drivers/md/dm-ploop.h
@@ -111,6 +111,7 @@ struct ploop_cmd {
 
 #define BAT_LEVEL_TOP		U8_MAX
 #define CLEANUP_DELAY		20
+#define PLOOP_INFLIGHT_TIMEOUT	(60 * HZ)
 
 #define PLOOP_BIOS_HTABLE_BITS	8
 #define PLOOP_BIOS_HTABLE_SIZE	(1 << PLOOP_BIOS_HTABLE_BITS)
@@ -201,6 +202,7 @@ struct ploop {
 
 	struct completion inflight_bios_ref_comp;
 	struct percpu_ref inflight_bios_ref[2];
+	bool inflight_ref_comp_pending;
 	unsigned int inflight_bios_ref_index:1;
 
 	spinlock_t deferred_lock;
@@ -387,7 +389,7 @@ extern void process_deferred_cmd(struct ploop *ploop,
 			struct ploop_index_wb *piwb);
 extern int ploop_map(struct dm_target *ti, struct bio *bio);
 extern int ploop_endio(struct dm_target *ti, struct bio *bio, blk_status_t *err);
-extern void ploop_inflight_bios_ref_switch(struct ploop *ploop);
+extern int ploop_inflight_bios_ref_switch(struct ploop *ploop, bool killable);
 extern struct dm_ploop_endio_hook *find_lk_of_cluster(struct ploop *ploop,
 						      unsigned int cluster);
 extern void unlink_postponed_backup_endio(struct ploop *ploop,




More information about the Devel mailing list