[Devel] [PATCH vz9 v3] ploop: port and fix the standby mode feature.

Alexander Atanasov alexander.atanasov at virtuozzo.com
Fri Nov 4 15:48:20 MSK 2022


Initially in commit 80da9c2abac9 ("ploop: add a standby mode") a flag
on the block device's request queue was added to put the queue
into standby mode on EBUSY. Later on, the list with errors was extended
in commit e868f1e0b1a3 ("ploop: move to standby after -ENOTCONN too") and
in commit 4b1eb3f667eb ("ploop: kaio: Enter standby mode on EIO as well").

These were introduced to solve a problem on a specific device,
that will clear the standby flag at some point.
But the problem is that the clear counterpart was missing
for the rest of the devices, so once ploop gets one of the errors
it stops processing requests indefinitely.

When porting the feature restrict the standby mode flag to work only
on devices that handle it. To acheve this introduce a static key
to enable the flag handling only when key is enabled - checks are
at the start and at the end of every request and we want to avoid
performance impact from these checks.

On systems where we are in mixed mode, meaning we have both devices that
support the standby flag and devices that do not, a QUEUE_FLAG_STANDBY_EN
is introduced to indicate the standby support from the device.
Setting the QUEUE_FLAG_STANDBY_EN means the device promises to clear
QUEUE_FLAG_STANDBY flag when it recovers, so ploop can continue processing
requests.

To protect from errors we use the static key and the standby_en bit
as two fuses - if one is off we do not touch anything on the queue.
If we detect inconsistency at key or bits usage - we just warn
so it can be fixed.

The state of the flags is exported via /sys/block/*/queue/standby,
or at /sys/devices/virtual/block/*/queue depending on the device.
not supported - no enabled on the queue
on - queue is in standby mode, not processing requests
off - queue is processing requests

https://jira.sw.ru/browse/PSBM-142759
Signed-off-by: Alexander Atanasov <alexander.atanasov at virtuozzo.com>
---
 block/blk-sysfs.c            | 10 ++++++++++
 drivers/md/dm-ploop-map.c    | 35 ++++++++++++++++++++++++++++++++++-
 drivers/md/dm-ploop-target.c | 28 +++++++++++++++++++++++++++-
 drivers/md/dm-ploop.h        |  1 +
 include/linux/blkdev.h       |  4 ++++
 kernel/ve/ve.c               |  9 +++++++++
 6 files changed, 85 insertions(+), 2 deletions(-)

Cc: Denis V. Lunev <den at virtuozzo.com>
Cc: Kui Liu <Kui.Liu at acronis.com>
Cc: Alexey Kuznetsov <kuznet at acronis.com>


diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 4be6462c0008..c558c6c9a0ad 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -552,6 +552,14 @@ static ssize_t queue_dax_show(struct request_queue *q, char *page)
 	return queue_var_show(blk_queue_dax(q), page);
 }
 
+static ssize_t queue_standby_show(struct request_queue *q, char *page)
+{
+	if (!blk_queue_standby_en(q))
+		return sprintf(page, "not supported\n");
+	return sprintf(page, "%s\n",
+		blk_queue_standby(q) ? "on" : "off");
+}
+
 #define QUEUE_RO_ENTRY(_prefix, _name)			\
 static struct queue_sysfs_entry _prefix##_entry = {	\
 	.attr	= { .name = _name, .mode = 0444 },	\
@@ -606,6 +614,7 @@ QUEUE_RO_ENTRY(queue_dax, "dax");
 QUEUE_RW_ENTRY(queue_io_timeout, "io_timeout");
 QUEUE_RW_ENTRY(queue_wb_lat, "wbt_lat_usec");
 QUEUE_RO_ENTRY(queue_virt_boundary_mask, "virt_boundary_mask");
+QUEUE_RO_ENTRY(queue_standby, "standby");
 
 #ifdef CONFIG_BLK_DEV_THROTTLING_LOW
 QUEUE_RW_ENTRY(blk_throtl_sample_time, "throttle_sample_time");
@@ -667,6 +676,7 @@ static struct attribute *queue_attrs[] = {
 	&blk_throtl_sample_time_entry.attr,
 #endif
 	&queue_virt_boundary_mask_entry.attr,
+	&queue_standby_entry.attr,
 	NULL,
 };
 
diff --git a/drivers/md/dm-ploop-map.c b/drivers/md/dm-ploop-map.c
index bcbe3c644779..c0f6da751ec0 100644
--- a/drivers/md/dm-ploop-map.c
+++ b/drivers/md/dm-ploop-map.c
@@ -28,6 +28,8 @@ static void ploop_prq_endio(struct pio *pio, void *prq_ptr,
 
 #define DM_MSG_PREFIX "ploop"
 
+extern struct static_key_false ploop_standby_check;
+
 static unsigned int ploop_pio_nr_segs(struct pio *pio)
 {
 	struct bvec_iter bi = {
@@ -1165,6 +1167,26 @@ static void ploop_queue_resubmit(struct pio *pio)
 	queue_work(ploop->wq, &ploop->worker);
 }
 
+static void ploop_check_standby_mode(struct ploop *ploop, long res)
+{
+	struct request_queue *q = ploop_blk_queue(ploop);
+	int prev;
+
+	if (!blk_queue_standby_en(q))
+		return;
+
+	/* move to standby if delta lease was stolen or mount is gone */
+	if (res != -EBUSY && res != -ENOTCONN && res != -EIO)
+		return;
+
+	spin_lock_irq(&q->queue_lock);
+	prev = blk_queue_flag_test_and_set(QUEUE_FLAG_STANDBY, q);
+	spin_unlock_irq(&q->queue_lock);
+
+	if (!prev)
+		PL_INFO("was switched into the standby mode");
+}
+
 static void ploop_data_rw_complete(struct pio *pio)
 {
 	bool completed;
@@ -1177,6 +1199,8 @@ static void ploop_data_rw_complete(struct pio *pio)
 			ploop_queue_resubmit(pio);
 			return;
 		}
+		if (static_branch_unlikely(&ploop_standby_check))
+			ploop_check_standby_mode(pio->ploop, pio->ret);
 		pio->bi_status = errno_to_blk_status(pio->ret);
 	}
 
@@ -1817,8 +1841,11 @@ void do_ploop_fsync_work(struct work_struct *ws)
 	ret = vfs_fsync(file, 0);
 
 	while ((pio = ploop_pio_list_pop(&flush_pios)) != NULL) {
-		if (unlikely(ret))
+		if (unlikely(ret)) {
 			pio->bi_status = errno_to_blk_status(ret);
+			if (static_branch_unlikely(&ploop_standby_check))
+				ploop_check_standby_mode(ploop, ret);
+		}
 		ploop_pio_endio(pio);
 	}
 }
@@ -1871,6 +1898,12 @@ int ploop_clone_and_map(struct dm_target *ti, struct request *rq,
 	struct ploop_rq *prq;
 	struct pio *pio;
 
+
+	if (static_branch_unlikely(&ploop_standby_check)) {
+		if (blk_queue_standby(ploop_blk_queue(ploop)))
+			return DM_MAPIO_KILL;
+	}
+
 	if (blk_rq_bytes(rq) && ploop_rq_valid(ploop, rq) < 0)
 		return DM_MAPIO_KILL;
 
diff --git a/drivers/md/dm-ploop-target.c b/drivers/md/dm-ploop-target.c
index 673d8b955246..4665e8ee75d1 100644
--- a/drivers/md/dm-ploop-target.c
+++ b/drivers/md/dm-ploop-target.c
@@ -21,7 +21,6 @@
 #include <linux/uio.h>
 #include <linux/error-injection.h>
 #include "dm-ploop.h"
-#include "dm-core.h"
 
 #define DM_MSG_PREFIX "ploop"
 
@@ -33,6 +32,7 @@ MODULE_PARM_DESC(ignore_signature_disk_in_use,
 static struct kmem_cache *prq_cache;
 static struct kmem_cache *pio_cache;
 struct kmem_cache *cow_cache;
+extern struct static_key_false ploop_standby_check;
 
 static void ploop_aio_do_completion(struct pio *pio)
 {
@@ -407,6 +407,32 @@ static int ploop_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 	ti->private = ploop;
 	ploop->ti = ti;
 
+	/*
+	 * static branch and standby_en bit act as two fuses.
+	 * we only touch standby bit if both are set.
+	 */
+	if (static_branch_unlikely(&ploop_standby_check)) {
+		if (blk_queue_standby_en(ploop_blk_queue(ploop))) {
+			PL_INFO("standby support enabled\n");
+			if (blk_queue_standby(ploop_blk_queue(ploop))) {
+				blk_queue_flag_clear(QUEUE_FLAG_STANDBY,
+						ploop_blk_queue(ploop));
+				PL_INFO("recovering device from standby\n");
+			}
+		}
+	} else {
+		struct request_queue *q = ploop_blk_queue(ploop);
+#define W_FMT "ploop_standby_check is off on %s but it has bits set: %s%s%s\n"
+		/* queue flags sanity checks - warn if something looks wrong */
+		WARN_ONCE(blk_queue_standby(q) || blk_queue_standby_en(q),
+			W_FMT, ploop_device_name(ploop),
+			(blk_queue_standby(q) ? "standby" : ""),
+			(blk_queue_standby(q) && blk_queue_standby_en(q) ?
+				", " : ""),
+			(blk_queue_standby_en(q) ? "standby_en" : ""));
+#undef W_FMT
+	}
+
 	if (kstrtou32(argv[0], 10, &ploop->cluster_log) < 0) {
 		ret = -EINVAL;
 		ti->error = "could not parse cluster_log";
diff --git a/drivers/md/dm-ploop.h b/drivers/md/dm-ploop.h
index 2228096e7743..aea635b6a3fc 100644
--- a/drivers/md/dm-ploop.h
+++ b/drivers/md/dm-ploop.h
@@ -231,6 +231,7 @@ struct ploop {
 	struct timer_list enospc_timer;
 	bool event_enospc;
 };
+#define ploop_blk_queue(p) ((p)->ti->table->md->queue)
 
 struct ploop_rq {
 	struct request *rq;
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 8fcd753c70b0..bafe008245c0 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -434,6 +434,8 @@ struct request_queue {
 #define QUEUE_FLAG_RQ_ALLOC_TIME 27	/* record rq->alloc_time_ns */
 #define QUEUE_FLAG_HCTX_ACTIVE	28	/* at least one blk-mq hctx is active */
 #define QUEUE_FLAG_NOWAIT       29	/* device supports NOWAIT */
+#define QUEUE_FLAG_STANDBY      30      /* unable to handle read/write requests */
+#define QUEUE_FLAG_STANDBY_EN   31      /* enable standby queue flag */
 
 #define QUEUE_FLAG_MQ_DEFAULT	((1 << QUEUE_FLAG_IO_STAT) |		\
 				 (1 << QUEUE_FLAG_SAME_COMP) |		\
@@ -480,6 +482,8 @@ bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q);
 #define blk_queue_fua(q)	test_bit(QUEUE_FLAG_FUA, &(q)->queue_flags)
 #define blk_queue_registered(q)	test_bit(QUEUE_FLAG_REGISTERED, &(q)->queue_flags)
 #define blk_queue_nowait(q)	test_bit(QUEUE_FLAG_NOWAIT, &(q)->queue_flags)
+#define blk_queue_standby(q)    test_bit(QUEUE_FLAG_STANDBY, &(q)->queue_flags)
+#define blk_queue_standby_en(q)    test_bit(QUEUE_FLAG_STANDBY_EN, &(q)->queue_flags)
 
 extern void blk_set_pm_only(struct request_queue *q);
 extern void blk_clear_pm_only(struct request_queue *q);
diff --git a/kernel/ve/ve.c b/kernel/ve/ve.c
index 9ee16b66ba4e..8c614f0c8be0 100644
--- a/kernel/ve/ve.c
+++ b/kernel/ve/ve.c
@@ -54,6 +54,15 @@ extern struct kmapset_set sysfs_ve_perms_set;
 
 static struct kmem_cache *ve_cachep;
 
+/*
+ * this static key is used to enable queue standby flags
+ * checks in ploop code. these checks used by drivers that
+ * support managing the queue flags.
+ * But to avoid creating inter module dependencies leave key here
+ */
+DEFINE_STATIC_KEY_FALSE(ploop_standby_check);
+EXPORT_SYMBOL(ploop_standby_check);
+
 static DEFINE_PER_CPU(struct kstat_lat_pcpu_snap_struct, ve0_lat_stats);
 
 struct ve_struct ve0 = {
-- 
2.31.1



More information about the Devel mailing list