[Devel] [PATCH RHEL9 COMMIT] ploop: port and fix the standby mode feature

Konstantin Khorenko khorenko at virtuozzo.com
Mon Nov 21 15:30:34 MSK 2022


The commit is pushed to "branch-rh9-5.14.0-70.22.1.vz9.17.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh9-5.14.0-70.22.1.vz9.17.10
------>
commit e3d5cca08b5ed1abd6f8041ac531978d55562ecf
Author: Alexander Atanasov <alexander.atanasov at virtuozzo.com>
Date:   Fri Nov 4 14:48:20 2022 +0200

    ploop: port and fix the standby mode feature
    
    Initially in commit 80da9c2abac9 ("ploop: add a standby mode") a flag
    on the block device's request queue was added to put the queue
    into standby mode on EBUSY. Later on, the list with errors was extended
    in commit e868f1e0b1a3 ("ploop: move to standby after -ENOTCONN too") and
    in commit 4b1eb3f667eb ("ploop: kaio: Enter standby mode on EIO as well").
    
    These were introduced to solve a problem on a specific device,
    that will clear the standby flag at some point.
    But the problem is that the clear counterpart was missing
    for the rest of the devices, so once ploop gets one of the errors
    it stops processing requests indefinitely.
    
    When porting the feature, restrict the standby mode flag to work only
    on devices that handle it. To achieve this, introduce a static key so
    that the flag is handled only when the key is enabled: the checks sit
    at the start and at the end of every request, and we want to avoid
    the performance impact of these checks.
    
    Currently the only example of a configuration which supports ploop
    standby mode is the following:
     - vStorage is mounted on the Host
     - a ploop device whose image is stored on the vStorage
     - vStorage provides an iSCSI target which is built on that ploop
       device, the target is handled by the SCST kernel module
    
    How the static key and the standby queue flags are going to be used:
     - The global "ploop_standby_check" static key will be enabled in
       the SCST module init
     - SCST will set the QUEUE_FLAG_STANDBY_EN bit when a ploop device is
       added to SCST, and clear it when it is deleted from SCST
    
    On systems in mixed mode, i.e. with both devices that support the
    standby flag and devices that do not, the QUEUE_FLAG_STANDBY_EN flag
    is introduced to indicate that a device supports standby.
    Setting QUEUE_FLAG_STANDBY_EN means the device promises to clear the
    QUEUE_FLAG_STANDBY flag when it recovers, so ploop can continue
    processing requests.
    
    To protect from errors we use the static key and the standby_en bit
    as two fuses - if either one is off we do not touch anything on the
    queue. If we detect an inconsistency in the usage of the key or the
    bits, we just warn so it can be fixed.
    
    The state of the flags is exported via /sys/block/*/queue/standby, or
    at /sys/devices/virtual/block/*/queue/standby depending on the device.
     * not supported - standby is not enabled on the queue
     * on  - queue is in standby mode, not processing requests
     * off - queue is processing requests
    
    https://jira.sw.ru/browse/PSBM-142759
    Signed-off-by: Alexander Atanasov <alexander.atanasov at virtuozzo.com>
    Reviewed-by: Kui Liu <Kui.Liu at acronis.com>
    
    Feature: dm-ploop: standby mode
    
    =============================================================
    Differences in vz7/vz9 implementation (explained by Liu Kui):
    
    When a ploop is switched to standby mode, its request queue flag
    QUEUE_FLAG_STANDBY is set.
    
     * Once the bit is detected by SCST, the userspace will initiate
       recovery by replacing the top delta file without destroying the
       device, which is why the bit is cleared in ploop_replace_delta()
       in vz7.
    
     * In vz9, replace delta is achieved by table reload, which reallocates
       a new ploop instance, but keeps underlying mapped_device unchanged,
       thus request_queue unchanged, so we have to clear the bit in
       ploop_ctr().
---
 block/blk-sysfs.c            | 10 ++++++++++
 drivers/md/dm-ploop-map.c    | 35 ++++++++++++++++++++++++++++++++++-
 drivers/md/dm-ploop-target.c | 28 +++++++++++++++++++++++++++-
 drivers/md/dm-ploop.h        |  1 +
 include/linux/blkdev.h       |  4 ++++
 kernel/ve/ve.c               |  8 ++++++++
 6 files changed, 84 insertions(+), 2 deletions(-)

diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 4be6462c0008..c558c6c9a0ad 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -552,6 +552,14 @@ static ssize_t queue_dax_show(struct request_queue *q, char *page)
 	return queue_var_show(blk_queue_dax(q), page);
 }
 
+static ssize_t queue_standby_show(struct request_queue *q, char *page)
+{
+	if (!blk_queue_standby_en(q))
+		return sprintf(page, "not supported\n");
+	return sprintf(page, "%s\n",
+		blk_queue_standby(q) ? "on" : "off");
+}
+
 #define QUEUE_RO_ENTRY(_prefix, _name)			\
 static struct queue_sysfs_entry _prefix##_entry = {	\
 	.attr	= { .name = _name, .mode = 0444 },	\
@@ -606,6 +614,7 @@ QUEUE_RO_ENTRY(queue_dax, "dax");
 QUEUE_RW_ENTRY(queue_io_timeout, "io_timeout");
 QUEUE_RW_ENTRY(queue_wb_lat, "wbt_lat_usec");
 QUEUE_RO_ENTRY(queue_virt_boundary_mask, "virt_boundary_mask");
+QUEUE_RO_ENTRY(queue_standby, "standby");
 
 #ifdef CONFIG_BLK_DEV_THROTTLING_LOW
 QUEUE_RW_ENTRY(blk_throtl_sample_time, "throttle_sample_time");
@@ -667,6 +676,7 @@ static struct attribute *queue_attrs[] = {
 	&blk_throtl_sample_time_entry.attr,
 #endif
 	&queue_virt_boundary_mask_entry.attr,
+	&queue_standby_entry.attr,
 	NULL,
 };
 
diff --git a/drivers/md/dm-ploop-map.c b/drivers/md/dm-ploop-map.c
index bcbe3c644779..c0f6da751ec0 100644
--- a/drivers/md/dm-ploop-map.c
+++ b/drivers/md/dm-ploop-map.c
@@ -28,6 +28,8 @@ static void ploop_prq_endio(struct pio *pio, void *prq_ptr,
 
 #define DM_MSG_PREFIX "ploop"
 
+extern struct static_key_false ploop_standby_check;
+
 static unsigned int ploop_pio_nr_segs(struct pio *pio)
 {
 	struct bvec_iter bi = {
@@ -1165,6 +1167,26 @@ static void ploop_queue_resubmit(struct pio *pio)
 	queue_work(ploop->wq, &ploop->worker);
 }
 
+static void ploop_check_standby_mode(struct ploop *ploop, long res)
+{
+	struct request_queue *q = ploop_blk_queue(ploop);
+	int prev;
+
+	if (!blk_queue_standby_en(q))
+		return;
+
+	/* move to standby if delta lease was stolen or mount is gone */
+	if (res != -EBUSY && res != -ENOTCONN && res != -EIO)
+		return;
+
+	spin_lock_irq(&q->queue_lock);
+	prev = blk_queue_flag_test_and_set(QUEUE_FLAG_STANDBY, q);
+	spin_unlock_irq(&q->queue_lock);
+
+	if (!prev)
+		PL_INFO("was switched into the standby mode");
+}
+
 static void ploop_data_rw_complete(struct pio *pio)
 {
 	bool completed;
@@ -1177,6 +1199,8 @@ static void ploop_data_rw_complete(struct pio *pio)
 			ploop_queue_resubmit(pio);
 			return;
 		}
+		if (static_branch_unlikely(&ploop_standby_check))
+			ploop_check_standby_mode(pio->ploop, pio->ret);
 		pio->bi_status = errno_to_blk_status(pio->ret);
 	}
 
@@ -1817,8 +1841,11 @@ void do_ploop_fsync_work(struct work_struct *ws)
 	ret = vfs_fsync(file, 0);
 
 	while ((pio = ploop_pio_list_pop(&flush_pios)) != NULL) {
-		if (unlikely(ret))
+		if (unlikely(ret)) {
 			pio->bi_status = errno_to_blk_status(ret);
+			if (static_branch_unlikely(&ploop_standby_check))
+				ploop_check_standby_mode(ploop, ret);
+		}
 		ploop_pio_endio(pio);
 	}
 }
@@ -1871,6 +1898,12 @@ int ploop_clone_and_map(struct dm_target *ti, struct request *rq,
 	struct ploop_rq *prq;
 	struct pio *pio;
 
+
+	if (static_branch_unlikely(&ploop_standby_check)) {
+		if (blk_queue_standby(ploop_blk_queue(ploop)))
+			return DM_MAPIO_KILL;
+	}
+
 	if (blk_rq_bytes(rq) && ploop_rq_valid(ploop, rq) < 0)
 		return DM_MAPIO_KILL;
 
diff --git a/drivers/md/dm-ploop-target.c b/drivers/md/dm-ploop-target.c
index 673d8b955246..cc5dbbfa707f 100644
--- a/drivers/md/dm-ploop-target.c
+++ b/drivers/md/dm-ploop-target.c
@@ -21,7 +21,6 @@
 #include <linux/uio.h>
 #include <linux/error-injection.h>
 #include "dm-ploop.h"
-#include "dm-core.h"
 
 #define DM_MSG_PREFIX "ploop"
 
@@ -33,6 +32,7 @@ MODULE_PARM_DESC(ignore_signature_disk_in_use,
 static struct kmem_cache *prq_cache;
 static struct kmem_cache *pio_cache;
 struct kmem_cache *cow_cache;
+extern struct static_key_false ploop_standby_check;
 
 static void ploop_aio_do_completion(struct pio *pio)
 {
@@ -407,6 +407,32 @@ static int ploop_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 	ti->private = ploop;
 	ploop->ti = ti;
 
+	/*
+	 * Static branch and standby_en bit act as two fuses.
+	 * We only touch standby bit if both are set.
+	 */
+	if (static_branch_unlikely(&ploop_standby_check)) {
+		if (blk_queue_standby_en(ploop_blk_queue(ploop))) {
+			PL_INFO("standby support enabled\n");
+			if (blk_queue_standby(ploop_blk_queue(ploop))) {
+				blk_queue_flag_clear(QUEUE_FLAG_STANDBY,
+						ploop_blk_queue(ploop));
+				PL_INFO("recovering device from standby\n");
+			}
+		}
+	} else {
+		struct request_queue *q = ploop_blk_queue(ploop);
+#define W_FMT "ploop_standby_check is off on %s but it has bits set: %s%s%s\n"
+		/* queue flags sanity checks - warn if something looks wrong */
+		WARN_ONCE(blk_queue_standby(q) || blk_queue_standby_en(q),
+			W_FMT, ploop_device_name(ploop),
+			(blk_queue_standby(q) ? "standby" : ""),
+			(blk_queue_standby(q) && blk_queue_standby_en(q) ?
+				", " : ""),
+			(blk_queue_standby_en(q) ? "standby_en" : ""));
+#undef W_FMT
+	}
+
 	if (kstrtou32(argv[0], 10, &ploop->cluster_log) < 0) {
 		ret = -EINVAL;
 		ti->error = "could not parse cluster_log";
diff --git a/drivers/md/dm-ploop.h b/drivers/md/dm-ploop.h
index 4a25d869dd26..2d1db3fc8c57 100644
--- a/drivers/md/dm-ploop.h
+++ b/drivers/md/dm-ploop.h
@@ -231,6 +231,7 @@ struct ploop {
 	struct timer_list enospc_timer;
 	bool event_enospc;
 };
+#define ploop_blk_queue(p) ((p)->ti->table->md->queue)
 
 struct ploop_rq {
 	struct request *rq;
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 8fcd753c70b0..bafe008245c0 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -434,6 +434,8 @@ struct request_queue {
 #define QUEUE_FLAG_RQ_ALLOC_TIME 27	/* record rq->alloc_time_ns */
 #define QUEUE_FLAG_HCTX_ACTIVE	28	/* at least one blk-mq hctx is active */
 #define QUEUE_FLAG_NOWAIT       29	/* device supports NOWAIT */
+#define QUEUE_FLAG_STANDBY      30      /* unable to handle read/write requests */
+#define QUEUE_FLAG_STANDBY_EN   31      /* enable standby queue flag */
 
 #define QUEUE_FLAG_MQ_DEFAULT	((1 << QUEUE_FLAG_IO_STAT) |		\
 				 (1 << QUEUE_FLAG_SAME_COMP) |		\
@@ -480,6 +482,8 @@ bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q);
 #define blk_queue_fua(q)	test_bit(QUEUE_FLAG_FUA, &(q)->queue_flags)
 #define blk_queue_registered(q)	test_bit(QUEUE_FLAG_REGISTERED, &(q)->queue_flags)
 #define blk_queue_nowait(q)	test_bit(QUEUE_FLAG_NOWAIT, &(q)->queue_flags)
+#define blk_queue_standby(q)    test_bit(QUEUE_FLAG_STANDBY, &(q)->queue_flags)
+#define blk_queue_standby_en(q)    test_bit(QUEUE_FLAG_STANDBY_EN, &(q)->queue_flags)
 
 extern void blk_set_pm_only(struct request_queue *q);
 extern void blk_clear_pm_only(struct request_queue *q);
diff --git a/kernel/ve/ve.c b/kernel/ve/ve.c
index 9ee16b66ba4e..8f15aefcd6d0 100644
--- a/kernel/ve/ve.c
+++ b/kernel/ve/ve.c
@@ -54,6 +54,14 @@ extern struct kmapset_set sysfs_ve_perms_set;
 
 static struct kmem_cache *ve_cachep;
 
+/*
+ * This static key is used to enable queue standby flags checks in ploop code.
+ * These checks are used by drivers that support managing the queue flags.
+ * To avoid creating inter-module dependencies, the key is defined here.
+ */
+DEFINE_STATIC_KEY_FALSE(ploop_standby_check);
+EXPORT_SYMBOL(ploop_standby_check);
+
 static DEFINE_PER_CPU(struct kstat_lat_pcpu_snap_struct, ve0_lat_stats);
 
 struct ve_struct ve0 = {


More information about the Devel mailing list