[Devel] [PATCH vz9 v3] ploop: port and fix the standby mode feature.

Kui Liu Kui.Liu at acronis.com
Wed Nov 16 18:20:59 MSK 2022


Hello, 

This patch looks good, no further questions from me. 

Regards,
LIU KUI
-----Original Message-----
From: Alexander Atanasov <alexander.atanasov at virtuozzo.com>
Date: Friday, 4 November 2022 at 8:49 PM
To: Devel <devel at openvz.org>
Cc: Konstantin Khorenko <khorenko at virtuozzo.com>, Alexander Atanasov <alexander.atanasov at virtuozzo.com>, "Denis V . Lunev" <den at virtuozzo.com>, Kui Liu <Kui.Liu at acronis.com>, Alexey Kuznetsov <kuznet at acronis.com>
Subject: [PATCH vz9 v3] ploop: port and fix the standby mode feature.

    Initially in commit 80da9c2abac9 ("ploop: add a standby mode") a flag
    on the block device's request queue was added to put the queue
    into standby mode on EBUSY. Later on, the list with errors was extended
    in commit e868f1e0b1a3 ("ploop: move to standby after -ENOTCONN too") and
    in commit 4b1eb3f667eb ("ploop: kaio: Enter standby mode on EIO as well").

    These were introduced to solve a problem on a specific device,
    that will clear the standby flag at some point.
    But the problem is that the clear counterpart was missing
    for the rest of the devices, so once ploop gets one of the errors
    it stops processing requests indefinitely.

    When porting the feature restrict the standby mode flag to work only
    on devices that handle it. To acheve this introduce a static key
    to enable the flag handling only when key is enabled - checks are
    at the start and at the end of every request and we want to avoid
    performance impact from these checks.

    On systems where we are in mixed mode, meaning we have both devices that
    support the standby flag and devices that do not, a QUEUE_FLAG_STANDBY_EN
    is introduced to indicate the standby support from the device.
    Setting the QUEUE_FLAG_STANDBY_EN means the device promises to clear
    QUEUE_FLAG_STANDBY flag when it recovers, so ploop can continue processing
    requests.

    To protect from errors we use the static key and the standby_en bit
    as two fuses - if one is off we do not touch anything on the queue.
    If we detect inconsistency at key or bits usage - we just warn
    so it can be fixed.

    The state of the flags is exported via /sys/block/*/queue/standby,
    or at /sys/devices/virtual/block/*/queue depending on the device.
    not supported - no enabled on the queue
    on - queue is in standby mode, not processing requests
    off - queue is processing requests

    https://jira.sw.ru/browse/PSBM-142759
    Signed-off-by: Alexander Atanasov <alexander.atanasov at virtuozzo.com>
    ---
     block/blk-sysfs.c            | 10 ++++++++++
     drivers/md/dm-ploop-map.c    | 35 ++++++++++++++++++++++++++++++++++-
     drivers/md/dm-ploop-target.c | 28 +++++++++++++++++++++++++++-
     drivers/md/dm-ploop.h        |  1 +
     include/linux/blkdev.h       |  4 ++++
     kernel/ve/ve.c               |  9 +++++++++
     6 files changed, 85 insertions(+), 2 deletions(-)

    Cc: Denis V. Lunev <den at virtuozzo.com>
    Cc: Kui Liu <Kui.Liu at acronis.com>
    Cc: Alexey Kuznetsov <kuznet at acronis.com>


    diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
    index 4be6462c0008..c558c6c9a0ad 100644
    --- a/block/blk-sysfs.c
    +++ b/block/blk-sysfs.c
    @@ -552,6 +552,14 @@ static ssize_t queue_dax_show(struct request_queue *q, char *page)
     	return queue_var_show(blk_queue_dax(q), page);
     }

    +static ssize_t queue_standby_show(struct request_queue *q, char *page)
    +{
    +	if (!blk_queue_standby_en(q))
    +		return sprintf(page, "not supported\n");
    +	return sprintf(page, "%s\n",
    +		blk_queue_standby(q) ? "on" : "off");
    +}
    +
     #define QUEUE_RO_ENTRY(_prefix, _name)			\
     static struct queue_sysfs_entry _prefix##_entry = {	\
     	.attr	= { .name = _name, .mode = 0444 },	\
    @@ -606,6 +614,7 @@ QUEUE_RO_ENTRY(queue_dax, "dax");
     QUEUE_RW_ENTRY(queue_io_timeout, "io_timeout");
     QUEUE_RW_ENTRY(queue_wb_lat, "wbt_lat_usec");
     QUEUE_RO_ENTRY(queue_virt_boundary_mask, "virt_boundary_mask");
    +QUEUE_RO_ENTRY(queue_standby, "standby");

     #ifdef CONFIG_BLK_DEV_THROTTLING_LOW
     QUEUE_RW_ENTRY(blk_throtl_sample_time, "throttle_sample_time");
    @@ -667,6 +676,7 @@ static struct attribute *queue_attrs[] = {
     	&blk_throtl_sample_time_entry.attr,
     #endif
     	&queue_virt_boundary_mask_entry.attr,
    +	&queue_standby_entry.attr,
     	NULL,
     };

    diff --git a/drivers/md/dm-ploop-map.c b/drivers/md/dm-ploop-map.c
    index bcbe3c644779..c0f6da751ec0 100644
    --- a/drivers/md/dm-ploop-map.c
    +++ b/drivers/md/dm-ploop-map.c
    @@ -28,6 +28,8 @@ static void ploop_prq_endio(struct pio *pio, void *prq_ptr,

     #define DM_MSG_PREFIX "ploop"

    +extern struct static_key_false ploop_standby_check;
    +
     static unsigned int ploop_pio_nr_segs(struct pio *pio)
     {
     	struct bvec_iter bi = {
    @@ -1165,6 +1167,26 @@ static void ploop_queue_resubmit(struct pio *pio)
     	queue_work(ploop->wq, &ploop->worker);
     }

    +static void ploop_check_standby_mode(struct ploop *ploop, long res)
    +{
    +	struct request_queue *q = ploop_blk_queue(ploop);
    +	int prev;
    +
    +	if (!blk_queue_standby_en(q))
    +		return;
    +
    +	/* move to standby if delta lease was stolen or mount is gone */
    +	if (res != -EBUSY && res != -ENOTCONN && res != -EIO)
    +		return;
    +
    +	spin_lock_irq(&q->queue_lock);
    +	prev = blk_queue_flag_test_and_set(QUEUE_FLAG_STANDBY, q);
    +	spin_unlock_irq(&q->queue_lock);
    +
    +	if (!prev)
    +		PL_INFO("was switched into the standby mode");
    +}
    +
     static void ploop_data_rw_complete(struct pio *pio)
     {
     	bool completed;
    @@ -1177,6 +1199,8 @@ static void ploop_data_rw_complete(struct pio *pio)
     			ploop_queue_resubmit(pio);
     			return;
     		}
    +		if (static_branch_unlikely(&ploop_standby_check))
    +			ploop_check_standby_mode(pio->ploop, pio->ret);
     		pio->bi_status = errno_to_blk_status(pio->ret);
     	}

    @@ -1817,8 +1841,11 @@ void do_ploop_fsync_work(struct work_struct *ws)
     	ret = vfs_fsync(file, 0);

     	while ((pio = ploop_pio_list_pop(&flush_pios)) != NULL) {
    -		if (unlikely(ret))
    +		if (unlikely(ret)) {
     			pio->bi_status = errno_to_blk_status(ret);
    +			if (static_branch_unlikely(&ploop_standby_check))
    +				ploop_check_standby_mode(ploop, ret);
    +		}
     		ploop_pio_endio(pio);
     	}
     }
    @@ -1871,6 +1898,12 @@ int ploop_clone_and_map(struct dm_target *ti, struct request *rq,
     	struct ploop_rq *prq;
     	struct pio *pio;

    +
    +	if (static_branch_unlikely(&ploop_standby_check)) {
    +		if (blk_queue_standby(ploop_blk_queue(ploop)))
    +			return DM_MAPIO_KILL;
    +	}
    +
     	if (blk_rq_bytes(rq) && ploop_rq_valid(ploop, rq) < 0)
     		return DM_MAPIO_KILL;

    diff --git a/drivers/md/dm-ploop-target.c b/drivers/md/dm-ploop-target.c
    index 673d8b955246..4665e8ee75d1 100644
    --- a/drivers/md/dm-ploop-target.c
    +++ b/drivers/md/dm-ploop-target.c
    @@ -21,7 +21,6 @@
     #include <linux/uio.h>
     #include <linux/error-injection.h>
     #include "dm-ploop.h"
    -#include "dm-core.h"

     #define DM_MSG_PREFIX "ploop"

    @@ -33,6 +32,7 @@ MODULE_PARM_DESC(ignore_signature_disk_in_use,
     static struct kmem_cache *prq_cache;
     static struct kmem_cache *pio_cache;
     struct kmem_cache *cow_cache;
    +extern struct static_key_false ploop_standby_check;

     static void ploop_aio_do_completion(struct pio *pio)
     {
    @@ -407,6 +407,32 @@ static int ploop_ctr(struct dm_target *ti, unsigned int argc, char **argv)
     	ti->private = ploop;
     	ploop->ti = ti;

    +	/*
    +	 * static branch and standby_en bit act as two fuses.
    +	 * we only touch standby bit if both are set.
    +	 */
    +	if (static_branch_unlikely(&ploop_standby_check)) {
    +		if (blk_queue_standby_en(ploop_blk_queue(ploop))) {
    +			PL_INFO("standby support enabled\n");
    +			if (blk_queue_standby(ploop_blk_queue(ploop))) {
    +				blk_queue_flag_clear(QUEUE_FLAG_STANDBY,
    +						ploop_blk_queue(ploop));
    +				PL_INFO("recovering device from standby\n");
    +			}
    +		}
    +	} else {
    +		struct request_queue *q = ploop_blk_queue(ploop);
    +#define W_FMT "ploop_standby_check is off on %s but it has bits set: %s%s%s\n"
    +		/* queue flags sanity checks - warn if something looks wrong */
    +		WARN_ONCE(blk_queue_standby(q) || blk_queue_standby_en(q),
    +			W_FMT, ploop_device_name(ploop),
    +			(blk_queue_standby(q) ? "standby" : ""),
    +			(blk_queue_standby(q) && blk_queue_standby_en(q) ?
    +				", " : ""),
    +			(blk_queue_standby_en(q) ? "standby_en" : ""));
    +#undef W_FMT
    +	}
    +
     	if (kstrtou32(argv[0], 10, &ploop->cluster_log) < 0) {
     		ret = -EINVAL;
     		ti->error = "could not parse cluster_log";
    diff --git a/drivers/md/dm-ploop.h b/drivers/md/dm-ploop.h
    index 2228096e7743..aea635b6a3fc 100644
    --- a/drivers/md/dm-ploop.h
    +++ b/drivers/md/dm-ploop.h
    @@ -231,6 +231,7 @@ struct ploop {
     	struct timer_list enospc_timer;
     	bool event_enospc;
     };
    +#define ploop_blk_queue(p) ((p)->ti->table->md->queue)

     struct ploop_rq {
     	struct request *rq;
    diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
    index 8fcd753c70b0..bafe008245c0 100644
    --- a/include/linux/blkdev.h
    +++ b/include/linux/blkdev.h
    @@ -434,6 +434,8 @@ struct request_queue {
     #define QUEUE_FLAG_RQ_ALLOC_TIME 27	/* record rq->alloc_time_ns */
     #define QUEUE_FLAG_HCTX_ACTIVE	28	/* at least one blk-mq hctx is active */
     #define QUEUE_FLAG_NOWAIT       29	/* device supports NOWAIT */
    +#define QUEUE_FLAG_STANDBY      30      /* unable to handle read/write requests */
    +#define QUEUE_FLAG_STANDBY_EN   31      /* enable standby queue flag */

     #define QUEUE_FLAG_MQ_DEFAULT	((1 << QUEUE_FLAG_IO_STAT) |		\
     				 (1 << QUEUE_FLAG_SAME_COMP) |		\
    @@ -480,6 +482,8 @@ bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q);
     #define blk_queue_fua(q)	test_bit(QUEUE_FLAG_FUA, &(q)->queue_flags)
     #define blk_queue_registered(q)	test_bit(QUEUE_FLAG_REGISTERED, &(q)->queue_flags)
     #define blk_queue_nowait(q)	test_bit(QUEUE_FLAG_NOWAIT, &(q)->queue_flags)
    +#define blk_queue_standby(q)    test_bit(QUEUE_FLAG_STANDBY, &(q)->queue_flags)
    +#define blk_queue_standby_en(q)    test_bit(QUEUE_FLAG_STANDBY_EN, &(q)->queue_flags)

     extern void blk_set_pm_only(struct request_queue *q);
     extern void blk_clear_pm_only(struct request_queue *q);
    diff --git a/kernel/ve/ve.c b/kernel/ve/ve.c
    index 9ee16b66ba4e..8c614f0c8be0 100644
    --- a/kernel/ve/ve.c
    +++ b/kernel/ve/ve.c
    @@ -54,6 +54,15 @@ extern struct kmapset_set sysfs_ve_perms_set;

     static struct kmem_cache *ve_cachep;

    +/*
    + * this static key is used to enable queue standby flags
    + * checks in ploop code. these checks used by drivers that
    + * support managing the queue flags.
    + * But to avoid creating inter module dependencies leave key here
    + */
    +DEFINE_STATIC_KEY_FALSE(ploop_standby_check);
    +EXPORT_SYMBOL(ploop_standby_check);
    +
     static DEFINE_PER_CPU(struct kstat_lat_pcpu_snap_struct, ve0_lat_stats);

     struct ve_struct ve0 = {
    -- 
    2.31.1





More information about the Devel mailing list