[Devel] [PATCH RHEL8 COMMIT] ploop: Freeze on ENOSPC and notify userspace via dm event

Konstantin Khorenko khorenko at virtuozzo.com
Thu Jun 17 19:03:23 MSK 2021


The commit is pushed to "branch-rh8-4.18.0-240.1.1.vz8.5.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh8-4.18.0-240.1.1.vz8.5.44
------>
commit 65c216bd2c624ad0e21cf013d0b8d85fb74226ad
Author: Kirill Tkhai <ktkhai at virtuozzo.com>
Date:   Thu Jun 17 19:03:23 2021 +0300

    ploop: Freeze on ENOSPC and notify userspace via dm event
    
    This introduces functionality to delay WRITE requests
    which previously failed with -ENOSPC. Normally, such
    requests would end immediately with a BLK_STS_NOSPC error.
    After the sysadmin increases host disk space, WRITE requests
    will continue to be dispatched automatically.
    READ requests are dispatched no matter the delay state
    of WRITE requests.
    
    Delayed requests are placed in enospc_pios list,
    and ploop tries to resubmit them in usual way
    after timeout. Standard dm way is used to notify
    userspace about ENOSPC event.
    
    Usage in userspace monitor thread.
    
    1)Monitor task waits for next event:
    
    $dmsetup info ploopXXX | grep "Event number"
    Event number:      <event_nr>
    
    $dmsetup wait ploopXXX <event_nr>
    (waits until <event_nr> is increased)
    
    2)After the next event has occurred and the wait has completed,
    the task checks for the incoming event:
    
    $dmsetup message ploopXXX 0 get_event
    event_ENOSPC
    
    "event_ENOSPC" means ploop WRITE requests became
    frozen because the underlying fs has no free space.
    Empty output means that there is no new event
    (this is possible when "get_event" is called
    without waiting for the next event_nr).
    
    3)Monitor task increases the host disk space,
    and then IO continues automatically (in less than
    the 20 second timeout).
    
    Note that "dmsetup suspend ploopXXX" cancels all
    delayed requests (otherwise, we would not be able
    to kill the device in case there is no possibility
    to increase host space).
    
    https://jira.sw.ru/browse/PSBM-127225
    
    Signed-off-by: Kirill Tkhai <ktkhai at virtuozzo.com>
    
    =====================
    Patchset description:
    
    ploop: Freeze WRITE on -ENOSPC on host
    
    Suspend WRITEs in case the host's fs returns -ENOSPC.
    
    https://jira.sw.ru/browse/PSBM-127225
    
    Kirill Tkhai (5):
          ploop: Add check of htable is empty on .dtr
          ploop: Rename delayed_pios into suspended_pios
          ploop: Remove unused define and comment
          ploop: Close race in ploop_flip_upper_deltas()
          ploop: Freeze on ENOSPC and notify userspace via dm event
    
    Signed-off-by: Kirill Tkhai <ktkhai at virtuozzo.com>
---
 drivers/md/dm-ploop-cmd.c    | 22 +++++++++++++++++++
 drivers/md/dm-ploop-map.c    | 51 ++++++++++++++++++++++++++++++++++++++++++--
 drivers/md/dm-ploop-target.c | 22 +++++++++++++++++++
 drivers/md/dm-ploop.h        | 10 +++++++++
 4 files changed, 103 insertions(+), 2 deletions(-)

diff --git a/drivers/md/dm-ploop-cmd.c b/drivers/md/dm-ploop-cmd.c
index 2afe909171d8..914a44725270 100644
--- a/drivers/md/dm-ploop-cmd.c
+++ b/drivers/md/dm-ploop-cmd.c
@@ -1197,6 +1197,22 @@ void process_deferred_cmd(struct ploop *ploop, struct ploop_index_wb *piwb)
 	spin_lock_irq(&ploop->deferred_lock);
 }
 
+static int ploop_get_event(struct ploop *ploop, char *result, unsigned int maxlen) /* "get_event" message: report and clear a pending ENOSPC event */
+{
+	unsigned int sz = 0; /* presumably the output offset DMEMIT() maintains into result -- TODO confirm */
+	int ret = 0; /* stays 0 when no ENOSPC event is pending */
+
+	spin_lock_irq(&ploop->deferred_lock);
+	if (ploop->event_enospc) {
+		ret = (DMEMIT("event_ENOSPC\n")) ? 1 : 0; /* 1 only if DMEMIT() evaluates to non-zero */
+		if (ret)
+			ploop->event_enospc = false; /* flag is left set if nothing was emitted */
+	}
+	spin_unlock_irq(&ploop->deferred_lock);
+
+	return ret;
+}
+
 static bool msg_wants_down_read(const char *cmd)
 {
 	/* TODO: kill get_delta_name */
@@ -1222,6 +1238,12 @@ int ploop_message(struct dm_target *ti, unsigned int argc, char **argv,
 	if (argc < 1)
 		goto out;
 
+	if (!strcmp(argv[0], "get_event")) {
+		if (argc == 1)
+			ret = ploop_get_event(ploop, result, maxlen);
+		goto out;
+	}
+
 	read = msg_wants_down_read(argv[0]);
 	if (read)
 		down_read(&ploop->ctl_rwsem);
diff --git a/drivers/md/dm-ploop-map.c b/drivers/md/dm-ploop-map.c
index a655ab426e8e..c3fd80b5c937 100644
--- a/drivers/md/dm-ploop-map.c
+++ b/drivers/md/dm-ploop-map.c
@@ -988,6 +988,49 @@ static void ploop_queue_resubmit(struct pio *pio)
 	queue_work(ploop->wq, &ploop->fsync_worker);
 }
 
+void ploop_enospc_timer(struct timer_list *timer) /* enospc_timer callback; ploop_presuspend() also calls it directly */
+{
+	struct ploop *ploop = from_timer(ploop, timer, enospc_timer);
+	unsigned long flags;
+
+	spin_lock_irqsave(&ploop->deferred_lock, flags);
+	list_splice_init(&ploop->enospc_pios, &ploop->resubmit_pios); /* move every delayed pio onto resubmit_pios, leaving enospc_pios empty */
+	spin_unlock_irqrestore(&ploop->deferred_lock, flags);
+
+	queue_work(ploop->wq, &ploop->fsync_worker); /* presumably fsync_worker drains resubmit_pios -- TODO confirm */
+}
+
+void ploop_event_work(struct work_struct *ws) /* event_work handler: signals a dm event on the target's table */
+{
+	struct ploop *ploop = container_of(ws, struct ploop, event_work);
+
+	dm_table_event(ploop->ti->table); /* done from a work item; presumably because the scheduling site may be atomic -- TODO confirm */
+}
+
+static bool ploop_try_delay_enospc(struct pio *pio) /* returns true if pio was parked on enospc_pios, false if suspend is wanted */
+{
+	struct ploop *ploop = pio->ploop;
+	bool delayed = true;
+	unsigned long flags;
+
+	spin_lock_irqsave(&ploop->deferred_lock, flags);
+	if (unlikely(ploop->wants_suspend)) { /* suspend requested: do not park, caller proceeds with the error */
+		delayed = false;
+		goto unlock;
+	}
+
+	ploop->event_enospc = true; /* read and cleared by ploop_get_event() */
+	list_add_tail(&pio->list, &ploop->enospc_pios);
+unlock:
+	spin_unlock_irqrestore(&ploop->deferred_lock, flags);
+
+	if (delayed)
+		mod_timer(&ploop->enospc_timer, jiffies + PLOOP_ENOSPC_TIMEOUT); /* NOTE(review): armed outside deferred_lock; may re-arm after ploop_presuspend()'s del_timer_sync() -- confirm */
+	schedule_work(&ploop->event_work); /* NOTE(review): also scheduled when delayed == false -- confirm this is intended */
+
+	return delayed;
+}
+
 static void data_rw_complete(struct pio *pio)
 {
 	bool completed;
@@ -1000,8 +1043,12 @@ static void data_rw_complete(struct pio *pio)
 			ploop_queue_resubmit(pio);
 			return;
 		}
-
-                pio->bi_status = errno_to_blk_status(pio->ret);
+		if (pio->ret == -ENOSPC) {
+			WARN_ON_ONCE(!op_is_write(pio->bi_op));
+			if (ploop_try_delay_enospc(pio))
+				return;
+		}
+		pio->bi_status = errno_to_blk_status(pio->ret);
 	}
 
 	if (pio->is_data_alloc) {
diff --git a/drivers/md/dm-ploop-target.c b/drivers/md/dm-ploop-target.c
index da241509af3b..76f66fe11de1 100644
--- a/drivers/md/dm-ploop-target.c
+++ b/drivers/md/dm-ploop-target.c
@@ -321,12 +321,15 @@ static int ploop_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 	INIT_LIST_HEAD(&ploop->flush_pios);
 	INIT_LIST_HEAD(&ploop->discard_pios);
 	INIT_LIST_HEAD(&ploop->resubmit_pios);
+	INIT_LIST_HEAD(&ploop->enospc_pios);
 	INIT_LIST_HEAD(&ploop->cluster_lk_list);
 	INIT_LIST_HEAD(&ploop->delta_cow_action_list);
 	ploop->bat_entries = RB_ROOT;
+	timer_setup(&ploop->enospc_timer, ploop_enospc_timer, 0);
 
 	INIT_WORK(&ploop->worker, do_ploop_work);
 	INIT_WORK(&ploop->fsync_worker, do_ploop_fsync_work);
+	INIT_WORK(&ploop->event_work, ploop_event_work);
 	init_completion(&ploop->inflight_bios_ref_comp);
 
 	for (i = 0; i < 2; i++) {
@@ -411,6 +414,14 @@ static void ploop_status(struct dm_target *ti, status_type_t type,
 	read_unlock_irq(&ploop->bat_rwlock);
 }
 
+static void ploop_set_wants_suspend(struct dm_target *ti, bool wants) /* store ploop->wants_suspend under deferred_lock */
+{
+	struct ploop *ploop = ti->private;
+
+	spin_lock_irq(&ploop->deferred_lock); /* same lock ploop_try_delay_enospc() holds when reading the flag */
+	ploop->wants_suspend = wants;
+	spin_unlock_irq(&ploop->deferred_lock);
+}
 static void ploop_set_suspended(struct dm_target *ti, bool suspended)
 {
 	struct ploop *ploop = ti->private;
@@ -422,9 +433,19 @@ static void ploop_set_suspended(struct dm_target *ti, bool suspended)
 
 static void ploop_presuspend(struct dm_target *ti)
 {
+	struct ploop *ploop = ti->private;
+	/*
+	 * Stop parking new ENOSPC pios and release the already parked ones.
+	 * Otherwise, we may never be able to suspend this target.
+	 */
+	ploop_set_wants_suspend(ti, true); /* from now on ploop_try_delay_enospc() refuses to delay */
+	flush_work(&ploop->event_work); /* wait for a queued/running event notification to finish */
+	del_timer_sync(&ploop->enospc_timer); /* stop the pending retry timer, waiting out a running callback */
+	ploop_enospc_timer(&ploop->enospc_timer); /* run the timer body by hand to move enospc_pios to resubmit now */
 }
 static void ploop_presuspend_undo(struct dm_target *ti)
 {
+	ploop_set_wants_suspend(ti, false); /* clears the flag set in ploop_presuspend(), so ENOSPC delaying is allowed again */
 }
 static void ploop_postsuspend(struct dm_target *ti)
 {
@@ -446,6 +467,7 @@ static int ploop_preresume(struct dm_target *ti)
 		 * no more reasons to break resume.
 		 */
 		ploop_set_suspended(ti, false);
+		ploop_set_wants_suspend(ti, false);
 	}
 	return ret;
 }
diff --git a/drivers/md/dm-ploop.h b/drivers/md/dm-ploop.h
index 143060d6cadf..08262e5e48ca 100644
--- a/drivers/md/dm-ploop.h
+++ b/drivers/md/dm-ploop.h
@@ -77,6 +77,7 @@ struct ploop_cmd {
 #define BAT_ENTRY_NONE		UINT_MAX
 
 #define PLOOP_INFLIGHT_TIMEOUT	(60 * HZ)
+#define PLOOP_ENOSPC_TIMEOUT	(20 * HZ)
 
 #define PLOOP_BIOS_HTABLE_BITS	8
 #define PLOOP_BIOS_HTABLE_SIZE	(1 << PLOOP_BIOS_HTABLE_BITS)
@@ -155,6 +156,7 @@ struct ploop {
 	struct workqueue_struct *wq;
 	struct work_struct worker;
 	struct work_struct fsync_worker;
+	struct work_struct event_work;
 
 	struct completion inflight_bios_ref_comp;
 	struct percpu_ref inflight_bios_ref[2];
@@ -170,6 +172,7 @@ struct ploop {
 	struct list_head flush_pios;
 	struct list_head discard_pios;
 	struct list_head resubmit_pios; /* After partial IO */
+	struct list_head enospc_pios; /* Delayed after ENOSPC */
 
 	struct rw_semaphore ctl_rwsem;
 	struct ploop_cmd *deferred_cmd;
@@ -187,9 +190,14 @@ struct ploop {
 	bool noresume;
 	/* Device is suspended */
 	bool suspended;
+	/* Device wants suspend */
+	bool wants_suspend;
 
 	/* Maintaince in process */
 	bool maintaince;
+
+	struct timer_list enospc_timer;
+	bool event_enospc;
 };
 
 struct ploop_rq {
@@ -491,6 +499,7 @@ extern void submit_pios(struct ploop *ploop, struct list_head *list);
 extern void defer_pios(struct ploop *ploop, struct pio *pio, struct list_head *pio_list);
 extern void do_ploop_work(struct work_struct *ws);
 extern void do_ploop_fsync_work(struct work_struct *ws);
+extern void ploop_event_work(struct work_struct *work);
 extern void process_deferred_cmd(struct ploop *ploop,
 			struct ploop_index_wb *piwb);
 extern int ploop_clone_and_map(struct dm_target *ti, struct request *rq,
@@ -520,4 +529,5 @@ extern int ploop_read_delta_metadata(struct ploop *ploop, struct file *file,
 				     void **d_hdr);
 extern void ploop_call_rw_iter(struct file *file, loff_t pos, unsigned rw,
 			       struct iov_iter *iter, struct pio *pio);
+extern void ploop_enospc_timer(struct timer_list *timer);
 #endif /* __DM_PLOOP_H */


More information about the Devel mailing list