[Devel] [RFC PATCH vz9 v5 12/49] dm-ploop: WIP move from wq to kthread

Alexander Atanasov alexander.atanasov at virtuozzo.com
Mon Nov 18 09:24:39 MSK 2024


This is the base for moving to a multithreaded model.

https://virtuozzo.atlassian.net/browse/VSTOR-91821
Signed-off-by: Alexander Atanasov <alexander.atanasov at virtuozzo.com>
---
 drivers/md/dm-ploop-map.c    | 65 ++++++++++++++++++++++++++++++++----
 drivers/md/dm-ploop-target.c | 60 +++++++++++++++++++++++++++++++++
 drivers/md/dm-ploop.h        | 10 ++++++
 3 files changed, 128 insertions(+), 7 deletions(-)

diff --git a/drivers/md/dm-ploop-map.c b/drivers/md/dm-ploop-map.c
index 83b4b8148b10..7acb86ad8b46 100644
--- a/drivers/md/dm-ploop-map.c
+++ b/drivers/md/dm-ploop-map.c
@@ -337,6 +337,16 @@ static int ploop_split_pio_to_list(struct ploop *ploop, struct pio *pio,
 }
 ALLOW_ERROR_INJECTION(ploop_split_pio_to_list, ERRNO);
 
+#define USE_KTHREAD	/* when defined, work is handed to the kthread, not ploop->wq */
+static void ploop_schedule_work(struct ploop *ploop)
+{
+#ifdef USE_KTHREAD
+	wake_up_process(ploop->kt_worker->task);
+#else
+	queue_work(ploop->wq, &ploop->worker);
+#endif
+}
+
 static void ploop_dispatch_pio(struct ploop *ploop, struct pio *pio)
 {
 	struct llist_head *list = (struct llist_head *)&ploop->pios[pio->queue_list_id];
@@ -360,7 +370,7 @@ void ploop_dispatch_pios(struct ploop *ploop, struct pio *pio,
 			ploop_dispatch_pio(ploop, pio);
 	}
 
-	queue_work(ploop->wq, &ploop->worker);
+	ploop_schedule_work(ploop);
 }
 
 static bool ploop_delay_if_md_busy(struct ploop *ploop, struct md_page *md,
@@ -692,7 +702,7 @@ static void ploop_complete_cow(struct ploop_cow *cow, blk_status_t bi_status)
 
 	ploop_queue_or_fail(ploop, blk_status_to_errno(bi_status), cow_pio);
 
-	queue_work(ploop->wq, &ploop->worker);
+	ploop_schedule_work(ploop);
 	ploop_free_pio_with_pages(ploop, cow->aux_pio);
 	kmem_cache_free(cow_cache, cow);
 }
@@ -1141,7 +1151,7 @@ static void ploop_queue_resubmit(struct pio *pio)
 
 	llist_add((struct llist_node *)(&pio->list), &ploop->llresubmit_pios);
 
-	queue_work(ploop->wq, &ploop->worker);
+	ploop_schedule_work(ploop);
 }
 
 static void ploop_check_standby_mode(struct ploop *ploop, long res)
@@ -1813,9 +1823,8 @@ static void process_ploop_fsync_work(struct ploop *ploop)
 	}
 }
 
-void do_ploop_work(struct work_struct *ws)
+void do_ploop_run_work(struct ploop *ploop)
 {
-	struct ploop *ploop = container_of(ws, struct ploop, worker);
 	LIST_HEAD(deferred_pios);
 	struct llist_node *llembedded_pios;
 	struct llist_node *lldeferred_pios;
@@ -1827,12 +1836,14 @@ void do_ploop_work(struct work_struct *ws)
 	current->flags |= PF_IO_THREAD|PF_LOCAL_THROTTLE|PF_MEMALLOC_NOIO;
 
 	llembedded_pios = llist_del_all(&ploop->pios[PLOOP_LIST_PREPARE]);
+        smp_wmb();
+
 	lldeferred_pios = llist_del_all(&ploop->pios[PLOOP_LIST_DEFERRED]);
 	lldiscard_pios = llist_del_all(&ploop->pios[PLOOP_LIST_DISCARD]);
 	llcow_pios = llist_del_all(&ploop->pios[PLOOP_LIST_COW]);
 	llresubmit = llist_del_all(&ploop->llresubmit_pios);
 
-	/* add old deferred to the list */
+	/* add old deferred back to the list */
 	if (lldeferred_pios) {
 		struct llist_node *pos, *t;
 		struct pio *pio;
@@ -1864,6 +1875,46 @@ void do_ploop_work(struct work_struct *ws)
 	current->flags = old_flags;
 }
 
+/* Workqueue callback: map the work item to its ploop and call do_ploop_run_work(). */
+void do_ploop_work(struct work_struct *ws)
+{
+	do_ploop_run_work(container_of(ws, struct ploop, worker));
+}
+
+/* Kthread body: sleep until pios are queued or a stop is requested. */
+int ploop_worker(void *data)
+{
+	struct ploop_worker *worker = data;
+	struct ploop *ploop = worker->ploop;
+
+	for (;;) {
+		/* Set the sleeping state before testing the wake conditions */
+		set_current_state(TASK_INTERRUPTIBLE);
+		if (kthread_should_stop()) {
+			__set_current_state(TASK_RUNNING);
+			break;
+		}
+#ifdef USE_KTHREAD
+		smp_rmb();
+		/* Sleep only when every list checked here is empty */
+		if (llist_empty(&ploop->pios[PLOOP_LIST_FLUSH]) &&
+		    llist_empty(&ploop->pios[PLOOP_LIST_PREPARE]) &&
+		    llist_empty(&ploop->pios[PLOOP_LIST_DEFERRED]) &&
+		    llist_empty(&ploop->pios[PLOOP_LIST_DISCARD]) &&
+		    llist_empty(&ploop->pios[PLOOP_LIST_COW]) &&
+		    llist_empty(&ploop->llresubmit_pios))
+			schedule();
+
+		__set_current_state(TASK_RUNNING);
+		do_ploop_run_work(ploop);
+		cond_resched();
+#else
+		schedule();	/* just do nothing yet */
+#endif
+	}
+	return 0;
+}
+
 static void ploop_submit_embedded_pio(struct ploop *ploop, struct pio *pio)
 {
 	struct ploop_rq *prq = pio->endio_cb_data;
@@ -1888,7 +1939,7 @@ static void ploop_submit_embedded_pio(struct ploop *ploop, struct pio *pio)
 
 out:
 	if (queue)
-		queue_work(ploop->wq, &ploop->worker);
+		ploop_schedule_work(ploop);
 }
 
 void ploop_submit_embedded_pios(struct ploop *ploop, struct list_head *list)
diff --git a/drivers/md/dm-ploop-target.c b/drivers/md/dm-ploop-target.c
index ea9af6b6abe9..06a429a23302 100644
--- a/drivers/md/dm-ploop-target.c
+++ b/drivers/md/dm-ploop-target.c
@@ -166,6 +166,28 @@ static void ploop_destroy(struct ploop *ploop)
 		destroy_workqueue(ploop->wq);
 		WARN_ON_ONCE(ploop_has_pending_activity(ploop));
 	}
+
+	if (ploop->kt_worker) {
+		wake_up_process(ploop->kt_worker->task);
+		// FIXME: a better way to wait for lists to drain
+		/* try to send all pending - if we have partial io and enospc end below */
+		while(!llist_empty(&ploop->pios[PLOOP_LIST_FLUSH]) ||
+			!llist_empty(&ploop->pios[PLOOP_LIST_PREPARE]) ||
+			!llist_empty(&ploop->pios[PLOOP_LIST_DEFERRED]) ||
+			!llist_empty(&ploop->pios[PLOOP_LIST_DISCARD]) ||
+			!llist_empty(&ploop->pios[PLOOP_LIST_COW])
+			) {
+				schedule();
+				smp_rmb();
+			}
+
+	        kthread_stop(ploop->kt_worker->task);	/* waits for the thread to stop */
+		WARN_ON(!llist_empty(&ploop->pios[PLOOP_LIST_PREPARE]));
+		WARN_ON(!llist_empty(&ploop->llresubmit_pios));
+	       	// TODO: check if any pios left and end them with error
+        	kfree(ploop->kt_worker);
+	}
+
 	for (i = 0; i < 2; i++)
 		percpu_ref_exit(&ploop->inflight_bios_ref[i]);
 	/* Nobody uses it after destroy_workqueue() */
@@ -330,6 +352,39 @@ ALLOW_ERROR_INJECTION(ploop_add_deltas_stack, ERRNO);
 		argc--;						\
 		argv++;						\
 	} while (0);
+
+/*
+ * Allocate a ploop_worker, create its kthread and wake it up.
+ * Returns NULL if the allocation or the thread creation fails.
+ */
+static struct ploop_worker *ploop_worker_create(struct ploop *ploop)
+{
+	struct ploop_worker *worker;
+	struct task_struct *task;
+
+	worker = kzalloc(sizeof(*worker), GFP_KERNEL_ACCOUNT);
+	if (!worker)
+		return NULL;
+
+	worker->ploop = ploop;
+	task = kthread_create(ploop_worker, worker, "ploop-%d-0",
+			      current->pid);
+	if (IS_ERR(task))
+		goto out_err;
+	worker->task = task;
+
+	wake_up_process(task);
+	/*
+	 * TODO: handle cgroups -- call ploop_attach_cgroups(worker) here and
+	 * kthread_stop(worker->task) before freeing the worker if it fails.
+	 */
+
+	return worker;
+
+out_err:
+	kfree(worker);
+	return NULL;
+}
 /*
  * <data dev>
  */
@@ -474,6 +529,11 @@ static int ploop_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 	if (argc <= 0)
 		goto err;
 
+
+	ploop->kt_worker = ploop_worker_create(ploop);
+	if (!ploop->kt_worker)
+		goto err;
+
 	ret = ploop_add_deltas_stack(ploop, &argv[0], argc);
 	if (ret)
 		goto err;
diff --git a/drivers/md/dm-ploop.h b/drivers/md/dm-ploop.h
index bd4906e4c2b5..803f3f324550 100644
--- a/drivers/md/dm-ploop.h
+++ b/drivers/md/dm-ploop.h
@@ -139,6 +139,12 @@ enum {
 	PLOOP_LIST_INVALID = PLOOP_LIST_COUNT,
 };
 
+struct ploop_worker {
+	struct ploop 		*ploop;		/* set by ploop_worker_create(), read by ploop_worker() */
+	struct task_struct 	*task;		/* kthread returned by kthread_create() */
+	u64			kcov_handle;	/* NOTE(review): not referenced anywhere in this patch */
+};
+
 struct ploop {
 	struct dm_target *ti;
 #define PLOOP_PRQ_POOL_SIZE 512 /* Twice nr_requests from blk_mq_init_sched() */
@@ -184,6 +190,7 @@ struct ploop {
 	struct work_struct worker;
 	struct work_struct event_work;
 
+	struct ploop_worker *kt_worker;
 	struct completion inflight_bios_ref_comp;
 	struct percpu_ref inflight_bios_ref[2];
 	bool inflight_ref_comp_pending;
@@ -599,4 +606,7 @@ extern void ploop_call_rw_iter(struct file *file, loff_t pos, unsigned rw,
 			       struct iov_iter *iter, struct pio *pio);
 extern void ploop_enospc_timer(struct timer_list *timer);
 extern loff_t ploop_llseek_hole(struct dm_target *ti, loff_t offset, int whence);
+
+int ploop_worker(void *data);
+
 #endif /* __DM_PLOOP_H */
-- 
2.43.0



More information about the Devel mailing list