[Devel] [RFC PATCH vz9 v6 12/62] dm-ploop: WIP move from wq to kthread

Andrey Zhadchenko andrey.zhadchenko at virtuozzo.com
Fri Dec 13 11:48:30 MSK 2024



On 12/5/24 22:55, Alexander Atanasov wrote:
> This is a base for moving to multithreaded model.
> 
> https://virtuozzo.atlassian.net/browse/VSTOR-91821
> Signed-off-by: Alexander Atanasov <alexander.atanasov at virtuozzo.com>
> ---
>   drivers/md/dm-ploop-map.c    | 65 ++++++++++++++++++++++++++++++++----
>   drivers/md/dm-ploop-target.c | 60 +++++++++++++++++++++++++++++++++
>   drivers/md/dm-ploop.h        | 10 ++++++
>   3 files changed, 128 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/md/dm-ploop-map.c b/drivers/md/dm-ploop-map.c
> index 0240d6fc376c..28bced7684e5 100644
> --- a/drivers/md/dm-ploop-map.c
> +++ b/drivers/md/dm-ploop-map.c
> @@ -338,6 +338,16 @@ static int ploop_split_pio_to_list(struct ploop *ploop, struct pio *pio,
>   }
>   ALLOW_ERROR_INJECTION(ploop_split_pio_to_list, ERRNO);
>   
> +#define USE_KTHREAD
> +static void ploop_schedule_work(struct ploop *ploop)
> +{
> +#ifndef USE_KTHREAD
> +	queue_work(ploop->wq, &ploop->worker);
> +#else
> +	wake_up_process(ploop->kt_worker->task);
> +#endif
> +}
> +
>   static void ploop_dispatch_pio(struct ploop *ploop, struct pio *pio)
>   {
>   	struct llist_head *list = (struct llist_head *)&ploop->pios[pio->queue_list_id];
> @@ -361,7 +371,7 @@ void ploop_dispatch_pios(struct ploop *ploop, struct pio *pio,
>   			ploop_dispatch_pio(ploop, pio);
>   	}
>   
> -	queue_work(ploop->wq, &ploop->worker);
> +	ploop_schedule_work(ploop);
>   }
>   
>   static bool ploop_delay_if_md_busy(struct ploop *ploop, struct md_page *md,
> @@ -693,7 +703,7 @@ static void ploop_complete_cow(struct ploop_cow *cow, blk_status_t bi_status)
>   
>   	ploop_queue_or_fail(ploop, blk_status_to_errno(bi_status), cow_pio);
>   
> -	queue_work(ploop->wq, &ploop->worker);
> +	ploop_schedule_work(ploop);
>   	ploop_free_pio_with_pages(ploop, cow->aux_pio);
>   	kmem_cache_free(cow_cache, cow);
>   }
> @@ -1142,7 +1152,7 @@ static void ploop_queue_resubmit(struct pio *pio)
>   
>   	llist_add((struct llist_node *)(&pio->list), &ploop->llresubmit_pios);
>   
> -	queue_work(ploop->wq, &ploop->worker);
> +	ploop_schedule_work(ploop);
>   }
>   
>   static void ploop_check_standby_mode(struct ploop *ploop, long res)
> @@ -1815,9 +1825,8 @@ static void process_ploop_fsync_work(struct ploop *ploop)
>   	}
>   }
>   
> -void do_ploop_work(struct work_struct *ws)
> +void do_ploop_run_work(struct ploop *ploop)
>   {
> -	struct ploop *ploop = container_of(ws, struct ploop, worker);
>   	LIST_HEAD(deferred_pios);
>   	struct llist_node *llembedded_pios;
>   	struct llist_node *lldeferred_pios;
> @@ -1829,12 +1838,14 @@ void do_ploop_work(struct work_struct *ws)
>   	current->flags |= PF_IO_THREAD|PF_LOCAL_THROTTLE|PF_MEMALLOC_NOIO;
>   
>   	llembedded_pios = llist_del_all(&ploop->pios[PLOOP_LIST_PREPARE]);
> +	smp_wmb(); /* */

What is the point of using memorry barrier here?

> +
>   	lldeferred_pios = llist_del_all(&ploop->pios[PLOOP_LIST_DEFERRED]);
>   	lldiscard_pios = llist_del_all(&ploop->pios[PLOOP_LIST_DISCARD]);
>   	llcow_pios = llist_del_all(&ploop->pios[PLOOP_LIST_COW]);
>   	llresubmit = llist_del_all(&ploop->llresubmit_pios);
>   
> -	/* add old deferred to the list */
> +	/* add old deferred back to the list */
>   	if (lldeferred_pios) {
>   		struct llist_node *pos, *t;
>   		struct pio *pio;
> @@ -1866,6 +1877,46 @@ void do_ploop_work(struct work_struct *ws)
>   	current->flags = old_flags;
>   }
>   
> +void do_ploop_work(struct work_struct *ws)
> +{
> +	struct ploop *ploop = container_of(ws, struct ploop, worker);
> +
> +	do_ploop_run_work(ploop);
> +}
> +
> +int ploop_worker(void *data)
> +{
> +	struct ploop_worker *worker = data;
> +	struct ploop *ploop = worker->ploop;
> +
> +	for (;;) {
> +		set_current_state(TASK_INTERRUPTIBLE);
> +
> +		if (kthread_should_stop()) {
> +			__set_current_state(TASK_RUNNING);
> +			break;
> +		}
> +#ifdef USE_KTHREAD
> +		smp_rmb(); /* */
> +		if (llist_empty(&ploop->pios[PLOOP_LIST_FLUSH]) &&
> +			llist_empty(&ploop->pios[PLOOP_LIST_PREPARE]) &&
> +			llist_empty(&ploop->pios[PLOOP_LIST_DEFERRED]) &&
> +			llist_empty(&ploop->pios[PLOOP_LIST_DISCARD]) &&
> +			llist_empty(&ploop->pios[PLOOP_LIST_COW]) &&
> +			llist_empty(&ploop->llresubmit_pios)
> +			)
> +			schedule();
> +
> +		__set_current_state(TASK_RUNNING);
> +		do_ploop_run_work(ploop);
> +		cond_resched();
> +#else
> +		schedule();	// just do nothing yet
> +#endif
> +	}
> +	return 0;
> +}
> +
>   static void ploop_submit_embedded_pio(struct ploop *ploop, struct pio *pio)
>   {
>   	struct ploop_rq *prq = pio->endio_cb_data;
> @@ -1890,7 +1941,7 @@ static void ploop_submit_embedded_pio(struct ploop *ploop, struct pio *pio)
>   
>   out:
>   	if (queue)
> -		queue_work(ploop->wq, &ploop->worker);
> +		ploop_schedule_work(ploop);
>   }
>   
>   void ploop_submit_embedded_pios(struct ploop *ploop, struct list_head *list)
> diff --git a/drivers/md/dm-ploop-target.c b/drivers/md/dm-ploop-target.c
> index ea9af6b6abe9..8fd3f51ff81f 100644
> --- a/drivers/md/dm-ploop-target.c
> +++ b/drivers/md/dm-ploop-target.c
> @@ -166,6 +166,28 @@ static void ploop_destroy(struct ploop *ploop)
>   		destroy_workqueue(ploop->wq);
>   		WARN_ON_ONCE(ploop_has_pending_activity(ploop));
>   	}
> +
> +	if (ploop->kt_worker) {
> +		wake_up_process(ploop->kt_worker->task);
> +		// FIXME: a better way to wait for lists to drain

We already have inflight_bios_ref for that. The only exception for this 
would be delayed writeback.

> +		/* try to send all pending - if we have partial io and enospc end bellow */
> +		while (!llist_empty(&ploop->pios[PLOOP_LIST_FLUSH]) ||
> +			!llist_empty(&ploop->pios[PLOOP_LIST_PREPARE]) ||
> +			!llist_empty(&ploop->pios[PLOOP_LIST_DEFERRED]) ||
> +			!llist_empty(&ploop->pios[PLOOP_LIST_DISCARD]) ||
> +			!llist_empty(&ploop->pios[PLOOP_LIST_COW])
> +			) {
> +			schedule();
> +			smp_rmb(); /* */
> +		}
> +
> +		kthread_stop(ploop->kt_worker->task);	/* waits for the thread to stop */
> +		WARN_ON(!llist_empty(&ploop->pios[PLOOP_LIST_PREPARE]));
> +		WARN_ON(!llist_empty(&ploop->llresubmit_pios));
> +		// TODO: check if any pios left and end them with error
> +		kfree(ploop->kt_worker);
> +	}
> +
>   	for (i = 0; i < 2; i++)
>   		percpu_ref_exit(&ploop->inflight_bios_ref[i]);
>   	/* Nobody uses it after destroy_workqueue() */
> @@ -330,6 +352,39 @@ ALLOW_ERROR_INJECTION(ploop_add_deltas_stack, ERRNO);
>   		argc--;						\
>   		argv++;						\
>   	} while (0);
> +
> +static struct ploop_worker *ploop_worker_create(struct ploop *ploop)
> +{
> +	struct ploop_worker *worker;
> +	struct task_struct *task;
> +
> +	worker = kzalloc(sizeof(*worker), GFP_KERNEL_ACCOUNT);
> +	if (!worker)
> +		return NULL;
> +
> +	worker->ploop = ploop;
> +	task = kthread_create(ploop_worker, worker, "ploop-%d-0",
> +				current->pid);
> +
> +	if (IS_ERR(task))
> +		goto out_err;
> +	worker->task = task;
> +
> +	wake_up_process(task);
> +	// TODO: handle cgroups
> +	// ret = ploop_attach_cgroups(worker);
> +	// if (ret)
> +	//         goto stop_worker;
> +
> +	return worker;
> +
> +// stop_worker:
> +//         kthread_stop(worker->task);
> +out_err:
> +	kfree(worker);
> +	return NULL;
> +}
> +#endif
>   /*
>    * <data dev>
>    */
> @@ -474,6 +529,11 @@ static int ploop_ctr(struct dm_target *ti, unsigned int argc, char **argv)
>   	if (argc <= 0)
>   		goto err;
>   
> +
> +	ploop->kt_worker = ploop_worker_create(ploop);
> +	if (!ploop->kt_worker)
> +		goto err;
> +
>   	ret = ploop_add_deltas_stack(ploop, &argv[0], argc);
>   	if (ret)
>   		goto err;
> diff --git a/drivers/md/dm-ploop.h b/drivers/md/dm-ploop.h
> index bd4906e4c2b5..f4d3dce02d6c 100644
> --- a/drivers/md/dm-ploop.h
> +++ b/drivers/md/dm-ploop.h
> @@ -139,6 +139,12 @@ enum {
>   	PLOOP_LIST_INVALID = PLOOP_LIST_COUNT,
>   };
>   
> +struct ploop_worker {
> +	struct ploop		*ploop;
> +	struct task_struct	*task;
> +	u64			kcov_handle;
> +};
> +
>   struct ploop {
>   	struct dm_target *ti;
>   #define PLOOP_PRQ_POOL_SIZE 512 /* Twice nr_requests from blk_mq_init_sched() */
> @@ -184,6 +190,7 @@ struct ploop {
>   	struct work_struct worker;
>   	struct work_struct event_work;
>   
> +	struct ploop_worker *kt_worker;
>   	struct completion inflight_bios_ref_comp;
>   	struct percpu_ref inflight_bios_ref[2];
>   	bool inflight_ref_comp_pending;
> @@ -599,4 +606,7 @@ extern void ploop_call_rw_iter(struct file *file, loff_t pos, unsigned rw,
>   			       struct iov_iter *iter, struct pio *pio);
>   extern void ploop_enospc_timer(struct timer_list *timer);
>   extern loff_t ploop_llseek_hole(struct dm_target *ti, loff_t offset, int whence);
> +
> +int ploop_worker(void *data);
> +
>   #endif /* __DM_PLOOP_H */


More information about the Devel mailing list