[Devel] [RESEND PATCH vz10 1/2] ve/kthread: fix race when work can be added to stopped kthread worker #VSTOR-106887
Pavel Tikhomirov
ptikhomirov at virtuozzo.com
Thu Oct 23 10:14:08 MSK 2025

Committed to rh10-6.12.0-55.13.1.2.12.vz10
On 10/21/25 17:18, Aleksei Oladko wrote:
> This patch reintroduces the feature from commit 6d43ed1, which was reverted
> due to a scenario where a hanging user-mode-helper in one container could
> block the startup of other containers.
> 
> The race can be reproduced with the steps below:
> 
> 1) Apply this test patch to increase the race probability (it spins
> until the VE leaves the running state, widening the race window):
> 
> kernel/umh.c
> @@ -455,6 +456,8 @@ int call_usermodehelper_exec_ve(struct ve_struct *ve,
>                  sub_info->queue = call_usermodehelper_queue_ve;
>                  kthread_init_work(&sub_info->ve_work,
>                                    call_usermodehelper_exec_work_ve);
> +             while (VE_IS_RUNNING(ve))
> +                       cond_resched();
>          } else {
>                  sub_info->queue = call_usermodehelper_queue;
>                  INIT_WORK(&sub_info->work, call_usermodehelper_exec_work);
> 
> 2) Set core_pattern to pipe type inside the CT:
> 
> echo "|/bin/dd of=/vz/coredumps/core-%e-%t.%p" > /proc/sys/kernel/core_pattern
> 
> 3) Generate high CPU load on all cores, e.g. with stress-ng:
> 
> stress-ng --futex $(nproc) --timeout 60s
> 
> 4) Produce a segfault inside the container, then try to stop the
> container by killing init, e.g. as sketched below.
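> 
> A minimal way to do this (illustrative only: any crashing process will
> do, "$CTID" and the use of vzctl are assumptions here, and core dumps
> must be enabled inside the CT, e.g. via "ulimit -c unlimited"):
> 
> # inside the CT: crash any process with SIGSEGV
> sh -c 'kill -SEGV $$'
> 
> # on the host: stop the container
> vzctl stop $CTID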
> 
> The coredump creates "dd" work and adds it to ve_umh_worker, which is
> already stopped and will never handle this work; the work hangs
> forever and the container never stops:
> 
> [<0>] call_usermodehelper_exec+0x168/0x1a0
> [<0>] call_usermodehelper_exec_ve+0x96/0xe0
> [<0>] do_coredump+0x60f/0xf40
> [<0>] get_signal+0x834/0x960
> [<0>] arch_do_signal_or_restart+0x29/0xf0
> [<0>] irqentry_exit_to_user_mode+0x12e/0x1a0
> [<0>] asm_exc_page_fault+0x26/0x30
> 
> The fix is:
> 
> 1) Before calling call_usermodehelper_exec for a ve, check that
> the ve is running, and do not queue the work otherwise.
> 
> 2) Add a separate running-helpers counter for each ve.
> 
> 3) In ve_stop_ns, after setting the VE_STATE_STOPPING state,
> wait for the running helpers count to reach 0 before stopping umh.
> 
> There are 2 cases:
> 
> If the call_usermodehelper_exec_ve thread reaches call_usermodehelper_exec
> after ve_stop_ns has started wait_khelpers, it will already see that
> the ve is not in the running state and will not queue the work.
> 
> If the call_usermodehelper_exec_ve thread reaches call_usermodehelper_exec
> before the VE_STATE_STOPPING state is set for the ve, then ve_stop_ns will
> wait in wait_khelpers until all work items have been queued. After that,
> all queued work items will be processed in kthread_flush_worker.
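> 
> Schematically (a sketch for illustration, not part of the patch; it
> assumes VE_IS_RUNNING() reads the same state that ve_stop_ns switches
> to VE_STATE_STOPPING, and that smp_mb__after_atomic() orders the
> counter increment before that read):
> 
> /*
>  *  helper thread                           ve_stop_ns
>  *  -------------                           ----------
>  *  atomic_inc(&ve->umh_running_helpers)    set VE_STATE_STOPPING
>  *  smp_mb__after_atomic()                  wait_khelpers(ve):
>  *  if (VE_IS_RUNNING(ve))                      wait for counter == 0
>  *          call_usermodehelper_exec()      kthread_flush_worker()
>  *  atomic_dec_and_test() -> wake_up()      stop umh_worker
>  */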
> 
> https://virtuozzo.atlassian.net/browse/VSTOR-106887
> 
> Signed-off-by: Aleksei Oladko <aleksey.oladko at virtuozzo.com>
> ---
>   include/linux/umh.h |  2 ++
>   include/linux/ve.h  |  2 ++
>   kernel/umh.c        | 27 ++++++++++++++++++++++++++-
>   kernel/ve/ve.c      |  5 +++++
>   4 files changed, 35 insertions(+), 1 deletion(-)
> 
> diff --git a/include/linux/umh.h b/include/linux/umh.h
> index 5647f1e64e39..35fc4023df74 100644
> --- a/include/linux/umh.h
> +++ b/include/linux/umh.h
> @@ -56,6 +56,8 @@ extern int
>   call_usermodehelper_exec_ve(struct ve_struct *ve,
>   			    struct subprocess_info *info, int wait);
>   
> +extern void wait_khelpers(struct ve_struct *ve);
> +
>   #else /* CONFIG_VE */
>   
>   #define call_usermodehelper_ve(ve, ...)		\
> diff --git a/include/linux/ve.h b/include/linux/ve.h
> index e944132f972f..37562dff25aa 100644
> --- a/include/linux/ve.h
> +++ b/include/linux/ve.h
> @@ -93,6 +93,8 @@ struct ve_struct {
>   	struct kthread_worker	*kthreadd_worker;
>   	struct task_struct	*kthreadd_task;
>   
> +	atomic_t                umh_running_helpers;
> +	struct wait_queue_head  umh_helpers_waitq;
>   	struct kthread_worker	umh_worker;
>   	struct task_struct	*umh_task;
>   
> diff --git a/kernel/umh.c b/kernel/umh.c
> index bd49c108eb90..699403efe382 100644
> --- a/kernel/umh.c
> +++ b/kernel/umh.c
> @@ -447,9 +447,30 @@ static void call_usermodehelper_exec_work_ve(struct kthread_work *work)
>   	}
>   }
>   
> +static void umh_running_helpers_inc(struct ve_struct *ve)
> +{
> +	atomic_inc(&ve->umh_running_helpers);
> +	smp_mb__after_atomic();
> +}
> +
> +static void umh_running_helpers_dec(struct ve_struct *ve)
> +{
> +	if (atomic_dec_and_test(&ve->umh_running_helpers))
> +		wake_up(&ve->umh_helpers_waitq);
> +}
> +
> +void wait_khelpers(struct ve_struct *ve)
> +{
> +	wait_event(ve->umh_helpers_waitq,
> +		   atomic_read(&ve->umh_running_helpers) == 0);
> +}
> +EXPORT_SYMBOL(wait_khelpers);
> +
>   int call_usermodehelper_exec_ve(struct ve_struct *ve,
>   				struct subprocess_info *sub_info, int wait)
>   {
> +	int ret = -EBUSY;
> +
>   	if (!ve_is_super(ve)) {
>   		sub_info->ve = ve;
>   		sub_info->queue = call_usermodehelper_queue_ve;
> @@ -460,7 +481,11 @@ int call_usermodehelper_exec_ve(struct ve_struct *ve,
>   		INIT_WORK(&sub_info->work, call_usermodehelper_exec_work);
>   	}
>   
> -	return call_usermodehelper_exec(sub_info, wait);
> +	umh_running_helpers_inc(ve);
> +	if (VE_IS_RUNNING(ve))
> +		ret = call_usermodehelper_exec(sub_info, wait);
> +	umh_running_helpers_dec(ve);
> +	return ret;
>   }
>   EXPORT_SYMBOL(call_usermodehelper_exec_ve);
>   
> diff --git a/kernel/ve/ve.c b/kernel/ve/ve.c
> index cb77f7b7e4cd..d5dc15942ab5 100644
> --- a/kernel/ve/ve.c
> +++ b/kernel/ve/ve.c
> @@ -88,6 +88,8 @@ struct ve_struct ve0 = {
>   	.nd_neigh_nr		= ATOMIC_INIT(0),
>   	.mnt_nr			= ATOMIC_INIT(0),
>   	.meminfo_val		= VE_MEMINFO_SYSTEM,
> +	.umh_running_helpers    = ATOMIC_INIT(0),
> +	.umh_helpers_waitq      = __WAIT_QUEUE_HEAD_INITIALIZER(ve0.umh_helpers_waitq),
>   	.vdso_64		= (struct vdso_image*)&vdso_image_64,
>   	.vdso_32		= (struct vdso_image*)&vdso_image_32,
>   	.release_list_lock	= __SPIN_LOCK_UNLOCKED(
> @@ -480,6 +482,8 @@ static int ve_start_umh(struct ve_struct *ve)
>   {
>   	struct task_struct *task;
>   
> +	atomic_set(&ve->umh_running_helpers, 0);
> +	init_waitqueue_head(&ve->umh_helpers_waitq);
>   	kthread_init_worker(&ve->umh_worker);
>   
>   	task = kthread_create_on_node_ve_flags(ve, 0, kthread_worker_fn,
> @@ -814,6 +818,7 @@ void ve_stop_ns(struct pid_namespace *pid_ns)
>   	 */
>   	up_write(&ve->op_sem);
>   
> +	wait_khelpers(ve);
>   	/*
>   	 * release_agent works on top of umh_worker, so we must make sure, that
>   	 * ve workqueue is stopped first.
-- 
Best regards, Pavel Tikhomirov
Senior Software Developer, Virtuozzo.
    
    