[Devel] [RESEND PATCH vz10 1/2] ve/kthread: fix race when work can be added to stopped kthread worker #VSTOR-106887

Konstantin Khorenko khorenko at virtuozzo.com
Tue Oct 28 19:15:02 MSK 2025


And where is the original patch mail?

I don't see it in the list:
https://lists.openvz.org/pipermail/devel/2025-October/date.html

--
Best regards,

Konstantin Khorenko,
Virtuozzo Linux Kernel Team

On 10/21/25 12:25, Pavel Tikhomirov wrote:
> Reviewed-by: Pavel Tikhomirov <ptikhomirov at virtuozzo.com>
> 
> (for both patches, will merge them tomorrow)
> 
> On 10/21/25 17:18, Aleksei Oladko wrote:
>> This patch reintroduces the feature from commit 6d43ed1, which was reverted
>> because a hanging usermode helper in one container could block the
>> startup of other containers.
>>
>> The race can be reproduced with the steps below:
>>
>> 1) Apply this test patch to increase the race probability:
>>
>> kernel/umh.c
>> @@ -455,6 +456,8 @@ int call_usermodehelper_exec_ve(struct ve_struct *ve,
>>                   sub_info->queue = call_usermodehelper_queue_ve;
>>                   kthread_init_work(&sub_info->ve_work,
>>                                     call_usermodehelper_exec_work_ve);
>> +             while (VE_IS_RUNNING(ve))
>> +                       cond_resched();
>>           } else {
>>                   sub_info->queue = call_usermodehelper_queue;
>>                   INIT_WORK(&sub_info->work, call_usermodehelper_exec_work);
>>
>> 2) Set core_pattern to a pipe handler inside the CT:
>>
>> echo "|/bin/dd of=/vz/coredumps/core-%e-%t.%p" > /proc/sys/kernel/core_pattern
>>
>> 3) Generate high CPU load on all cores with a tool such as stress-ng:
>>
>> stress-ng --futex $(nproc) --timeout 60s
>>
>> 4) Produce a segfault inside the container and then try to stop the
>> container by killing init.
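>>
>> Any crashing binary will do; a minimal trigger (illustrative):
>>
>> int main(void)
>> {
>>         /* NULL write -> SIGSEGV -> do_coredump() */
>>         *(volatile int *)0 = 0;
>>         return 0;
>> }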
>>
>> The coredump creates a "dd" work item and adds it to ve_umh_worker, which
>> is already stopped and will never handle it, so the work hangs forever
>> and the container never stops:
>>
>> [<0>] call_usermodehelper_exec+0x168/0x1a0
>> [<0>] call_usermodehelper_exec_ve+0x96/0xe0
>> [<0>] do_coredump+0x60f/0xf40
>> [<0>] get_signal+0x834/0x960
>> [<0>] arch_do_signal_or_restart+0x29/0xf0
>> [<0>] irqentry_exit_to_user_mode+0x12e/0x1a0
>> [<0>] asm_exc_page_fault+0x26/0x30
>>
>> The fix is:
>>
>> 1) Before calling call_usermodehelper_exec for a ve, check that
>> the ve is running.
>>
>> 2) Add a separate running-helpers counter to each ve.
>>
>> 3) In ve_stop_ns, after setting the VE_STATE_STOPPING state,
>> wait for the running helpers count to reach 0 before stopping umh.
>>
>> There are 2 cases:
>>
>> If the call_usermodehelper_exec_ve thread reaches call_usermodehelper_exec
>> after ve_stop_ns has started wait_khelpers, it will already see that
>> the ve is not in the running state and will not queue the work.
>>
>> If the call_usermodehelper_exec_ve thread reaches call_usermodehelper_exec
>> before the VE_STATE_STOPPING state is set for the ve, then ve_stop_ns will
>> wait in wait_khelpers until all such works have been queued. After that,
>> all queued works will be processed by kthread_flush_worker.
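>>
>> To make the handshake easier to see, here is a minimal userspace
>> model of the same pattern (a pthreads mutex/condvar stands in for
>> the kernel's atomics, barriers and waitqueue; all names here are
>> illustrative, not part of the patch):
>>
>> #include <pthread.h>
>> #include <stdbool.h>
>>
>> static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
>> static pthread_cond_t all_done = PTHREAD_COND_INITIALIZER;
>> static int running_helpers;     /* models ve->umh_running_helpers */
>> static bool ve_running = true;  /* models VE_IS_RUNNING(ve) */
>>
>> static void helper_exec(void)   /* models call_usermodehelper_exec_ve() */
>> {
>>         pthread_mutex_lock(&lock);
>>         running_helpers++;              /* umh_running_helpers_inc() */
>>         if (ve_running) {
>>                 /* queue the work: ve_stop() cannot pass its wait
>>                  * until the counter drops, so the worker is still
>>                  * alive and will flush this work before stopping */
>>         }
>>         if (--running_helpers == 0)     /* umh_running_helpers_dec() */
>>                 pthread_cond_signal(&all_done);
>>         pthread_mutex_unlock(&lock);
>> }
>>
>> static void ve_stop(void)       /* models ve_stop_ns() */
>> {
>>         pthread_mutex_lock(&lock);
>>         ve_running = false;             /* VE_STATE_STOPPING */
>>         while (running_helpers > 0)     /* wait_khelpers() */
>>                 pthread_cond_wait(&all_done, &lock);
>>         pthread_mutex_unlock(&lock);
>>         /* now safe to flush and stop the umh worker */
>> }
>>
>> static void *helper_thread(void *arg)
>> {
>>         (void)arg;
>>         helper_exec();
>>         return NULL;
>> }
>>
>> int main(void)
>> {
>>         pthread_t t;
>>
>>         pthread_create(&t, NULL, helper_thread, NULL);
>>         ve_stop();
>>         pthread_join(t, NULL);
>>         return 0;
>> }
>>
>> Either helper_exec() runs entirely before ve_stop() takes the lock
>> (the work is queued and later flushed), or entirely after it (it
>> sees !ve_running and bails out with -EBUSY), matching the two cases
>> above.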
>>
>> https://virtuozzo.atlassian.net/browse/VSTOR-106887
>>
>> Signed-off-by: Aleksei Oladko <aleksey.oladko at virtuozzo.com>
>> ---
>>    include/linux/umh.h |  2 ++
>>    include/linux/ve.h  |  2 ++
>>    kernel/umh.c        | 27 ++++++++++++++++++++++++++-
>>    kernel/ve/ve.c      |  5 +++++
>>    4 files changed, 35 insertions(+), 1 deletion(-)
>>
>> diff --git a/include/linux/umh.h b/include/linux/umh.h
>> index 5647f1e64e39..35fc4023df74 100644
>> --- a/include/linux/umh.h
>> +++ b/include/linux/umh.h
>> @@ -56,6 +56,8 @@ extern int
>>    call_usermodehelper_exec_ve(struct ve_struct *ve,
>>    			    struct subprocess_info *info, int wait);
>>    
>> +extern void wait_khelpers(struct ve_struct *ve);
>> +
>>    #else /* CONFIG_VE */
>>    
>>    #define call_usermodehelper_ve(ve, ...)		\
>> diff --git a/include/linux/ve.h b/include/linux/ve.h
>> index e944132f972f..37562dff25aa 100644
>> --- a/include/linux/ve.h
>> +++ b/include/linux/ve.h
>> @@ -93,6 +93,8 @@ struct ve_struct {
>>    	struct kthread_worker	*kthreadd_worker;
>>    	struct task_struct	*kthreadd_task;
>>    
>> +	atomic_t                umh_running_helpers;
>> +	struct wait_queue_head  umh_helpers_waitq;
>>    	struct kthread_worker	umh_worker;
>>    	struct task_struct	*umh_task;
>>    
>> diff --git a/kernel/umh.c b/kernel/umh.c
>> index bd49c108eb90..699403efe382 100644
>> --- a/kernel/umh.c
>> +++ b/kernel/umh.c
>> @@ -447,9 +447,30 @@ static void call_usermodehelper_exec_work_ve(struct kthread_work *work)
>>    	}
>>    }
>>    
>> +static void umh_running_helpers_inc(struct ve_struct *ve)
>> +{
>> +	atomic_inc(&ve->umh_running_helpers);
>> +	smp_mb__after_atomic();
>> +}
>> +
>> +static void umh_running_helpers_dec(struct ve_struct *ve)
>> +{
>> +	if (atomic_dec_and_test(&ve->umh_running_helpers))
>> +		wake_up(&ve->umh_helpers_waitq);
>> +}
>> +
>> +void wait_khelpers(struct ve_struct *ve)
>> +{
>> +	wait_event(ve->umh_helpers_waitq,
>> +		   atomic_read(&ve->umh_running_helpers) == 0);
>> +}
>> +EXPORT_SYMBOL(wait_khelpers);
>> +
>>    int call_usermodehelper_exec_ve(struct ve_struct *ve,
>>    				struct subprocess_info *sub_info, int wait)
>>    {
>> +	int ret = -EBUSY;
>> +
>>    	if (!ve_is_super(ve)) {
>>    		sub_info->ve = ve;
>>    		sub_info->queue = call_usermodehelper_queue_ve;
>> @@ -460,7 +481,11 @@ int call_usermodehelper_exec_ve(struct ve_struct *ve,
>>    		INIT_WORK(&sub_info->work, call_usermodehelper_exec_work);
>>    	}
>>    
>> -	return call_usermodehelper_exec(sub_info, wait);
>> +	umh_running_helpers_inc(ve);
>> +	if (VE_IS_RUNNING(ve))
>> +		ret = call_usermodehelper_exec(sub_info, wait);
>> +	umh_running_helpers_dec(ve);
>> +	return ret;
>>    }
>>    EXPORT_SYMBOL(call_usermodehelper_exec_ve);
>>    
>> diff --git a/kernel/ve/ve.c b/kernel/ve/ve.c
>> index cb77f7b7e4cd..d5dc15942ab5 100644
>> --- a/kernel/ve/ve.c
>> +++ b/kernel/ve/ve.c
>> @@ -88,6 +88,8 @@ struct ve_struct ve0 = {
>>    	.nd_neigh_nr		= ATOMIC_INIT(0),
>>    	.mnt_nr			= ATOMIC_INIT(0),
>>    	.meminfo_val		= VE_MEMINFO_SYSTEM,
>> +	.umh_running_helpers    = ATOMIC_INIT(0),
>> +	.umh_helpers_waitq      = __WAIT_QUEUE_HEAD_INITIALIZER(ve0.umh_helpers_waitq),
>>    	.vdso_64		= (struct vdso_image*)&vdso_image_64,
>>    	.vdso_32		= (struct vdso_image*)&vdso_image_32,
>>    	.release_list_lock	= __SPIN_LOCK_UNLOCKED(
>> @@ -480,6 +482,8 @@ static int ve_start_umh(struct ve_struct *ve)
>>    {
>>    	struct task_struct *task;
>>    
>> +	atomic_set(&ve->umh_running_helpers, 0);
>> +	init_waitqueue_head(&ve->umh_helpers_waitq);
>>    	kthread_init_worker(&ve->umh_worker);
>>    
>>    	task = kthread_create_on_node_ve_flags(ve, 0, kthread_worker_fn,
>> @@ -814,6 +818,7 @@ void ve_stop_ns(struct pid_namespace *pid_ns)
>>    	 */
>>    	up_write(&ve->op_sem);
>>    
>> +	wait_khelpers(ve);
>>    	/*
>>    	 * release_agent works on top of umh_worker, so we must make sure, that
>>    	 * ve workqueue is stopped first.
> 


