[Devel] [PATCH RH7] ve/kthread: fix race when work can be added to stopped kthread worker
Kirill Tkhai
ktkhai at virtuozzo.com
Wed Mar 21 14:29:44 MSK 2018
On 21.03.2018 10:19, Pavel Tikhomirov wrote:
> note: resending due to missed devel list in CC
>
> Race can be reproduced with steps below:
>
> 1) Add these test patch to increase the race probability:
>
> kernel/kmod.c
> @@ -977,6 +977,8 @@ int call_usermodehelper_fns_ve(struct ve_struct *ve,
> khelper = ve_is_super(ve) ? &khelper_worker : &ve->ve_umh_worker;
>
> if (ve_is_super(ve) || (get_exec_env() == ve)) {
> + while(ve->is_running)
> + cond_resched();
> err = call_usermodehelper_by(khelper, path, argv, envp, wait, init,
> cleanup, data);
> goto out_put;
>
> 2) Set corepattern to type pipe in CT:
>
> echo "|/bin/dd of=/vz/coredumps/core-%e-%t.%p" > /proc/sys/kernel/core_pattern
>
> 3) Produce a segfault inside a container and next try to stop the
> container killing init.
>
> Coredump creates "dd" work and ads it to ve_umh_worker, which is already
> stopped and will never handle these work, and our work will hang
> forever, and container will never stop:
>
> [root at kuchy ~]# cat /proc/6064/stack
> [<ffffffff810a86f9>] __call_usermodehelper_exec+0x179/0x1a0
> [<ffffffff810a8eae>] call_usermodehelper_by+0x5e/0xa0
> [<ffffffff810a901c>] call_usermodehelper_fns_ve+0xec/0x170
> [<ffffffff8128600c>] do_coredump+0x4dc/0xaf0
> [<ffffffff810a1cc5>] get_signal_to_deliver+0x1c5/0x5e0
> [<ffffffff8102a387>] do_signal+0x57/0x6b0
> [<ffffffff8102aa3f>] do_notify_resume+0x5f/0xb0
> [<ffffffff816df6ec>] retint_signal+0x48/0x8c
> [<ffffffffffffffff>] 0xffffffffffffffff
>
> Fix these by putting is_running check and usermodhelper queueing in one
> op_sem lock critical section. It works as we already have these lock
> taken in ve_stop_ns.
>
> commit a08505af8c51 ("ve/kthread: khelper kthread in a container
> introduced") says that we can't hold lock when waiting for userspace so
> drop the lock when waiting.
>
> https://jira.sw.ru/browse/PSBM-82490
> Signed-off-by: Pavel Tikhomirov <ptikhomirov at virtuozzo.com>
> ---
> include/linux/kmod.h | 6 ++--
> kernel/kmod.c | 81 +++++++++++++++++++++++++++++++---------------------
> 2 files changed, 51 insertions(+), 36 deletions(-)
>
> diff --git a/include/linux/kmod.h b/include/linux/kmod.h
> index f9c18741d1a4..9bd5776b1302 100644
> --- a/include/linux/kmod.h
> +++ b/include/linux/kmod.h
> @@ -72,10 +72,10 @@ struct subprocess_info {
> };
>
> extern int
> -call_usermodehelper_by(struct kthread_worker *worker,
> - char *path, char **argv, char **envp, int wait,
> +call_usermodehelper_by(char *path, char **argv, char **envp, int wait,
> int (*init)(struct subprocess_info *info, struct cred *new),
> - void (*cleanup)(struct subprocess_info *), void *data);
> + void (*cleanup)(struct subprocess_info *), void *data,
> + struct ve_struct *ve);
There is not only a race fix, but also preparations like changing function arguments.
Can we move them to separate patch or there are some limitations to do that?
Thanks,
Kirill
> extern int
> call_usermodehelper(char *path, char **argv, char **envp, int wait);
>
> diff --git a/kernel/kmod.c b/kernel/kmod.c
> index bb7671bc0177..2b45f06cbe51 100644
> --- a/kernel/kmod.c
> +++ b/kernel/kmod.c
> @@ -78,8 +78,8 @@ static void free_modprobe_argv(struct subprocess_info *info)
> kfree(info->argv);
> }
>
> -static int __call_usermodehelper_exec(struct kthread_worker *worker,
> - struct subprocess_info *sub_info, int wait);
> +static int __call_usermodehelper_exec(struct subprocess_info *sub_info,
> + int wait, struct ve_struct *ve);
>
> static int call_modprobe(char *module_name, int wait, int blacklist)
> {
> @@ -118,7 +118,7 @@ static int call_modprobe(char *module_name, int wait, int blacklist)
> * We enter to this function with the right permittions, so
> * it's possible to directly call __call_usermodehelper_exec()
> */
> - return __call_usermodehelper_exec(&khelper_worker, info, wait | UMH_KILLABLE);
> + return __call_usermodehelper_exec(info, wait | UMH_KILLABLE, NULL);
>
> free_module_name:
> kfree(module_name);
> @@ -862,6 +862,19 @@ struct subprocess_info *call_usermodehelper_setup(char *path, char **argv,
> }
> EXPORT_SYMBOL(call_usermodehelper_setup);
>
> +static void ve_op_sem_down_read(struct ve_struct *ve)
> +{
> + if (ve)
> + down_read(&ve->op_sem);
> +}
> +
> +static void ve_op_sem_up_read(struct ve_struct *ve)
> +{
> + if (ve)
> + up_read(&ve->op_sem);
> +}
> +
> +
> /**
> * call_usermodehelper_exec - start a usermode application
> * @sub_info: information about the subprocessa
> @@ -874,9 +887,11 @@ EXPORT_SYMBOL(call_usermodehelper_setup);
> * asynchronously if wait is not set, and runs as a child of keventd.
> * (ie. it runs with full root capabilities).
> */
> -static int __call_usermodehelper_exec(struct kthread_worker *worker,
> - struct subprocess_info *sub_info, int wait)
> +static int __call_usermodehelper_exec(struct subprocess_info *sub_info,
> + int wait, struct ve_struct *ve)
> {
> + struct kthread_worker *worker = ve ? &ve->ve_umh_worker
> + : &khelper_worker;
> DECLARE_COMPLETION_ONSTACK(done);
> int retval = 0;
>
> @@ -912,7 +927,9 @@ static int __call_usermodehelper_exec(struct kthread_worker *worker,
> goto unlock;
>
> if (wait & UMH_KILLABLE) {
> + ve_op_sem_up_read(ve);
> retval = wait_for_completion_killable(&done);
> + ve_op_sem_down_read(ve);
> if (!retval)
> goto wait_done;
>
> @@ -922,7 +939,9 @@ static int __call_usermodehelper_exec(struct kthread_worker *worker,
> /* fallthrough, umh_complete() was already called */
> }
>
> + ve_op_sem_up_read(ve);
> wait_for_completion(&done);
> + ve_op_sem_down_read(ve);
> wait_done:
> retval = sub_info->retval;
> out:
> @@ -937,7 +956,7 @@ int call_usermodehelper_exec(struct subprocess_info *sub_info, int wait)
> if (!ve_is_super(get_exec_env()))
> return -EPERM;
>
> - return __call_usermodehelper_exec(&khelper_worker, sub_info, wait);
> + return __call_usermodehelper_exec(sub_info, wait, NULL);
> }
> EXPORT_SYMBOL(call_usermodehelper_exec);
>
> @@ -956,8 +975,8 @@ EXPORT_SYMBOL(call_usermodehelper_exec);
> */
> int call_usermodehelper(char *path, char **argv, char **envp, int wait)
> {
> - return call_usermodehelper_by(&khelper_worker, path, argv, envp,
> - wait, NULL, NULL, NULL);
> + return call_usermodehelper_by(path, argv, envp,
> + wait, NULL, NULL, NULL, NULL);
> }
> EXPORT_SYMBOL(call_usermodehelper);
>
> @@ -968,37 +987,33 @@ int call_usermodehelper_fns_ve(struct ve_struct *ve,
> void (*cleanup)(struct subprocess_info *), void *data)
> {
> int err;
> - struct kthread_worker *khelper;
>
> ve = get_ve(ve);
> if (!ve)
> return -EFAULT;
>
> - khelper = ve_is_super(ve) ? &khelper_worker : &ve->ve_umh_worker;
> -
> - if (ve_is_super(ve) || (get_exec_env() == ve)) {
> - err = call_usermodehelper_by(khelper, path, argv, envp, wait, init,
> - cleanup, data);
> - goto out_put;
> - }
> -
> - if (wait > UMH_WAIT_EXEC) {
> - printk(KERN_ERR "VE#%s: Sleeping call for containers UMH is "
> - "not supported\n", ve->ve_name);
> - err = -EINVAL;
> + if (ve_is_super(ve)) {
> + err = call_usermodehelper_by(path, argv, envp,
> + wait, init, cleanup, data, NULL);
> goto out_put;
> }
>
> + /*
> + * These lock is needed to remove race with ve_stop_ns for containers,
> + * we should not be able to add work to stopped khelper, what can lead
> + * to infinite wait for work's completion.
> + *
> + * Waiting for user-space with semaphore taken is wrong, so
> + * __call_usermodehelper_exec will release it as soon as the work is
> + * queued and wait will be done unlocked.
> + */
> down_read(&ve->op_sem);
> err = -EPIPE;
> - if (!ve->is_running)
> - goto out;
> -
> - err = call_usermodehelper_by(khelper, path, argv, envp, wait, init,
> - cleanup, data);
> -
> -out:
> + if (ve->is_running)
> + err = call_usermodehelper_by(path, argv, envp,
> + wait, init, cleanup, data, ve);
> up_read(&ve->op_sem);
> +
> out_put:
> put_ve(ve);
> return err;
> @@ -1006,15 +1021,15 @@ int call_usermodehelper_fns_ve(struct ve_struct *ve,
> EXPORT_SYMBOL(call_usermodehelper_fns_ve);
> #endif
>
> -int call_usermodehelper_by(struct kthread_worker *worker,
> - char *path, char **argv, char **envp, int wait,
> +int call_usermodehelper_by(char *path, char **argv, char **envp, int wait,
> int (*init)(struct subprocess_info *info, struct cred *new),
> - void (*cleanup)(struct subprocess_info *), void *data)
> + void (*cleanup)(struct subprocess_info *), void *data,
> + struct ve_struct *ve)
> {
> struct subprocess_info *info;
> gfp_t gfp_mask = (wait == UMH_NO_WAIT) ? GFP_ATOMIC : GFP_KERNEL;
>
> - if (worker == &khelper_worker && !ve_is_super(get_exec_env()))
> + if (!ve && !ve_is_super(get_exec_env()))
> return -EPERM;
>
> info = call_usermodehelper_setup(path, argv, envp, gfp_mask,
> @@ -1022,7 +1037,7 @@ int call_usermodehelper_by(struct kthread_worker *worker,
> if (info == NULL)
> return -ENOMEM;
>
> - return __call_usermodehelper_exec(worker, info, wait);
> + return __call_usermodehelper_exec(info, wait, ve);
> }
> EXPORT_SYMBOL(call_usermodehelper_by);
>
>
More information about the Devel
mailing list