[CRIU] [PATCH RH7] ve/kthread: fix race when work can be added to stopped kthread worker
Pavel Tikhomirov
ptikhomirov at virtuozzo.com
Tue Mar 20 18:59:13 MSK 2018
Race can be reproduced with steps below:
1) Add these test patch to increase the race probability:
kernel/kmod.c
@@ -977,6 +977,8 @@ int call_usermodehelper_fns_ve(struct ve_struct *ve,
khelper = ve_is_super(ve) ? &khelper_worker : &ve->ve_umh_worker;
if (ve_is_super(ve) || (get_exec_env() == ve)) {
+ while(ve->is_running)
+ cond_resched();
err = call_usermodehelper_by(khelper, path, argv, envp, wait, init,
cleanup, data);
goto out_put;
2) Set corepattern to type pipe in CT:
echo "|/bin/dd of=/vz/coredumps/core-%e-%t.%p" > /proc/sys/kernel/core_pattern
3) Produce a segfault inside a container and next try to stop the
container killing init.
Coredump creates "dd" work and ads it to ve_umh_worker, which is already
stopped and will never handle these work, and our work will hang
forever, and container will never stop:
[root at kuchy ~]# cat /proc/6064/stack
[<ffffffff810a86f9>] __call_usermodehelper_exec+0x179/0x1a0
[<ffffffff810a8eae>] call_usermodehelper_by+0x5e/0xa0
[<ffffffff810a901c>] call_usermodehelper_fns_ve+0xec/0x170
[<ffffffff8128600c>] do_coredump+0x4dc/0xaf0
[<ffffffff810a1cc5>] get_signal_to_deliver+0x1c5/0x5e0
[<ffffffff8102a387>] do_signal+0x57/0x6b0
[<ffffffff8102aa3f>] do_notify_resume+0x5f/0xb0
[<ffffffff816df6ec>] retint_signal+0x48/0x8c
[<ffffffffffffffff>] 0xffffffffffffffff
Fix these by putting is_running check and usermodhelper queueing in one
op_sem lock critical section. It works as we already have these lock
taken in ve_stop_ns.
commit a08505af8c51 ("ve/kthread: khelper kthread in a container
introduced") says that we can't hold lock when waiting for userspace so
drop the lock when waiting.
https://jira.sw.ru/browse/PSBM-82490
Signed-off-by: Pavel Tikhomirov <ptikhomirov at virtuozzo.com>
---
include/linux/kmod.h | 6 ++--
kernel/kmod.c | 81 +++++++++++++++++++++++++++++++---------------------
2 files changed, 51 insertions(+), 36 deletions(-)
diff --git a/include/linux/kmod.h b/include/linux/kmod.h
index f9c18741d1a4..9bd5776b1302 100644
--- a/include/linux/kmod.h
+++ b/include/linux/kmod.h
@@ -72,10 +72,10 @@ struct subprocess_info {
};
extern int
-call_usermodehelper_by(struct kthread_worker *worker,
- char *path, char **argv, char **envp, int wait,
+call_usermodehelper_by(char *path, char **argv, char **envp, int wait,
int (*init)(struct subprocess_info *info, struct cred *new),
- void (*cleanup)(struct subprocess_info *), void *data);
+ void (*cleanup)(struct subprocess_info *), void *data,
+ struct ve_struct *ve);
extern int
call_usermodehelper(char *path, char **argv, char **envp, int wait);
diff --git a/kernel/kmod.c b/kernel/kmod.c
index bb7671bc0177..2b45f06cbe51 100644
--- a/kernel/kmod.c
+++ b/kernel/kmod.c
@@ -78,8 +78,8 @@ static void free_modprobe_argv(struct subprocess_info *info)
kfree(info->argv);
}
-static int __call_usermodehelper_exec(struct kthread_worker *worker,
- struct subprocess_info *sub_info, int wait);
+static int __call_usermodehelper_exec(struct subprocess_info *sub_info,
+ int wait, struct ve_struct *ve);
static int call_modprobe(char *module_name, int wait, int blacklist)
{
@@ -118,7 +118,7 @@ static int call_modprobe(char *module_name, int wait, int blacklist)
* We enter to this function with the right permittions, so
* it's possible to directly call __call_usermodehelper_exec()
*/
- return __call_usermodehelper_exec(&khelper_worker, info, wait | UMH_KILLABLE);
+ return __call_usermodehelper_exec(info, wait | UMH_KILLABLE, NULL);
free_module_name:
kfree(module_name);
@@ -862,6 +862,19 @@ struct subprocess_info *call_usermodehelper_setup(char *path, char **argv,
}
EXPORT_SYMBOL(call_usermodehelper_setup);
+static void ve_op_sem_down_read(struct ve_struct *ve)
+{
+ if (ve)
+ down_read(&ve->op_sem);
+}
+
+static void ve_op_sem_up_read(struct ve_struct *ve)
+{
+ if (ve)
+ up_read(&ve->op_sem);
+}
+
+
/**
* call_usermodehelper_exec - start a usermode application
* @sub_info: information about the subprocessa
@@ -874,9 +887,11 @@ EXPORT_SYMBOL(call_usermodehelper_setup);
* asynchronously if wait is not set, and runs as a child of keventd.
* (ie. it runs with full root capabilities).
*/
-static int __call_usermodehelper_exec(struct kthread_worker *worker,
- struct subprocess_info *sub_info, int wait)
+static int __call_usermodehelper_exec(struct subprocess_info *sub_info,
+ int wait, struct ve_struct *ve)
{
+ struct kthread_worker *worker = ve ? &ve->ve_umh_worker
+ : &khelper_worker;
DECLARE_COMPLETION_ONSTACK(done);
int retval = 0;
@@ -912,7 +927,9 @@ static int __call_usermodehelper_exec(struct kthread_worker *worker,
goto unlock;
if (wait & UMH_KILLABLE) {
+ ve_op_sem_up_read(ve);
retval = wait_for_completion_killable(&done);
+ ve_op_sem_down_read(ve);
if (!retval)
goto wait_done;
@@ -922,7 +939,9 @@ static int __call_usermodehelper_exec(struct kthread_worker *worker,
/* fallthrough, umh_complete() was already called */
}
+ ve_op_sem_up_read(ve);
wait_for_completion(&done);
+ ve_op_sem_down_read(ve);
wait_done:
retval = sub_info->retval;
out:
@@ -937,7 +956,7 @@ int call_usermodehelper_exec(struct subprocess_info *sub_info, int wait)
if (!ve_is_super(get_exec_env()))
return -EPERM;
- return __call_usermodehelper_exec(&khelper_worker, sub_info, wait);
+ return __call_usermodehelper_exec(sub_info, wait, NULL);
}
EXPORT_SYMBOL(call_usermodehelper_exec);
@@ -956,8 +975,8 @@ EXPORT_SYMBOL(call_usermodehelper_exec);
*/
int call_usermodehelper(char *path, char **argv, char **envp, int wait)
{
- return call_usermodehelper_by(&khelper_worker, path, argv, envp,
- wait, NULL, NULL, NULL);
+ return call_usermodehelper_by(path, argv, envp,
+ wait, NULL, NULL, NULL, NULL);
}
EXPORT_SYMBOL(call_usermodehelper);
@@ -968,37 +987,33 @@ int call_usermodehelper_fns_ve(struct ve_struct *ve,
void (*cleanup)(struct subprocess_info *), void *data)
{
int err;
- struct kthread_worker *khelper;
ve = get_ve(ve);
if (!ve)
return -EFAULT;
- khelper = ve_is_super(ve) ? &khelper_worker : &ve->ve_umh_worker;
-
- if (ve_is_super(ve) || (get_exec_env() == ve)) {
- err = call_usermodehelper_by(khelper, path, argv, envp, wait, init,
- cleanup, data);
- goto out_put;
- }
-
- if (wait > UMH_WAIT_EXEC) {
- printk(KERN_ERR "VE#%s: Sleeping call for containers UMH is "
- "not supported\n", ve->ve_name);
- err = -EINVAL;
+ if (ve_is_super(ve)) {
+ err = call_usermodehelper_by(path, argv, envp,
+ wait, init, cleanup, data, NULL);
goto out_put;
}
+ /*
+ * These lock is needed to remove race with ve_stop_ns for containers,
+ * we should not be able to add work to stopped khelper, what can lead
+ * to infinite wait for work's completion.
+ *
+ * Waiting for user-space with semaphore taken is wrong, so
+ * __call_usermodehelper_exec will release it as soon as the work is
+ * queued and wait will be done unlocked.
+ */
down_read(&ve->op_sem);
err = -EPIPE;
- if (!ve->is_running)
- goto out;
-
- err = call_usermodehelper_by(khelper, path, argv, envp, wait, init,
- cleanup, data);
-
-out:
+ if (ve->is_running)
+ err = call_usermodehelper_by(path, argv, envp,
+ wait, init, cleanup, data, ve);
up_read(&ve->op_sem);
+
out_put:
put_ve(ve);
return err;
@@ -1006,15 +1021,15 @@ int call_usermodehelper_fns_ve(struct ve_struct *ve,
EXPORT_SYMBOL(call_usermodehelper_fns_ve);
#endif
-int call_usermodehelper_by(struct kthread_worker *worker,
- char *path, char **argv, char **envp, int wait,
+int call_usermodehelper_by(char *path, char **argv, char **envp, int wait,
int (*init)(struct subprocess_info *info, struct cred *new),
- void (*cleanup)(struct subprocess_info *), void *data)
+ void (*cleanup)(struct subprocess_info *), void *data,
+ struct ve_struct *ve)
{
struct subprocess_info *info;
gfp_t gfp_mask = (wait == UMH_NO_WAIT) ? GFP_ATOMIC : GFP_KERNEL;
- if (worker == &khelper_worker && !ve_is_super(get_exec_env()))
+ if (!ve && !ve_is_super(get_exec_env()))
return -EPERM;
info = call_usermodehelper_setup(path, argv, envp, gfp_mask,
@@ -1022,7 +1037,7 @@ int call_usermodehelper_by(struct kthread_worker *worker,
if (info == NULL)
return -ENOMEM;
- return __call_usermodehelper_exec(worker, info, wait);
+ return __call_usermodehelper_exec(info, wait, ve);
}
EXPORT_SYMBOL(call_usermodehelper_by);
--
2.14.3
More information about the CRIU
mailing list