[Devel] [PATCH RH9 v2 7/7] ve/coredump: virtualize kernel.core_pattern sysctl

Pavel Tikhomirov ptikhomirov at virtuozzo.com
Tue Oct 5 16:34:17 MSK 2021


1) Put core_pattern into ve_struct and add a virtualized proc_handler
using the sysctl_virtual() macro.

2) Use the in-container khelper kthread for collecting cores.

We need to support piping coredumps in a CT. If a process crashes
in a CT and kernel.core_pattern begins with "|", we get an error:

CT:
  CT-101-bash-4.2# sysctl kernel.core_pattern
   kernel.core_pattern = |/root/core_test %p UID=%u GID=%g sig=%s
  CT-101-bash-4.2# sleep 100 &
   [1] 445
  CT-101-bash-4.2# kill -11 445
  CT-101-bash-4.2#
   [1]+  Segmentation fault      sleep 100
  CT-101-bash-4.2# ll /root
   total 8
   rwxr-xr-x 1 root root 7424 Jan 27 17:28 core_test
Host:
  [root at s143 ~]# less /var/log/messages
   Jan 28 10:40:42 s143 kernel: [87698.969582] Core dump to
   |/root/core_test 445 UID=0 GID=0 sig=11 pipe failed

Docker is also going to use the coredump piping functionality:
https://github.com/docker/docker/issues/19289

https://jira.sw.ru/browse/PSBM-43596

v2: split sysctl_virtual into a separate patch

Signed-off-by: Pavel Tikhomirov <ptikhomirov at virtuozzo.com>
Reviewed-by: Andrey Ryabinin <aryabinin at virtuozzo.com>
Signed-off-by: Stanislav Kinsburskiy <skinsbursky at virtuozzo.com>

+++
ve, coredump: Fix usercopy warning.

Usercopy WARNs about copying ve.core_pattern to user space:

vzctl exec e2783e5e-840e-4c9a-9ef2-7ead73afb81c cat /proc/sys/kernel/core_pattern
 Call Trace:
  __check_object_size+0x134/0x160
  proc_dostring+0x164/0x200
  proc_dostring_coredump_virtual+0xb2/0xd0
  proc_dostring+0x200/0x200
  proc_sys_call_handler+0xa7/0xf0
  vfs_read+0x9d/0x150
  ksys_read+0x4f/0xb0
  do_syscall_64+0x5b/0x1c0
  entry_SYSCALL_64_after_hwframe+0x65/0xca

Whitelist core_pattern in the ve_struct kmem_cache, since copying
core_pattern to user space is allowed by design.

Fixes: e931118f8139 ("ve: Add ve cgroup and ve_hook subsys")
https://jira.sw.ru/browse/PSBM-106216

Signed-off-by: Andrey Ryabinin <aryabinin at virtuozzo.com>

vz9 change: merge with usercopy warning fixup for ve->core_pattern

(cherry picked from vz8 commit f8c44aa189fcac4c137195ca0b7af2f73e07d712)
Signed-off-by: Pavel Tikhomirov <ptikhomirov at virtuozzo.com>
---
v2: merge with usercopy warning fixup
---
 fs/coredump.c            | 12 +++++++-----
 include/linux/coredump.h |  1 -
 include/linux/ve.h       |  5 +++++
 kernel/sysctl.c          | 13 +++++++++----
 kernel/ve/ve.c           |  5 ++++-
 5 files changed, 25 insertions(+), 11 deletions(-)

diff --git a/fs/coredump.c b/fs/coredump.c
index ae25d8b7a07b..09d90f1e08a6 100644
--- a/fs/coredump.c
+++ b/fs/coredump.c
@@ -41,6 +41,7 @@
 #include <linux/fs.h>
 #include <linux/path.h>
 #include <linux/timekeeping.h>
+#include <linux/ve.h>
 
 #include <linux/uaccess.h>
 #include <asm/mmu_context.h>
@@ -54,7 +55,6 @@
 
 int core_uses_pid;
 unsigned int core_pipe_limit;
-char core_pattern[CORENAME_MAX_SIZE] = "core";
 static int core_name_size = CORENAME_MAX_SIZE;
 
 struct core_name {
@@ -197,7 +197,8 @@ static int format_corename(struct core_name *cn, struct coredump_params *cprm,
 			   size_t **argv, int *argc)
 {
 	const struct cred *cred = current_cred();
-	const char *pat_ptr = core_pattern;
+	struct ve_struct *ve = get_exec_env();
+	const char *pat_ptr = ve->core_pattern;
 	int ispipe = (*pat_ptr == '|');
 	bool was_space = false;
 	int pid_in_pattern = 0;
@@ -210,7 +211,7 @@ static int format_corename(struct core_name *cn, struct coredump_params *cprm,
 	cn->corename[0] = '\0';
 
 	if (ispipe) {
-		int argvs = sizeof(core_pattern) / 2;
+		int argvs = sizeof(ve->core_pattern) / 2;
 		(*argv) = kmalloc_array(argvs, sizeof(**argv), GFP_KERNEL);
 		if (!(*argv))
 			return -ENOMEM;
@@ -696,8 +697,9 @@ void do_coredump(const kernel_siginfo_t *siginfo)
 						helper_argv, NULL, GFP_KERNEL,
 						umh_pipe_setup, NULL, &cprm);
 		if (sub_info)
-			retval = call_usermodehelper_exec(sub_info,
-							  UMH_WAIT_EXEC);
+			retval = call_usermodehelper_exec_ve(get_exec_env(),
+							     sub_info,
+							     UMH_WAIT_EXEC);
 
 		kfree(helper_argv);
 		if (retval) {
diff --git a/include/linux/coredump.h b/include/linux/coredump.h
index 78fcd776b185..4e53db2ff609 100644
--- a/include/linux/coredump.h
+++ b/include/linux/coredump.h
@@ -15,7 +15,6 @@ struct core_vma_metadata {
 };
 
 extern int core_uses_pid;
-extern char core_pattern[];
 extern unsigned int core_pipe_limit;
 
 /*
diff --git a/include/linux/ve.h b/include/linux/ve.h
index c2bdadb487f1..959e919633c9 100644
--- a/include/linux/ve.h
+++ b/include/linux/ve.h
@@ -18,6 +18,7 @@
 #include <linux/vzstat.h>
 #include <asm/vdso.h>
 #include <linux/time_namespace.h>
+#include <linux/binfmts.h>
 
 struct nsproxy;
 struct veip_struct;
@@ -73,6 +74,10 @@ struct ve_struct {
 
 	atomic_t		mnt_nr; /* number of present VE mounts */
 
+#ifdef CONFIG_COREDUMP
+	char			core_pattern[CORENAME_MAX_SIZE];
+#endif
+
 	struct kthread_worker	*kthreadd_worker;
 	struct task_struct	*kthreadd_task;
 
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index e50829903763..10c92dccf575 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -1165,9 +1165,10 @@ static int proc_dopipe_max_size(struct ctl_table *table, int write,
 
 static void validate_coredump_safety(void)
 {
+	struct ve_struct *ve = get_exec_env();
 #ifdef CONFIG_COREDUMP
 	if (suid_dumpable == SUID_DUMP_ROOT &&
-	    core_pattern[0] != '/' && core_pattern[0] != '|') {
+	    ve->core_pattern[0] != '/' && ve->core_pattern[0] != '|') {
 		printk(KERN_WARNING
 "Unsafe core_pattern used with fs.suid_dumpable=2.\n"
 "Pipe handler or fully qualified core dump path required.\n"
@@ -1802,6 +1803,10 @@ int proc_do_static_key(struct ctl_table *table, int write,
 	return ret;
 }
 
+#ifdef CONFIG_COREDUMP
+sysctl_virtual(proc_dostring_coredump);
+#endif
+
 static struct ctl_table kern_table[] = {
 	{
 		.procname	= "sched_child_runs_first",
@@ -1987,10 +1992,10 @@ static struct ctl_table kern_table[] = {
 	},
 	{
 		.procname	= "core_pattern",
-		.data		= core_pattern,
+		.data		= ve0.core_pattern,
 		.maxlen		= CORENAME_MAX_SIZE,
-		.mode		= 0644,
-		.proc_handler	= proc_dostring_coredump,
+		.mode		= 0644 | S_ISVTX,
+		.proc_handler	= proc_dostring_coredump_virtual,
 	},
 	{
 		.procname	= "core_pipe_limit",
diff --git a/kernel/ve/ve.c b/kernel/ve/ve.c
index 1adb875dffc9..cd4b982cc0da 100644
--- a/kernel/ve/ve.c
+++ b/kernel/ve/ve.c
@@ -665,6 +665,9 @@ static struct cgroup_subsys_state *ve_create(struct cgroup_subsys_state *parent_
 
 	atomic_set(&ve->mnt_nr, 0);
 
+#ifdef CONFIG_COREDUMP
+	strcpy(ve->core_pattern, "core");
+#endif
 	INIT_LIST_HEAD(&ve->devmnt_list);
 	mutex_init(&ve->devmnt_mutex);
 
@@ -1331,7 +1334,7 @@ EXPORT_SYMBOL_GPL(ve_cgrp_subsys);
 
 static int __init ve_subsys_init(void)
 {
-	ve_cachep = KMEM_CACHE(ve_struct, SLAB_PANIC);
+	ve_cachep = KMEM_CACHE_USERCOPY(ve_struct, SLAB_PANIC, core_pattern);
 	list_add(&ve0.ve_list, &ve_list_head);
 	return 0;
 }
-- 
2.31.1



More information about the Devel mailing list