[Devel] [PATCH RH8 2/2] drivers/connector: fix nullptr dereference ve->ve_ns->pid_ns
Pavel Tikhomirov
ptikhomirov at virtuozzo.com
Tue Jun 29 13:16:25 MSK 2021
On 25.06.2021 20:12, Andrey Zhadchenko wrote:
> cn_proc_ack incorrectly assumes that ve->ve_ns is not NULL. Check it.
> Also add rcu_dereference since ve->ve_ns is rcu-protected.
> For various fill callbacks we can omit rcu_read_lock since ve_ns can only
> be destroyed after cn_fini_ve, which unregisters connector and frees its
> data.
>
> Example of crash when someone tries to register to connector with
> ve_ns == NULL
>
> [ 52.805823] BUG: unable to handle kernel NULL pointer dereference at 0000000000000020
> [ 52.807174] PGD 0 P4D 0
> [ 52.807548] Oops: 0000 [#1] SMP PTI
> [ 52.808060] CPU: 0 PID: 311 Comm: a.out ve: 1 Not tainted 4.18.0.ovz.custom #79 custom
> [ 52.809192] Hardware name: Virtuozzo KVM, BIOS 1.11.0-2.vz7.2 04/01/2014
> [ 52.810183] RIP: 0010:cn_proc_mcast_ctl+0x73/0x1a0
> [ 52.810994] Code: 41 5c 41 5d c3 48 89 fb 49 89 f4 4c 8b ad c0 08 00 00 e8 80 f6 95 ff 84 c0 74 cb 48 89 ef e8 f4 a5 8f ff 49 8b 95 98 01 00 00 <46
> [ 52.813662] RSP: 0018:ffffaadb003cbca0 EFLAGS: 00010206
> [ 52.814418] RAX: ffffffff980507a0 RBX: ffff8b99bb376a10 RCX: 0000000000000028
> [ 52.815448] RDX: 0000000000000000 RSI: ffff8b99b34c9628 RDI: ffff8b99bb300000
> [ 52.816509] RBP: ffff8b99bb300000 R08: ffffffff98225f80 R09: 0000000000000000
> [ 52.817645] R10: 0000000000000000 R11: 0000000000000028 R12: ffff8b99b34c9628
> [ 52.818664] R13: ffff8b99bb23b4d0 R14: ffff8b99b34c9628 R15: ffffaadb003cbe28
> [ 52.819716] FS: 00007f1555cc4540(0000) GS:ffff8b99bea00000(0000) knlGS:0000000000000000
> [ 52.820922] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> [ 52.821746] CR2: 0000000000000020 CR3: 000000003d2d8000 CR4: 00000000000006f0
> [ 52.822767] Call Trace:
> [ 52.823183] ? _cond_resched+0x15/0x30
> [ 52.823809] ? __kmalloc_node_track_caller+0x1cb/0x280
> [ 52.824554] ? __alloc_skb+0x82/0x1c0
> [ 52.825097] ? __netlink_lookup+0xe6/0x150
> [ 52.825689] cn_rx_skb+0xe1/0x120
> [ 52.826174] netlink_unicast+0x21d/0x260
> [ 52.826739] netlink_sendmsg+0x22e/0x400
> [ 52.827308] sock_sendmsg+0x4c/0x50
> [ 52.827815] __sys_sendto+0xee/0x160
> [ 52.828337] ? __sys_bind+0xd2/0xf0
> [ 52.828845] ? handle_mm_fault+0xc2/0x1d0
> [ 52.829428] ? __do_page_fault+0x23a/0x4f0
> [ 52.830020] __x64_sys_sendto+0x24/0x30
> [ 52.830576] do_syscall_64+0x5b/0x1d0
> [ 52.831110] entry_SYSCALL_64_after_hwframe+0x65/0xca
> [ 52.831836] RIP: 0033:0x7f1555bfd20c
> [ 52.832358] Code: 89 02 48 c7 c0 ff ff ff ff eb b8 0f 1f 00 41 89 ca 64 8b 04 25 18 00 00 00 85 c0 75 19 45 31 c9 45 31 c0 b8 2c 00 00 00 0f 05 <40
> [ 52.835010] RSP: 002b:00007fffc61bb878 EFLAGS: 00000246 ORIG_RAX: 000000000000002c
> [ 52.836098] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f1555bfd20c
> [ 52.837115] RDX: 0000000000000028 RSI: 00007fffc61bb890 RDI: 0000000000000003
> [ 52.838137] RBP: 00007fffc61bb8c0 R08: 0000000000000000 R09: 0000000000000000
> [ 52.839158] R10: 0000000000000000 R11: 0000000000000246 R12: 00005630710cd120
> [ 52.840176] R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000
> [ 52.841196] Modules linked in:
> [ 52.841639] Features: eBPF/cgroup
> [ 52.842120] CR2: 0000000000000020
> [ 52.842645] ---[ end trace 4508d0fc7c54945c ]---
> [ 52.843323] RIP: 0010:cn_proc_mcast_ctl+0x73/0x1a0
> [ 52.844018] Code: 41 5c 41 5d c3 48 89 fb 49 89 f4 4c 8b ad c0 08 00 00 e8 80 f6 95 ff 84 c0 74 cb 48 89 ef e8 f4 a5 8f ff 49 8b 95 98 01 00 00 <46
> [ 52.846714] RSP: 0018:ffffaadb003cbca0 EFLAGS: 00010206
> [ 52.847481] RAX: ffffffff980507a0 RBX: ffff8b99bb376a10 RCX: 0000000000000028
> [ 52.848507] RDX: 0000000000000000 RSI: ffff8b99b34c9628 RDI: ffff8b99bb300000
> [ 52.849533] RBP: ffff8b99bb300000 R08: ffffffff98225f80 R09: 0000000000000000
> [ 52.850561] R10: 0000000000000000 R11: 0000000000000028 R12: ffff8b99b34c9628
> [ 52.851588] R13: ffff8b99bb23b4d0 R14: ffff8b99b34c9628 R15: ffffaadb003cbe28
> [ 52.852616] FS: 00007f1555cc4540(0000) GS:ffff8b99bea00000(0000) knlGS:0000000000000000
> [ 52.853778] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> [ 52.854649] CR2: 0000000000000020 CR3: 000000003d2d8000 CR4: 00000000000006f0
> [ 52.855681] Kernel panic - not syncing: Fatal exception
> [ 52.856567] Kernel Offset: 0x15800000 from 0xffffffff81000000 (relocation range: 0xffffffff80000000-0xffffffffbfffffff)
> [ 52.858158] ---[ end Kernel panic - not syncing: Fatal exception ]---
>
> https://jira.sw.ru/browse/PSBM-130894
> Signed-off-by: Andrey Zhadchenko <andrey.zhadchenko at virtuozzo.com>
> ---
> drivers/connector/cn_proc.c | 35 +++++++++++++++++++++++++----------
> 1 file changed, 25 insertions(+), 10 deletions(-)
>
> diff --git a/drivers/connector/cn_proc.c b/drivers/connector/cn_proc.c
> index 61fd165..9368435 100644
> --- a/drivers/connector/cn_proc.c
> +++ b/drivers/connector/cn_proc.c
> @@ -144,7 +144,8 @@ static bool fill_fork_event(struct proc_event *ev, struct ve_struct *ve,
> struct task_struct *task, int unused)
> {
> struct task_struct *parent;
> - struct pid_namespace *pid_ns = ve->ve_ns->pid_ns_for_children;
> + struct pid_namespace *pid_ns =
> + rcu_dereference(ve->ve_ns)->pid_ns_for_children;
Doing rcu_dereference() not under rcu_read_lock() looks wrong, it would
likely fail on debug kernel with CONFIG_DEBUG_LOCK_ALLOC enabled. If we
don't need rcu_read_lock() here we don't need rcu_dereference() also.
>
> rcu_read_lock();
> parent = rcu_dereference(task->real_parent);
> @@ -164,7 +165,8 @@ void proc_fork_connector(struct task_struct *task)
> static bool fill_exec_event(struct proc_event *ev, struct ve_struct *ve,
> struct task_struct *task, int unused)
> {
> - struct pid_namespace *pid_ns = ve->ve_ns->pid_ns_for_children;
> + struct pid_namespace *pid_ns =
> + rcu_dereference(ve->ve_ns)->pid_ns_for_children;
>
> ev->event_data.exec.process_pid = task_pid_nr_ns(task, pid_ns);
> ev->event_data.exec.process_tgid = task_tgid_nr_ns(task, pid_ns);
> @@ -180,7 +182,8 @@ static bool fill_id_event(struct proc_event *ev, struct ve_struct *ve,
> struct task_struct *task, int which_id)
> {
> const struct cred *cred;
> - struct pid_namespace *pid_ns = ve->ve_ns->pid_ns_for_children;
> + struct pid_namespace *pid_ns =
> + rcu_dereference(ve->ve_ns)->pid_ns_for_children;
> struct user_namespace *user_ns = ve->init_cred->user_ns;
>
> ev->event_data.id.process_pid = task_pid_nr_ns(task, pid_ns);
> @@ -209,7 +212,8 @@ void proc_id_connector(struct task_struct *task, int which_id)
> static bool fill_sid_event(struct proc_event *ev, struct ve_struct *ve,
> struct task_struct *task, int unused)
> {
> - struct pid_namespace *pid_ns = ve->ve_ns->pid_ns_for_children;
> + struct pid_namespace *pid_ns =
> + rcu_dereference(ve->ve_ns)->pid_ns_for_children;
>
> ev->event_data.sid.process_pid = task_pid_nr_ns(task, pid_ns);
> ev->event_data.sid.process_tgid = task_tgid_nr_ns(task, pid_ns);
> @@ -224,7 +228,8 @@ void proc_sid_connector(struct task_struct *task)
> static bool fill_ptrace_event(struct proc_event *ev, struct ve_struct *ve,
> struct task_struct *task, int ptrace_id)
> {
> - struct pid_namespace *pid_ns = ve->ve_ns->pid_ns_for_children;
> + struct pid_namespace *pid_ns =
> + rcu_dereference(ve->ve_ns)->pid_ns_for_children;
>
> ev->event_data.ptrace.process_pid = task_pid_nr_ns(task, pid_ns);
> ev->event_data.ptrace.process_tgid = task_tgid_nr_ns(task, pid_ns);
> @@ -248,7 +253,8 @@ void proc_ptrace_connector(struct task_struct *task, int ptrace_id)
> static bool fill_comm_event(struct proc_event *ev, struct ve_struct *ve,
> struct task_struct *task, int unused)
> {
> - struct pid_namespace *pid_ns = ve->ve_ns->pid_ns_for_children;
> + struct pid_namespace *pid_ns =
> + rcu_dereference(ve->ve_ns)->pid_ns_for_children;
>
> ev->event_data.comm.process_pid = task_pid_nr_ns(task, pid_ns);
> ev->event_data.comm.process_tgid = task_tgid_nr_ns(task, pid_ns);
> @@ -264,7 +270,9 @@ void proc_comm_connector(struct task_struct *task)
> static bool fill_coredump_event(struct proc_event *ev, struct ve_struct *ve,
> struct task_struct *task, int unused)
> {
> - struct pid_namespace *pid_ns = ve->ve_ns->pid_ns_for_children;
> + struct pid_namespace *pid_ns =
> + rcu_dereference(ve->ve_ns)->pid_ns_for_children;
> +
>
> ev->event_data.coredump.process_pid = task_pid_nr_ns(task, pid_ns);
> ev->event_data.coredump.process_tgid = task_tgid_nr_ns(task, pid_ns);
> @@ -283,7 +291,8 @@ void proc_coredump_connector(struct task_struct *task)
> static bool fill_exit_event(struct proc_event *ev, struct ve_struct *ve,
> struct task_struct *task, int unused)
> {
> - struct pid_namespace *pid_ns = ve->ve_ns->pid_ns_for_children;
> + struct pid_namespace *pid_ns =
> + rcu_dereference(ve->ve_ns)->pid_ns_for_children;
>
> ev->event_data.exit.process_pid = task_pid_nr_ns(task, pid_ns);
> ev->event_data.exit.process_tgid = task_tgid_nr_ns(task, pid_ns);
> @@ -342,6 +351,7 @@ static void cn_proc_mcast_ctl(struct cn_msg *msg,
> {
> enum proc_cn_mcast_op *mc_op = NULL;
> struct ve_struct *ve = get_exec_env();
> + struct nsproxy *ve_ns;
> int err = 0;
>
> if (msg->len != sizeof(*mc_op))
> @@ -352,9 +362,14 @@ static void cn_proc_mcast_ctl(struct cn_msg *msg,
> * and user namespaces so ignore requestors from
> * other namespaces.
> */
> - if (!current_user_ns_initial() ||
> - (task_active_pid_ns(current) != ve->ve_ns->pid_ns_for_children))
> + rcu_read_lock();
> + ve_ns = rcu_dereference(ve->ve_ns);
> + if (!current_user_ns_initial() || !ve_ns ||
> + (task_active_pid_ns(current) != ve_ns->pid_ns_for_children)) {
> + rcu_read_unlock();
> return;
> + }
> + rcu_read_unlock();
>
> /* Can only change if privileged. */
> if (!__netlink_ns_capable(nsp, ve_init_user_ns(), CAP_NET_ADMIN)) {
>
--
Best regards, Tikhomirov Pavel
Software Developer, Virtuozzo.
More information about the Devel
mailing list