[Devel] [PATCH RHEL10 COMMIT] ve: Make task_ve pointer ve namespace based

Konstantin Khorenko khorenko at virtuozzo.com
Mon Dec 1 20:07:58 MSK 2025


The commit is pushed to "branch-rh10-6.12.0-55.13.1.2.x.vz10-ovz" and will appear at git at bitbucket.org:openvz/vzkernel.git
after rh10-6.12.0-55.13.1.2.21.vz10
------>
commit a11c98da280547e34b71f12110a08c979ff72d7d
Author: Pavel Tikhomirov <ptikhomirov at virtuozzo.com>
Date:   Mon Nov 24 19:20:42 2025 +0800

    ve: Make task_ve pointer ve namespace based
    
    This way task_ve pointer's lifetime extends to ve namespace lifetime
    (which holds a reference to ve) and we can drop all the tricks about
    unsetting task_ve to prevent use after free.
    
    https://virtuozzo.atlassian.net/browse/VSTOR-118289
    Signed-off-by: Pavel Tikhomirov <ptikhomirov at virtuozzo.com>
    
    Feature: ve: ve generic structures
    ======
    Patchset description:
    ve: Add VE namespace
    
    Main ideas behind VE namespace explained in "ve: Introduce VE
    namespace".
---
 kernel/ve/ve.c           | 51 ------------------------------------------------
 kernel/ve/ve_namespace.c |  2 ++
 2 files changed, 2 insertions(+), 51 deletions(-)

diff --git a/kernel/ve/ve.c b/kernel/ve/ve.c
index 75148d25b7b39..1bde9c27f8683 100644
--- a/kernel/ve/ve.c
+++ b/kernel/ve/ve.c
@@ -32,7 +32,6 @@
 #include <linux/ctype.h>
 #include <linux/tty.h>
 #include <linux/device.h>
-#include <linux/sched.h>
 #include <linux/sched/loadavg.h>
 
 #include <uapi/linux/vzcalluser.h>
@@ -857,14 +856,6 @@ void ve_exit_ns(struct pid_namespace *pid_ns)
 	}
 	rcu_read_unlock();
 
-	/*
-	 * To prevent use-after-free (e.g. reading /proc/pid/stat) in case
-	 * zombie container init outlives the ve_struct.
-	 *
-	 * Note: synchronize_rcu in ve_drop_context makes it rcu safe.
-	 */
-	rcu_assign_pointer(current->task_ve, &ve0);
-
 	/*
 	 * At this point all userspace tasks in container are dead.
 	 */
@@ -1211,50 +1202,9 @@ static void ve_attach(struct cgroup_taskset *tset)
 
 		if (cpuid_override_on())
 			set_tsk_thread_flag(task, TIF_CPUID_OVERRIDE);
-
-		rcu_assign_pointer(task->task_ve, ve);
 	}
 }
 
-/*
- * When a container task exits it is moved out of its css set, see cgroup_exit.
- * So the task effectively puts its reference to ve_struct of the container,
- * and now nothing prevents ve_struct from being freed while the task is still
- * there.
- *
- * We need to make sure that this task's ->task_ve is not pointing to the
- * container ve before putting the reference, else the use of ->task_ve from
- * the task can lead to use-after-free (e.g. when reading /proc/pid/stat).
- *
- * This is not a problem for a "normal" task (basically any task inside
- * container process tree except init), as such a "normal" task will be reaped
- * in zap_pid_ns_processes by the init, before init releases it's own reference
- * to ve.
- *
- * So we have two cases:
- * - init is handled in ve_exit_ns, unless there is no ve_nsproxy;
- * - "external" task (the task which does not resolve to a valid pid in
- *   container pid namespace, and thus there is no guaranty that it would be
- *   reaped in time) is handled below.
- */
-static void ve_exit(struct task_struct *task)
-{
-	struct ve_struct *ve = task->task_ve;
-
-	/*
-	 * Pairs with synchronize_rcu in ve_grab_context and ve_drop_context.
-	 */
-	rcu_read_lock();
-	/*
-	 * Clear task_ve if ve has no namespaces (ve is starting, stopped or
-	 * stopping), or in case of "external" task.
-	 */
-	if (!ve->ve_nsproxy ||
-	    !task_pid_nr_ns(task, ve->ve_nsproxy->pid_ns_for_children))
-		rcu_assign_pointer(task->task_ve, &ve0);
-	rcu_read_unlock();
-}
-
 static int ve_state_show(struct seq_file *sf, void *v)
 {
 	struct cgroup_subsys_state *css = seq_css(sf);
@@ -1962,7 +1912,6 @@ struct cgroup_subsys ve_cgrp_subsys = {
 	.css_free	= ve_destroy,
 	.can_attach	= ve_can_attach,
 	.attach		= ve_attach,
-	.exit		= ve_exit,
 	.legacy_cftypes	= ve_cftypes,
 };
 
diff --git a/kernel/ve/ve_namespace.c b/kernel/ve/ve_namespace.c
index e5e5f540205ed..6dae8487735f9 100644
--- a/kernel/ve/ve_namespace.c
+++ b/kernel/ve/ve_namespace.c
@@ -83,6 +83,7 @@ int copy_ve_ns(unsigned long flags, struct task_struct *p)
 		return PTR_ERR(new_ve_ns);
 
 	p->ve_ns = new_ve_ns;
+	p->task_ve = new_ve_ns->ve;
 	return 0;
 }
 
@@ -129,6 +130,7 @@ void switch_ve_namespace(struct task_struct *p, struct ve_namespace *new)
 	task_lock(p);
 	old = p->ve_ns;
 	p->ve_ns = new;
+	p->task_ve = new ? new->ve : &ve0;
 	task_unlock(p);
 
 	if (old)


More information about the Devel mailing list