[Devel] [PATCH vz9 19/23] ia32: add 32-bit vdso virtualization.

Nikita Yushchenko nikita.yushchenko at virtuozzo.com
Fri Oct 1 18:53:27 MSK 2021


From: Andrey Ryabinin <aryabinin at virtuozzo.com>

Similarly to the 64-bit vdso, make the 32-bit vdso mapping per-ve.
This will allow per-container modification of the Linux version
in the .note section of the vdso and of monotonic time.

https://jira.sw.ru/browse/PSBM-121668
Signed-off-by: Andrey Ryabinin <aryabinin at virtuozzo.com>

(cherry-picked from vz8 commit b21e7696d61f ("ia32: add 32-bit vdso
virtualization."))

Signed-off-by: Nikita Yushchenko <nikita.yushchenko at virtuozzo.com>
---
 arch/x86/entry/vdso/vma.c    |  4 ++--
 arch/x86/kernel/process_64.c |  2 +-
 include/linux/ve.h           |  1 +
 kernel/ve/ve.c               | 36 ++++++++++++++++++++++--------------
 4 files changed, 26 insertions(+), 17 deletions(-)

diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c
index e58417321af2..4d731f37ff6a 100644
--- a/arch/x86/entry/vdso/vma.c
+++ b/arch/x86/entry/vdso/vma.c
@@ -74,7 +74,7 @@ static void vdso_fix_landing(const struct vdso_image *image,
 		struct vm_area_struct *new_vma)
 {
 #if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
-	if (in_ia32_syscall() && image == &vdso_image_32) {
+	if (in_ia32_syscall() && image == get_exec_env()->vdso_32) {
 		struct pt_regs *regs = current_pt_regs();
 		unsigned long vdso_land = image->sym_int80_landing_pad;
 		unsigned long old_land_addr = vdso_land +
@@ -382,7 +382,7 @@ static int load_vdso32(void)
 	if (vdso32_enabled != 1)  /* Other values all mean "disabled" */
 		return 0;
 
-	return map_vdso(&vdso_image_32, 0);
+	return map_vdso(get_exec_env()->vdso_32, 0);
 }
 #endif
 
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 206cdb4793f5..56f864581cc0 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -826,7 +826,7 @@ long do_arch_prctl_64(struct task_struct *task, int option, unsigned long arg2)
 # endif
 # if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
 	case ARCH_MAP_VDSO_32:
-		return prctl_map_vdso(&vdso_image_32, arg2);
+		return prctl_map_vdso(get_exec_env()->vdso_32, arg2);
 # endif
 	case ARCH_MAP_VDSO_64:
 		return prctl_map_vdso(get_exec_env()->vdso_64, arg2);
diff --git a/include/linux/ve.h b/include/linux/ve.h
index 741867427f57..57e6e440bc0f 100644
--- a/include/linux/ve.h
+++ b/include/linux/ve.h
@@ -74,6 +74,7 @@ struct ve_struct {
 	struct task_struct	*umh_task;
 
 	struct vdso_image	*vdso_64;
+	struct vdso_image	*vdso_32;
 };
 
 #define VE_MEMINFO_DEFAULT	1	/* default behaviour */
diff --git a/kernel/ve/ve.c b/kernel/ve/ve.c
index 6a3248efaf07..0dabbb544898 100644
--- a/kernel/ve/ve.c
+++ b/kernel/ve/ve.c
@@ -56,6 +56,7 @@ struct ve_struct ve0 = {
 #endif
 	.meminfo_val		= VE_MEMINFO_SYSTEM,
 	.vdso_64		= (struct vdso_image*)&vdso_image_64,
+	.vdso_32		= (struct vdso_image*)&vdso_image_32,
 };
 EXPORT_SYMBOL(ve0);
 
@@ -563,13 +564,12 @@ void ve_exit_ns(struct pid_namespace *pid_ns)
 	up_write(&ve->op_sem);
 }
 
-static int copy_vdso(struct ve_struct *ve)
+static int copy_vdso(struct vdso_image **vdso_dst, const struct vdso_image *vdso_src)
 {
-	const struct vdso_image *vdso_src = &vdso_image_64;
 	struct vdso_image *vdso;
 	void *vdso_data;
 
-	if (ve->vdso_64)
+	if (*vdso_dst)
 		return 0;
 
 	vdso = kmemdup(vdso_src, sizeof(*vdso), GFP_KERNEL);
@@ -586,10 +586,22 @@ static int copy_vdso(struct ve_struct *ve)
 
 	vdso->data = vdso_data;
 
-	ve->vdso_64 = vdso;
+	*vdso_dst = vdso;
 	return 0;
 }
 
+static void ve_free_vdso(struct ve_struct *ve)
+{
+	if (ve->vdso_64 && ve->vdso_64 != &vdso_image_64) {
+		free_pages_exact(ve->vdso_64->data, ve->vdso_64->size);
+		kfree(ve->vdso_64);
+	}
+	if (ve->vdso_32 && ve->vdso_32 != &vdso_image_32) {
+		free_pages_exact(ve->vdso_32->data, ve->vdso_32->size);
+		kfree(ve->vdso_32);
+	}
+}
+
 static struct cgroup_subsys_state *ve_create(struct cgroup_subsys_state *parent_css)
 {
 	struct ve_struct *ve = &ve0;
@@ -623,7 +635,11 @@ static struct cgroup_subsys_state *ve_create(struct cgroup_subsys_state *parent_
 	if (err)
 		goto err_log;
 
-	err = copy_vdso(ve);
+	err = copy_vdso(&ve->vdso_64, &vdso_image_64);
+	if (err)
+		goto err_vdso;
+
+	err = copy_vdso(&ve->vdso_32, &vdso_image_32);
 	if (err)
 		goto err_vdso;
 
@@ -634,6 +650,7 @@ static struct cgroup_subsys_state *ve_create(struct cgroup_subsys_state *parent_
 	return &ve->css;
 
 err_vdso:
+	ve_free_vdso(ve);
 	ve_log_destroy(ve);
 err_log:
 	free_percpu(ve->sched_lat_ve.cur);
@@ -673,15 +690,6 @@ static void ve_offline(struct cgroup_subsys_state *css)
 	ve->ve_name = NULL;
 }
 
-static void ve_free_vdso(struct ve_struct *ve)
-{
-	if (ve->vdso_64 == &vdso_image_64)
-		return;
-
-	free_pages_exact(ve->vdso_64->data, ve->vdso_64->size);
-	kfree(ve->vdso_64);
-}
-
 static void ve_destroy(struct cgroup_subsys_state *css)
 {
 	struct ve_struct *ve = css_to_ve(css);
-- 
2.30.2



More information about the Devel mailing list