[Devel] [PATCH RHEL7 COMMIT] ve/vdso: x86-32 -- Implement kernel version virtualization

Konstantin Khorenko khorenko at virtuozzo.com
Wed Jun 24 06:37:14 PDT 2015


The commit is pushed to "branch-rh7-3.10.0-123.1.2-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh7-3.10.0-123.1.2.vz7.5.17
------>
commit 65051a2bbf7f62fc5c825dd5a7b0d3b29a26badf
Author: Cyrill Gorcunov <gorcunov at virtuozzo.com>
Date:   Wed Jun 24 17:37:14 2015 +0400

    ve/vdso: x86-32 -- Implement kernel version virtualization
    
    This is the second patch; it implements vDSO version virtualization for
    x86-32 programs running on x86-64 kernels (the so-called compat mode).
    
    Just like for the native x86-64 mode, we implement virtualization by
    carrying a vDSO page per uts-namespace.
    
    https://jira.sw.ru/browse/PSBM-30093
    https://bugzilla.openvz.org/show_bug.cgi?id=2768
    
    Signed-off-by: Cyrill Gorcunov <gorcunov at openvz.org>
    
    CC: Vladimir Davydov <vdavydov at virtuozzo.com>
    CC: Konstantin Khorenko <khorenko at virtuozzo.com>
---
 arch/x86/vdso/vdso32-setup.c      | 91 ++++++++++++++++++++++++++++++++++++++-
 arch/x86/vdso/vdso32/note.S       |  2 +
 arch/x86/vdso/vdso32/vdso32.lds.S |  1 +
 include/linux/utsname.h           |  3 ++
 kernel/utsname.c                  | 15 +++++++
 5 files changed, 111 insertions(+), 1 deletion(-)

diff --git a/arch/x86/vdso/vdso32-setup.c b/arch/x86/vdso/vdso32-setup.c
index 0faad64..0683681 100644
--- a/arch/x86/vdso/vdso32-setup.c
+++ b/arch/x86/vdso/vdso32-setup.c
@@ -26,6 +26,10 @@
 #include <asm/vdso.h>
 #include <asm/proto.h>
 
+#include <linux/utsname.h>
+#include <linux/version.h>
+#include <linux/ve.h>
+
 enum {
 	VDSO_DISABLED = 0,
 	VDSO_ENABLED = 1,
@@ -303,6 +307,85 @@ int __init sysenter_setup(void)
 	return 0;
 }
 
+static struct page **uts_prep_vdso_pages_locked(int map)
+{
+	struct uts_namespace *uts_ns = current->nsproxy->uts_ns;
+	struct mm_struct *mm = current->mm;
+	struct ve_struct *ve = get_exec_env();
+	struct page **pages = vdso32_pages;
+	int n1, n2, n3, new_version;
+	void *addr;
+
+	/*
+	 * Fast path: init_uts_ns uses vdso32_pages, others reuse a cached copy.
+	 */
+	if (uts_ns == &init_uts_ns)
+		return vdso32_pages;
+	else if (uts_ns->vdso32.pages)
+		return uts_ns->vdso32.pages;
+
+	up_write(&mm->mmap_sem); /* NOTE(review): dropped until out:; confirm callers sharing uts_ns cannot race */
+
+	if (sscanf(uts_ns->name.release, "%d.%d.%d", &n1, &n2, &n3) == 3) {
+		/*
+		 * If the namespace release matches the running kernel
+		 * version, keep using the preallocated global pages.
+		 */
+		new_version = KERNEL_VERSION(n1, n2, n3);
+		if (new_version == LINUX_VERSION_CODE)
+			goto out;
+	} else {
+		/*
+		 * If the admin has set a malformed release string,
+		 * warn once and carry on without vDSO virtualization
+		 * (the global pages are returned). That is better
+		 * than failing with an error.
+		 */
+		pr_warn_once("Wrong release uts name format detected."
+			     " Ignoring vDSO virtualization.\n");
+		goto out;
+	}
+
+	uts_ns->vdso32.nr_pages		= 1;
+	uts_ns->vdso32.size		= PAGE_SIZE;
+	uts_ns->vdso32.version_off	= (unsigned long)VDSO32_SYMBOL(0, linux_version_code);
+	uts_ns->vdso32.pages		= kmalloc(sizeof(struct page *), GFP_KERNEL);
+	if (!uts_ns->vdso32.pages) {
+		pr_err("Can't allocate vDSO pages array for VE %d\n", ve->veid);
+		pages = ERR_PTR(-ENOMEM);
+		goto out;
+	}
+
+	uts_ns->vdso32.pages[0] = alloc_page(GFP_KERNEL);
+	if (!uts_ns->vdso32.pages[0]) {
+		pr_err("Can't allocate page for VE %d\n", ve->veid);
+		kfree(uts_ns->vdso32.pages);
+		uts_ns->vdso32.pages = NULL;
+		pages = ERR_PTR(-ENOMEM);
+		goto out;
+	}
+
+	copy_page(page_address(uts_ns->vdso32.pages[0]), page_address(vdso32_pages[0]));
+	pages = uts_ns->vdso32.pages;
+
+	addr = page_address(uts_ns->vdso32.pages[0]);
+	*((int *)(addr + uts_ns->vdso32.version_off)) = new_version;
+	pr_debug("vDSO version transition %d -> %d for VE %d\n",
+		 LINUX_VERSION_CODE, new_version, ve->veid);
+
+#ifdef CONFIG_X86_32
+	__set_fixmap(FIX_VDSO, page_to_pfn(uts_ns->vdso32.pages[0]) << PAGE_SHIFT,
+		     map ? PAGE_READONLY_EXEC : PAGE_NONE);
+
+	/* flush stale TLB entries after changing the FIX_VDSO mapping */
+	flush_tlb_all();
+#endif
+
+out:
+	down_write(&mm->mmap_sem); /* re-acquire the lock released above before returning */
+	return pages;
+}
+
 /* Setup a VMA at program startup for the vsyscall page */
 int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
 {
@@ -340,13 +423,19 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
 	current->mm->context.vdso = (void *)addr;
 
 	if (compat_uses_vma || !compat) {
+		struct page **pages = uts_prep_vdso_pages_locked(compat);
+		if (IS_ERR(pages)) {
+			ret = PTR_ERR(pages);
+			goto up_fail;
+		}
+
 		/*
 		 * MAYWRITE to allow gdb to COW and set breakpoints
 		 */
 		ret = install_special_mapping(mm, addr, PAGE_SIZE,
 					      VM_READ|VM_EXEC|
 					      VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
-					      vdso32_pages);
+					      pages);
 
 		if (ret)
 			goto up_fail;
diff --git a/arch/x86/vdso/vdso32/note.S b/arch/x86/vdso/vdso32/note.S
index c83f257..488e84d 100644
--- a/arch/x86/vdso/vdso32/note.S
+++ b/arch/x86/vdso/vdso32/note.S
@@ -9,7 +9,9 @@
 /* Ideally this would use UTS_NAME, but using a quoted string here
    doesn't work. Remember to change this when changing the
    kernel's name. */
+	.globl linux_version_code
 ELFNOTE_START(Linux, 0, "a")
+linux_version_code:
 	.long LINUX_VERSION_CODE
 ELFNOTE_END
 
diff --git a/arch/x86/vdso/vdso32/vdso32.lds.S b/arch/x86/vdso/vdso32/vdso32.lds.S
index 976124b..576b9ba 100644
--- a/arch/x86/vdso/vdso32/vdso32.lds.S
+++ b/arch/x86/vdso/vdso32/vdso32.lds.S
@@ -35,3 +35,4 @@ VDSO32_PRELINK		= VDSO_PRELINK;
 VDSO32_vsyscall		= __kernel_vsyscall;
 VDSO32_sigreturn	= __kernel_sigreturn;
 VDSO32_rt_sigreturn	= __kernel_rt_sigreturn;
+VDSO32_linux_version_code = linux_version_code;
diff --git a/include/linux/utsname.h b/include/linux/utsname.h
index ccd270f..31984a9 100644
--- a/include/linux/utsname.h
+++ b/include/linux/utsname.h
@@ -37,6 +37,9 @@ struct uts_namespace {
 #ifdef CONFIG_X86
 	struct uts_vdso vdso;
 #endif
+#if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION)
+	struct uts_vdso vdso32;
+#endif
 };
 extern struct uts_namespace init_uts_ns;
 extern struct new_utsname virt_utsname;
diff --git a/kernel/utsname.c b/kernel/utsname.c
index 1980ddb..8b39224 100644
--- a/kernel/utsname.c
+++ b/kernel/utsname.c
@@ -25,8 +25,13 @@ static struct uts_namespace *create_uts_ns(void)
 	uts_ns = kmalloc(sizeof(struct uts_namespace), GFP_KERNEL);
 	if (uts_ns) {
 #ifdef CONFIG_X86
+#ifdef CONFIG_X86_64
 		memset(&uts_ns->vdso, 0, sizeof(uts_ns->vdso));
 #endif
+#if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION)
+		memset(&uts_ns->vdso32, 0, sizeof(uts_ns->vdso32));
+#endif
+#endif
 		kref_init(&uts_ns->kref);
 	}
 	return uts_ns;
@@ -91,6 +96,7 @@ void free_uts_ns(struct kref *kref)
 	put_user_ns(ns->user_ns);
 	proc_free_inum(ns->proc_inum);
 #ifdef CONFIG_X86
+#ifdef CONFIG_X86_64
 	if (ns->vdso.pages) {
 		int i;
 		vunmap(ns->vdso.addr);
@@ -99,6 +105,15 @@ void free_uts_ns(struct kref *kref)
 		kfree(ns->vdso.pages);
 	}
 #endif
+#if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION)
+	if (ns->vdso32.pages) {
+		int i;
+		for (i = 0; i < ns->vdso32.nr_pages; i++)
+			put_page(ns->vdso32.pages[i]);
+		kfree(ns->vdso32.pages);
+	}
+#endif
+#endif
 	kfree(ns);
 }
 



More information about the Devel mailing list