[Devel] [PATCH RHEL7 COMMIT] ve/vdso: x86-32 -- Implement kernel version virtualization
Konstantin Khorenko
khorenko at virtuozzo.com
Wed Jun 24 06:37:14 PDT 2015
The commit is pushed to "branch-rh7-3.10.0-123.1.2-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh7-3.10.0-123.1.2.vz7.5.17
------>
commit 65051a2bbf7f62fc5c825dd5a7b0d3b29a26badf
Author: Cyrill Gorcunov <gorcunov at virtuozzo.com>
Date: Wed Jun 24 17:37:14 2015 +0400
ve/vdso: x86-32 -- Implement kernel version virtualization
This is the second patch; it implements vDSO version virtualization for
x86-32 programs running on x86-64 kernels (the so-called compat mode).
Just like for the native x86-64 mode, we implement the virtualization by
carrying a vDSO page per uts-namespace.
https://jira.sw.ru/browse/PSBM-30093
https://bugzilla.openvz.org/show_bug.cgi?id=2768
Signed-off-by: Cyrill Gorcunov <gorcunov at openvz.org>
CC: Vladimir Davydov <vdavydov at virtuozzo.com>
CC: Konstantin Khorenko <khorenko at virtuozzo.com>
---
arch/x86/vdso/vdso32-setup.c | 91 ++++++++++++++++++++++++++++++++++++++-
arch/x86/vdso/vdso32/note.S | 2 +
arch/x86/vdso/vdso32/vdso32.lds.S | 1 +
include/linux/utsname.h | 3 ++
kernel/utsname.c | 15 +++++++
5 files changed, 111 insertions(+), 1 deletion(-)
diff --git a/arch/x86/vdso/vdso32-setup.c b/arch/x86/vdso/vdso32-setup.c
index 0faad64..0683681 100644
--- a/arch/x86/vdso/vdso32-setup.c
+++ b/arch/x86/vdso/vdso32-setup.c
@@ -26,6 +26,10 @@
#include <asm/vdso.h>
#include <asm/proto.h>
+#include <linux/utsname.h>
+#include <linux/version.h>
+#include <linux/ve.h>
+
enum {
VDSO_DISABLED = 0,
VDSO_ENABLED = 1,
@@ -303,6 +307,85 @@ int __init sysenter_setup(void)
return 0;
}
+/*
+ * Return the vDSO page array for the current task's uts namespace, creating
+ * on first use a private copy of the page with linux_version_code patched to
+ * the namespace's release. Expects mm->mmap_sem held for write; it is dropped
+ * around the allocations and re-taken before return. @map selects the
+ * FIX_VDSO protection on CONFIG_X86_32. Returns ERR_PTR(-ENOMEM) on failure.
+ */
+static struct page **uts_prep_vdso_pages_locked(int map)
+{
+ struct uts_namespace *uts_ns = current->nsproxy->uts_ns;
+ struct mm_struct *mm = current->mm;
+ struct ve_struct *ve = get_exec_env();
+ struct page **pages = vdso32_pages;
+ int n1, n2, n3, new_version;
+ void *addr;
+
+ /* Reuse already prepared vDSO pages if we can. */
+ if (uts_ns == &init_uts_ns)
+ return vdso32_pages;
+ else if (uts_ns->vdso32.pages)
+ return uts_ns->vdso32.pages;
+
+ up_write(&mm->mmap_sem);
+
+ if (sscanf(uts_ns->name.release, "%d.%d.%d", &n1, &n2, &n3) == 3) {
+ /* Version unchanged: keep using the preallocated pages. */
+ new_version = KERNEL_VERSION(n1, n2, n3);
+ if (new_version == LINUX_VERSION_CODE)
+ goto out;
+ } else {
+ /* Malformed release string: warn the admin once and carry on
+ * without vDSO virtualization rather than fail with an error. */
+ pr_warn_once("Wrong release uts name format detected."
+ " Ignoring vDSO virtualization.\n");
+ goto out;
+ }
+
+ uts_ns->vdso32.nr_pages = 1;
+ uts_ns->vdso32.size = PAGE_SIZE;
+ uts_ns->vdso32.version_off = (unsigned long)VDSO32_SYMBOL(0, linux_version_code);
+ /* NOTE(review): vdso32.pages becomes non-NULL here, before pages[0] is set
+ * and while mmap_sem is released -- confirm concurrent callers cannot race. */
+ uts_ns->vdso32.pages = kmalloc(sizeof(struct page *), GFP_KERNEL);
+ if (!uts_ns->vdso32.pages) {
+ pr_err("Can't allocate vDSO pages array for VE %d\n", ve->veid);
+ pages = ERR_PTR(-ENOMEM);
+ goto out;
+ }
+
+ uts_ns->vdso32.pages[0] = alloc_page(GFP_KERNEL);
+ if (!uts_ns->vdso32.pages[0]) {
+ pr_err("Can't allocate page for VE %d\n", ve->veid);
+ kfree(uts_ns->vdso32.pages);
+ uts_ns->vdso32.pages = NULL;
+ pages = ERR_PTR(-ENOMEM);
+ goto out;
+ }
+
+ copy_page(page_address(uts_ns->vdso32.pages[0]), page_address(vdso32_pages[0]));
+ pages = uts_ns->vdso32.pages;
+
+ addr = page_address(uts_ns->vdso32.pages[0]);
+ *((int *)(addr + uts_ns->vdso32.version_off)) = new_version;
+ pr_debug("vDSO version transition %d -> %d for VE %d\n",
+ LINUX_VERSION_CODE, new_version, ve->veid);
+
+#ifdef CONFIG_X86_32
+ __set_fixmap(FIX_VDSO, page_to_pfn(uts_ns->vdso32.pages[0]) << PAGE_SHIFT,
+ map ? PAGE_READONLY_EXEC : PAGE_NONE);
+
+ /* flush stray tlbs */
+ flush_tlb_all();
+#endif
+
+out:
+ down_write(&mm->mmap_sem);
+ return pages;
+}
+
/* Setup a VMA at program startup for the vsyscall page */
int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
{
@@ -340,13 +423,19 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
current->mm->context.vdso = (void *)addr;
if (compat_uses_vma || !compat) {
+ struct page **pages = uts_prep_vdso_pages_locked(compat);
+ if (IS_ERR(pages)) {
+ ret = PTR_ERR(pages);
+ goto up_fail;
+ }
+
/*
* MAYWRITE to allow gdb to COW and set breakpoints
*/
ret = install_special_mapping(mm, addr, PAGE_SIZE,
VM_READ|VM_EXEC|
VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
- vdso32_pages);
+ pages);
if (ret)
goto up_fail;
diff --git a/arch/x86/vdso/vdso32/note.S b/arch/x86/vdso/vdso32/note.S
index c83f257..488e84d 100644
--- a/arch/x86/vdso/vdso32/note.S
+++ b/arch/x86/vdso/vdso32/note.S
@@ -9,7 +9,9 @@
/* Ideally this would use UTS_NAME, but using a quoted string here
doesn't work. Remember to change this when changing the
kernel's name. */
+ .globl linux_version_code
ELFNOTE_START(Linux, 0, "a")
+linux_version_code:
.long LINUX_VERSION_CODE
ELFNOTE_END
diff --git a/arch/x86/vdso/vdso32/vdso32.lds.S b/arch/x86/vdso/vdso32/vdso32.lds.S
index 976124b..576b9ba 100644
--- a/arch/x86/vdso/vdso32/vdso32.lds.S
+++ b/arch/x86/vdso/vdso32/vdso32.lds.S
@@ -35,3 +35,4 @@ VDSO32_PRELINK = VDSO_PRELINK;
VDSO32_vsyscall = __kernel_vsyscall;
VDSO32_sigreturn = __kernel_sigreturn;
VDSO32_rt_sigreturn = __kernel_rt_sigreturn;
+VDSO32_linux_version_code = linux_version_code;
diff --git a/include/linux/utsname.h b/include/linux/utsname.h
index ccd270f..31984a9 100644
--- a/include/linux/utsname.h
+++ b/include/linux/utsname.h
@@ -37,6 +37,9 @@ struct uts_namespace {
#ifdef CONFIG_X86
struct uts_vdso vdso;
#endif
+#if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION)
+ struct uts_vdso vdso32;
+#endif
};
extern struct uts_namespace init_uts_ns;
extern struct new_utsname virt_utsname;
diff --git a/kernel/utsname.c b/kernel/utsname.c
index 1980ddb..8b39224 100644
--- a/kernel/utsname.c
+++ b/kernel/utsname.c
@@ -25,8 +25,13 @@ static struct uts_namespace *create_uts_ns(void)
uts_ns = kmalloc(sizeof(struct uts_namespace), GFP_KERNEL);
if (uts_ns) {
#ifdef CONFIG_X86
+#ifdef CONFIG_X86_64
memset(&uts_ns->vdso, 0, sizeof(uts_ns->vdso));
#endif
+#if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION)
+ memset(&uts_ns->vdso32, 0, sizeof(uts_ns->vdso32));
+#endif
+#endif
kref_init(&uts_ns->kref);
}
return uts_ns;
@@ -91,6 +96,7 @@ void free_uts_ns(struct kref *kref)
put_user_ns(ns->user_ns);
proc_free_inum(ns->proc_inum);
#ifdef CONFIG_X86
+#ifdef CONFIG_X86_64
if (ns->vdso.pages) {
int i;
vunmap(ns->vdso.addr);
@@ -99,6 +105,15 @@ void free_uts_ns(struct kref *kref)
kfree(ns->vdso.pages);
}
#endif
+#if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION)
+ if (ns->vdso32.pages) {
+ int i;
+ for (i = 0; i < ns->vdso32.nr_pages; i++)
+ put_page(ns->vdso32.pages[i]);
+ kfree(ns->vdso32.pages);
+ }
+#endif
+#endif
kfree(ns);
}
More information about the Devel
mailing list