[Devel] [PATCH RHEL7 COMMIT] x86/arch_prctl/vdso: add ARCH_MAP_VDSO_*

Konstantin Khorenko khorenko at virtuozzo.com
Thu Aug 11 02:28:05 PDT 2016


The commit is pushed to "branch-rh7-3.10.0-327.22.2.vz7.16.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh7-3.10.0-327.22.2.vz7.16.5
------>
commit e7054163687a860c1f7a584d7d6912eb3edc8dfc
Author: Dmitry Safonov <dsafonov at virtuozzo.com>
Date:   Thu Aug 11 13:28:04 2016 +0400

    x86/arch_prctl/vdso: add ARCH_MAP_VDSO_*
    
    Add an API to change the vDSO blob type with arch_prctl.
    As this is useful only for the needs of CRIU, expose
    this interface under CONFIG_CHECKPOINT_RESTORE.
    
    Cc: Andy Lutomirski <luto at kernel.org>
    Cc: Ingo Molnar <mingo at redhat.com>
    Cc: Thomas Gleixner <tglx at linutronix.de>
    Cc: "H. Peter Anvin" <hpa at zytor.com>
    
    [Differences from the vanilla patches:
     o the API covers only the 32-bit vDSO mapping
     o the previous vDSO is unmapped simply via the mm->context.vdso pointer]
    Signed-off-by: Dmitry Safonov <dsafonov at virtuozzo.com>
---
 arch/x86/include/asm/elf.h        |   4 ++
 arch/x86/include/uapi/asm/prctl.h |   6 +++
 arch/x86/kernel/process_64.c      |  13 +++++
 arch/x86/vdso/vdso32-setup.c      | 105 ++++++++++++++++++++++++++++++--------
 4 files changed, 107 insertions(+), 21 deletions(-)
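
For context (not part of the patch), here is a minimal userspace sketch of how a
CRIU-style restorer might request the 32-bit vDSO blob through the new arch_prctl
code. ARCH_MAP_VDSO_32 is defined locally because the uapi define added below is
hidden under CONFIG_CHECKPOINT_RESTORE; its value is taken from the prctl.h hunk.
VDSO_TARGET_ADDR is a purely hypothetical address recorded at dump time, and
SYS_arch_prctl is the standard x86_64 syscall macro from <sys/syscall.h>.

#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>

#ifndef ARCH_MAP_VDSO_32
#define ARCH_MAP_VDSO_32	0x2002	/* value from the prctl.h hunk below */
#endif

/* hypothetical target address recorded at dump time */
#define VDSO_TARGET_ADDR	0xf77c0000UL

int main(void)
{
	/* glibc has no arch_prctl() wrapper, so go through syscall(2) */
	if (syscall(SYS_arch_prctl, ARCH_MAP_VDSO_32, VDSO_TARGET_ADDR) != 0) {
		/*
		 * Expected errors with this patch applied:
		 *   ENOENT - vdso_enabled == VDSO_DISABLED
		 *   EFAULT - the requested address is not available
		 *            (or differs from VDSO_HIGH_BASE with VDSO_COMPAT)
		 *   EINVAL - kernel without the patch or without
		 *            CONFIG_CHECKPOINT_RESTORE
		 */
		fprintf(stderr, "ARCH_MAP_VDSO_32 failed: %s\n", strerror(errno));
		return 1;
	}

	printf("32-bit vDSO mapped at %#lx\n", VDSO_TARGET_ADDR);
	return 0;
}

A second sketch at the end of the mail shows how the resulting mapping can be
checked via /proc/self/maps.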

diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h
index 5518cbc..e114fff 100644
--- a/arch/x86/include/asm/elf.h
+++ b/arch/x86/include/asm/elf.h
@@ -336,6 +336,10 @@ extern int x32_setup_additional_pages(struct linux_binprm *bprm,
 extern int syscall32_setup_pages(struct linux_binprm *, int exstack);
 #define compat_arch_setup_additional_pages	syscall32_setup_pages
 
+#ifdef CONFIG_X86_64
+extern int do_map_compat_vdso(unsigned long addr);
+#endif
+
 extern unsigned long arch_randomize_brk(struct mm_struct *mm);
 #define arch_randomize_brk arch_randomize_brk
 
diff --git a/arch/x86/include/uapi/asm/prctl.h b/arch/x86/include/uapi/asm/prctl.h
index 3ac5032..405e860 100644
--- a/arch/x86/include/uapi/asm/prctl.h
+++ b/arch/x86/include/uapi/asm/prctl.h
@@ -6,4 +6,10 @@
 #define ARCH_GET_FS 0x1003
 #define ARCH_GET_GS 0x1004
 
+#ifdef CONFIG_CHECKPOINT_RESTORE
+# define ARCH_MAP_VDSO_X32     0x2001
+# define ARCH_MAP_VDSO_32      0x2002
+# define ARCH_MAP_VDSO_64      0x2003
+#endif
+
 #endif /* _ASM_X86_PRCTL_H */
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 3c5a3c1..2485430 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -554,6 +554,19 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
 		break;
 	}
 
+#ifdef CONFIG_CHECKPOINT_RESTORE
+	case ARCH_MAP_VDSO_32:
+		return do_map_compat_vdso(addr);
+
+	/*
+	 * The x32 and 64-bit vDSO remap API is omitted for simplicity.
+	 * We need the 32-bit vDSO blob mapping to restore compat
+	 * applications, but not x32/64 ones (at least, for now).
+	 */
+	case ARCH_MAP_VDSO_X32:
+	case ARCH_MAP_VDSO_64:
+#endif
+
 	default:
 		ret = -EINVAL;
 		break;
diff --git a/arch/x86/vdso/vdso32-setup.c b/arch/x86/vdso/vdso32-setup.c
index d49dffa..5056d0e 100644
--- a/arch/x86/vdso/vdso32-setup.c
+++ b/arch/x86/vdso/vdso32-setup.c
@@ -416,6 +416,37 @@ out:
 	return pages;
 }
 
+/* Must be called with mm->mmap_sem held for write */
+static int __arch_setup_additional_pages(unsigned long addr, bool compat)
+{
+	struct mm_struct *mm = current->mm;
+	int ret;
+
+	current->mm->context.vdso = (void *)addr;
+
+	if (compat_uses_vma || !compat) {
+		struct page **pages = uts_prep_vdso_pages_locked(compat);
+		if (IS_ERR(pages))
+			return PTR_ERR(pages);
+
+		/*
+		 * MAYWRITE to allow gdb to COW and set breakpoints
+		 */
+		ret = install_special_mapping(mm, addr, PAGE_SIZE,
+					      VM_READ|VM_EXEC|
+					      VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
+					      pages);
+
+		if (ret)
+			return ret;
+	}
+
+	current_thread_info()->sysenter_return =
+		VDSO32_SYMBOL(addr, SYSENTER_RETURN);
+
+	return 0;
+}
+
 /* Setup a VMA at program startup for the vsyscall page */
 int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
 {
@@ -450,41 +481,73 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
 		}
 	}
 
-	current->mm->context.vdso = (void *)addr;
+	ret = __arch_setup_additional_pages(addr, compat);
+	if (ret)
+		current->mm->context.vdso = NULL;
 
-	if (compat_uses_vma || !compat) {
-		struct page **pages = uts_prep_vdso_pages_locked(compat);
-		if (IS_ERR(pages)) {
-			ret = PTR_ERR(pages);
-			goto up_fail;
-		}
+up_fail:
 
-		/*
-		 * MAYWRITE to allow gdb to COW and set breakpoints
-		 */
-		ret = install_special_mapping(mm, addr, PAGE_SIZE,
-					      VM_READ|VM_EXEC|
-					      VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
-					      pages);
+	up_write(&mm->mmap_sem);
 
-		if (ret)
-			goto up_fail;
+	return ret;
+}
+
+#ifdef CONFIG_X86_64
+
+int do_map_compat_vdso(unsigned long req_addr)
+{
+	struct mm_struct *mm = current->mm;
+	unsigned long vdso_addr;
+	struct vm_area_struct *vdso_vma;
+	int ret;
+	bool compat;
+
+	if (vdso_enabled == VDSO_DISABLED)
+		return -ENOENT;
+
+	down_write(&mm->mmap_sem);
+
+	compat = (vdso_enabled == VDSO_COMPAT);
+	/* We could probably omit this check, but keep it for safety */
+	if (compat && req_addr != VDSO_HIGH_BASE) {
+		ret = -EFAULT;
+		goto up_fail;
 	}
 
-	current_thread_info()->sysenter_return =
-		VDSO32_SYMBOL(addr, SYSENTER_RETURN);
+	/* We don't want to duplicate security checks like security_mmap_addr() */
+	vdso_addr = get_unmapped_area(NULL, req_addr, PAGE_SIZE, 0, 0);
+	if (IS_ERR_VALUE(vdso_addr)) {
+		ret = vdso_addr;
+		goto up_fail;
+	}
+
+	if (req_addr != vdso_addr) {
+		ret = -EFAULT;
+		goto up_fail;
+	}
 
-  up_fail:
+	/*
+	 * First, unmap the old vDSO: install_special_mapping() may not
+	 * get rlimit/cgroup accounting right, so get rid of the old
+	 * mapping via remove_vma().
+	 */
+	vdso_vma = find_vma_intersection(mm, (unsigned long)mm->context.vdso,
+			(unsigned long)mm->context.vdso +
+			PAGE_SIZE*init_uts_ns.vdso.nr_pages);
+	if (vdso_vma)
+		do_munmap(mm, vdso_vma->vm_start,
+			vdso_vma->vm_end - vdso_vma->vm_start);
+
+	ret = __arch_setup_additional_pages(req_addr, compat);
 	if (ret)
 		current->mm->context.vdso = NULL;
 
+up_fail:
 	up_write(&mm->mmap_sem);
 
 	return ret;
 }
 
-#ifdef CONFIG_X86_64
-
 subsys_initcall(sysenter_setup);
 
 #ifdef CONFIG_SYSCTL

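As a follow-up illustration (again not part of the patch, purely a sketch), a
restorer could confirm where the [vdso] mapping ended up after the
ARCH_MAP_VDSO_32 call by scanning /proc/self/maps; only the procfs file name
and format are standard, everything else here is illustrative.

#include <stdio.h>
#include <string.h>

/*
 * Return the start address of the current [vdso] mapping, or 0 if
 * none is found.  Useful for checking that the blob really landed
 * at the address passed to ARCH_MAP_VDSO_32.
 */
static unsigned long find_vdso_start(void)
{
	char line[256];
	unsigned long start = 0, from, to;
	FILE *f = fopen("/proc/self/maps", "r");

	if (!f)
		return 0;

	while (fgets(line, sizeof(line), f)) {
		if (!strstr(line, "[vdso]"))
			continue;
		if (sscanf(line, "%lx-%lx", &from, &to) == 2)
			start = from;
		break;
	}

	fclose(f);
	return start;
}

int main(void)
{
	printf("[vdso] currently mapped at %#lx\n", find_vdso_start());
	return 0;
}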
