[Devel] [PATCH 4/4] x86/arch_prctl/vdso: add ARCH_MAP_VDSO_*

Dmitry Safonov dsafonov at virtuozzo.com
Tue Jul 26 07:25:02 PDT 2016


Add an API to change the vdso blob type with arch_prctl.
As this is useful only for the needs of CRIU, expose
this interface under CONFIG_CHECKPOINT_RESTORE.
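
For illustration, here is a minimal userspace sketch of how a restorer
like CRIU might call the new interface (not part of this patch; the
target address is just a placeholder, and the kernel has to be built
with CONFIG_CHECKPOINT_RESTORE for the prctl codes to be defined):

	#include <stdio.h>
	#include <sys/syscall.h>
	#include <unistd.h>

	#define ARCH_MAP_VDSO_32	0x2002	/* mirrors asm/prctl.h below */

	int main(void)
	{
		/*
		 * Placeholder target address; a real restorer would use
		 * the address recorded when the task was dumped.
		 */
		unsigned long addr = 0xffffe000UL;

		/* these arch_prctl codes have no glibc wrapper */
		if (syscall(SYS_arch_prctl, ARCH_MAP_VDSO_32, addr) < 0) {
			perror("arch_prctl(ARCH_MAP_VDSO_32)");
			return 1;
		}
		printf("32-bit vdso mapped at 0x%lx\n", addr);
		return 0;
	}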

Cc: Andy Lutomirski <luto at kernel.org>
Cc: Ingo Molnar <mingo at redhat.com>
Cc: Thomas Gleixner <tglx at linutronix.de>
Cc: "H. Peter Anvin" <hpa at zytor.com>

[Differences to the vanilla patches:
 o the API covers only the 32-bit vDSO mapping
 o the previous vdso is unmapped just by the mm->context.vdso pointer]
Signed-off-by: Dmitry Safonov <dsafonov at virtuozzo.com>
---
 arch/x86/include/asm/elf.h        |   4 ++
 arch/x86/include/uapi/asm/prctl.h |   6 +++
 arch/x86/kernel/process_64.c      |  13 +++++
 arch/x86/vdso/vdso32-setup.c      | 105 ++++++++++++++++++++++++++++++--------
 4 files changed, 107 insertions(+), 21 deletions(-)

diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h
index 5518cbccef59..e114fff00087 100644
--- a/arch/x86/include/asm/elf.h
+++ b/arch/x86/include/asm/elf.h
@@ -336,6 +336,10 @@ extern int x32_setup_additional_pages(struct linux_binprm *bprm,
 extern int syscall32_setup_pages(struct linux_binprm *, int exstack);
 #define compat_arch_setup_additional_pages	syscall32_setup_pages
 
+#ifdef CONFIG_X86_64
+extern int do_map_compat_vdso(unsigned long addr);
+#endif
+
 extern unsigned long arch_randomize_brk(struct mm_struct *mm);
 #define arch_randomize_brk arch_randomize_brk
 
diff --git a/arch/x86/include/uapi/asm/prctl.h b/arch/x86/include/uapi/asm/prctl.h
index 3ac5032fae09..405e860b2aec 100644
--- a/arch/x86/include/uapi/asm/prctl.h
+++ b/arch/x86/include/uapi/asm/prctl.h
@@ -6,4 +6,10 @@
 #define ARCH_GET_FS 0x1003
 #define ARCH_GET_GS 0x1004
 
+#ifdef CONFIG_CHECKPOINT_RESTORE
+# define ARCH_MAP_VDSO_X32     0x2001
+# define ARCH_MAP_VDSO_32      0x2002
+# define ARCH_MAP_VDSO_64      0x2003
+#endif
+
 #endif /* _ASM_X86_PRCTL_H */
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 3c5a3c1eac62..2485430f4f2a 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -554,6 +554,19 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
 		break;
 	}
 
+#ifdef CONFIG_CHECKPOINT_RESTORE
+	case ARCH_MAP_VDSO_32:
+		return do_map_compat_vdso(addr);
+
+	/*
+	 * The x32 and 64-bit vDSO remap API is omitted for simplicity.
+	 * We do need the 32-bit vDSO blob mapping to restore compat
+	 * applications, but not x32/64 ones (at least, for now).
+	 */
+	case ARCH_MAP_VDSO_X32:
+	case ARCH_MAP_VDSO_64:
+#endif
+
 	default:
 		ret = -EINVAL;
 		break;
diff --git a/arch/x86/vdso/vdso32-setup.c b/arch/x86/vdso/vdso32-setup.c
index d49dffaabc3b..5056d0ec9ab7 100644
--- a/arch/x86/vdso/vdso32-setup.c
+++ b/arch/x86/vdso/vdso32-setup.c
@@ -416,6 +416,37 @@ out:
 	return pages;
 }
 
+/* Must be called with mm->mmap_sem held for writing */
+static int __arch_setup_additional_pages(unsigned long addr, bool compat)
+{
+	struct mm_struct *mm = current->mm;
+	int ret;
+
+	current->mm->context.vdso = (void *)addr;
+
+	if (compat_uses_vma || !compat) {
+		struct page **pages = uts_prep_vdso_pages_locked(compat);
+		if (IS_ERR(pages))
+			return PTR_ERR(pages);
+
+		/*
+		 * MAYWRITE to allow gdb to COW and set breakpoints
+		 */
+		ret = install_special_mapping(mm, addr, PAGE_SIZE,
+					      VM_READ|VM_EXEC|
+					      VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
+					      pages);
+
+		if (ret)
+			return ret;
+	}
+
+	current_thread_info()->sysenter_return =
+		VDSO32_SYMBOL(addr, SYSENTER_RETURN);
+
+	return 0;
+}
+
 /* Setup a VMA at program startup for the vsyscall page */
 int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
 {
@@ -450,41 +481,73 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
 		}
 	}
 
-	current->mm->context.vdso = (void *)addr;
+	ret = __arch_setup_additional_pages(addr, compat);
+	if (ret)
+		current->mm->context.vdso = NULL;
 
-	if (compat_uses_vma || !compat) {
-		struct page **pages = uts_prep_vdso_pages_locked(compat);
-		if (IS_ERR(pages)) {
-			ret = PTR_ERR(pages);
-			goto up_fail;
-		}
+up_fail:
 
-		/*
-		 * MAYWRITE to allow gdb to COW and set breakpoints
-		 */
-		ret = install_special_mapping(mm, addr, PAGE_SIZE,
-					      VM_READ|VM_EXEC|
-					      VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
-					      pages);
+	up_write(&mm->mmap_sem);
 
-		if (ret)
-			goto up_fail;
+	return ret;
+}
+
+#ifdef CONFIG_X86_64
+
+int do_map_compat_vdso(unsigned long req_addr)
+{
+	struct mm_struct *mm = current->mm;
+	unsigned long vdso_addr;
+	struct vm_area_struct *vdso_vma;
+	int ret;
+	bool compat;
+
+	if (vdso_enabled == VDSO_DISABLED)
+		return -ENOENT;
+
+	down_write(&mm->mmap_sem);
+
+	compat = (vdso_enabled == VDSO_COMPAT);
+	/* This check could perhaps be omitted, but keep it for safety */
+	if (compat && req_addr != VDSO_HIGH_BASE) {
+		ret = -EFAULT;
+		goto up_fail;
 	}
 
-	current_thread_info()->sysenter_return =
-		VDSO32_SYMBOL(addr, SYSENTER_RETURN);
+	/* Avoid duplicating security checks such as security_mmap_addr() */
+	vdso_addr = get_unmapped_area(NULL, req_addr, PAGE_SIZE, 0, 0);
+	if (IS_ERR_VALUE(vdso_addr)) {
+		ret = vdso_addr;
+		goto up_fail;
+	}
+
+	if (req_addr != vdso_addr) {
+		ret = -EFAULT;
+		goto up_fail;
+	}
 
-  up_fail:
+	/*
+	 * First, unmap the old vdso: install_special_mapping() may not
+	 * get the rlimit/cgroup accounting right, so get rid of the old
+	 * mapping via remove_vma().
+	 */
+	vdso_vma = find_vma_intersection(mm, (unsigned long)mm->context.vdso,
+			(unsigned long)mm->context.vdso +
+			PAGE_SIZE*init_uts_ns.vdso.nr_pages);
+	if (vdso_vma)
+		do_munmap(mm, vdso_vma->vm_start,
+			vdso_vma->vm_end - vdso_vma->vm_start);
+
+	ret = __arch_setup_additional_pages(req_addr, compat);
 	if (ret)
 		current->mm->context.vdso = NULL;
 
+up_fail:
 	up_write(&mm->mmap_sem);
 
 	return ret;
 }
 
-#ifdef CONFIG_X86_64
-
 subsys_initcall(sysenter_setup);
 
 #ifdef CONFIG_SYSCTL
-- 
2.9.0


