[Devel] [PATCH RHEL7 COMMIT] ms/prctl: more prctl(PR_SET_MM_*) checks

Konstantin Khorenko khorenko at virtuozzo.com
Tue Oct 20 02:31:09 PDT 2015


The commit is pushed to "branch-rh7-3.10.0-229.7.2.vz7.8.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh7-3.10.0-229.7.2.vz7.8.8
------>
commit bb9f7dbacfe105baa65562e3baa6d687c75ff924
Author: Alexey Dobriyan <adobriyan at gmail.com>
Date:   Tue Oct 20 13:30:24 2015 +0400

    ms/prctl: more prctl(PR_SET_MM_*) checks
    
    This is needed for CRIU.
    
    ML: 4a00e9df293d010acbea118b9521e08cb85016c6
    
    https://jira.sw.ru/browse/PSBM-39834
    
    From: Alexey Dobriyan <adobriyan at gmail.com>
    
    Individual prctl(PR_SET_MM_*) calls do some checking to maintain a
    consistent view of mm->arg_start et al fields, but not enough.  In
    particular PR_SET_MM_ARG_START/PR_SET_MM_ARG_END/PR_SET_MM_ENV_START/
    PR_SET_MM_ENV_END only check that the address lies in an existing VMA,
    but don't check that the start address is lower than the end address _at
    all_.
    
    Consolidate all consistency checks, so there will be no difference in
    the future between PR_SET_MM_MAP and individual PR_SET_MM_* calls.
    
    The program below reverses both the ARGV and ENVP areas.  This makes
    /proc/$PID/cmdline show garbage (it is only by luck that it doesn't oops).
    
    enum {PAGE_SIZE=4096};
    
    int main(void)
    {
            void *p;
    
            p = mmap(NULL, PAGE_SIZE, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
    
            prctl(PR_SET_MM, PR_SET_MM_ARG_START, (unsigned long)p + PAGE_SIZE - 1, 0, 0);
            prctl(PR_SET_MM, PR_SET_MM_ARG_END,   (unsigned long)p, 0, 0);
            prctl(PR_SET_MM, PR_SET_MM_ENV_START, (unsigned long)p + PAGE_SIZE - 1, 0, 0);
            prctl(PR_SET_MM, PR_SET_MM_ENV_END,   (unsigned long)p, 0, 0);
    
            pause();
            return 0;
    }
    
    [akpm at linux-foundation.org: tidy code, tweak comment]
    Signed-off-by: Alexey Dobriyan <adobriyan at gmail.com>
    Acked-by: Cyrill Gorcunov <gorcunov at openvz.org>
    Cc: Jarod Wilson <jarod at redhat.com>
    Cc: Jan Stancek <jstancek at redhat.com>
    Signed-off-by: Andrew Morton <akpm at linux-foundation.org>
    Signed-off-by: Linus Torvalds <torvalds at linux-foundation.org>
    Signed-off-by: Cyrill Gorcunov <gorcunov at virtuozzo.com>
---
 kernel/sys.c | 158 ++++++++++++++++++++++++++++++++++-------------------------
 1 file changed, 91 insertions(+), 67 deletions(-)

diff --git a/kernel/sys.c b/kernel/sys.c
index 9218fde..9dd9d57 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -2095,7 +2095,6 @@ exit:
 	return err;
 }
 
-#ifdef CONFIG_CHECKPOINT_RESTORE
 /*
  * WARNING: we don't require any capability here so be very careful
  * in what is allowed for modification from userspace.
@@ -2191,6 +2190,7 @@ out:
 	return error;
 }
 
+#ifdef CONFIG_CHECKPOINT_RESTORE
 static int prctl_set_mm_map(int opt, const void __user *addr, unsigned long data_size)
 {
 	struct prctl_mm_map prctl_map = { .exe_fd = (u32)-1, };
@@ -2276,10 +2276,41 @@ out:
 }
 #endif /* CONFIG_CHECKPOINT_RESTORE */
 
+static int prctl_set_auxv(struct mm_struct *mm, unsigned long addr,
+			  unsigned long len)
+{
+	/*
+	 * This doesn't move the auxiliary vector itself since it's pinned to
+	 * mm_struct, but it permits filling the vector with new values.  It's
+	 * up to the caller to provide sane values here, otherwise userspace
+	 * tools which use this vector might be unhappy.
+	 */
+	unsigned long user_auxv[AT_VECTOR_SIZE];
+
+	if (len > sizeof(user_auxv))
+		return -EINVAL;
+
+	if (copy_from_user(user_auxv, (const void __user *)addr, len))
+		return -EFAULT;
+
+	/* Make sure the last entry is always AT_NULL */
+	user_auxv[AT_VECTOR_SIZE - 2] = 0;
+	user_auxv[AT_VECTOR_SIZE - 1] = 0;
+
+	BUILD_BUG_ON(sizeof(user_auxv) != sizeof(mm->saved_auxv));
+
+	task_lock(current);
+	memcpy(mm->saved_auxv, user_auxv, len);
+	task_unlock(current);
+
+	return 0;
+}
+
 static int prctl_set_mm(int opt, unsigned long addr,
 			unsigned long arg4, unsigned long arg5)
 {
 	struct mm_struct *mm = current->mm;
+	struct prctl_mm_map prctl_map;
 	struct vm_area_struct *vma;
 	int error;
 
@@ -2303,6 +2334,9 @@ static int prctl_set_mm(int opt, unsigned long addr,
 		return error;
 	}
 
+	if (opt == PR_SET_MM_AUXV)
+		return prctl_set_auxv(mm, addr, arg4);
+
 	if (addr >= TASK_SIZE || addr < mmap_min_addr)
 		return -EINVAL;
 
@@ -2311,42 +2345,64 @@ static int prctl_set_mm(int opt, unsigned long addr,
 	down_read(&mm->mmap_sem);
 	vma = find_vma(mm, addr);
 
+	prctl_map.start_code	= mm->start_code;
+	prctl_map.end_code	= mm->end_code;
+	prctl_map.start_data	= mm->start_data;
+	prctl_map.end_data	= mm->end_data;
+	prctl_map.start_brk	= mm->start_brk;
+	prctl_map.brk		= mm->brk;
+	prctl_map.start_stack	= mm->start_stack;
+	prctl_map.arg_start	= mm->arg_start;
+	prctl_map.arg_end	= mm->arg_end;
+	prctl_map.env_start	= mm->env_start;
+	prctl_map.env_end	= mm->env_end;
+	prctl_map.auxv		= NULL;
+	prctl_map.auxv_size	= 0;
+	prctl_map.exe_fd	= -1;
+
 	switch (opt) {
 	case PR_SET_MM_START_CODE:
-		mm->start_code = addr;
+		prctl_map.start_code = addr;
 		break;
 	case PR_SET_MM_END_CODE:
-		mm->end_code = addr;
+		prctl_map.end_code = addr;
 		break;
 	case PR_SET_MM_START_DATA:
-		mm->start_data = addr;
+		prctl_map.start_data = addr;
 		break;
 	case PR_SET_MM_END_DATA:
-		mm->end_data = addr;
+		prctl_map.end_data = addr;
+		break;
+	case PR_SET_MM_START_STACK:
+		prctl_map.start_stack = addr;
 		break;
-
 	case PR_SET_MM_START_BRK:
-		if (addr <= mm->end_data)
-			goto out;
-
-		if (check_data_rlimit(rlimit(RLIMIT_DATA), mm->brk, addr,
-				      mm->end_data, mm->start_data))
-			goto out;
-
-		mm->start_brk = addr;
+		prctl_map.start_brk = addr;
 		break;
-
 	case PR_SET_MM_BRK:
-		if (addr <= mm->end_data)
-			goto out;
-
-		if (check_data_rlimit(rlimit(RLIMIT_DATA), addr, mm->start_brk,
-				      mm->end_data, mm->start_data))
-			goto out;
-
-		mm->brk = addr;
+		prctl_map.brk = addr;
+		break;
+	case PR_SET_MM_ARG_START:
+		prctl_map.arg_start = addr;
+		break;
+	case PR_SET_MM_ARG_END:
+		prctl_map.arg_end = addr;
+		break;
+	case PR_SET_MM_ENV_START:
+		prctl_map.env_start = addr;
 		break;
+	case PR_SET_MM_ENV_END:
+		prctl_map.env_end = addr;
+		break;
+	default:
+		goto out;
+	}
 
+	error = validate_prctl_map(&prctl_map);
+	if (error)
+		goto out;
+
+	switch (opt) {
 	/*
 	 * If command line arguments and environment
 	 * are placed somewhere else on stack, we can
@@ -2363,52 +2419,20 @@ static int prctl_set_mm(int opt, unsigned long addr,
 			error = -EFAULT;
 			goto out;
 		}
-		if (opt == PR_SET_MM_START_STACK)
-			mm->start_stack = addr;
-		else if (opt == PR_SET_MM_ARG_START)
-			mm->arg_start = addr;
-		else if (opt == PR_SET_MM_ARG_END)
-			mm->arg_end = addr;
-		else if (opt == PR_SET_MM_ENV_START)
-			mm->env_start = addr;
-		else if (opt == PR_SET_MM_ENV_END)
-			mm->env_end = addr;
-		break;
-
-	/*
-	 * This doesn't move auxiliary vector itself
-	 * since it's pinned to mm_struct, but allow
-	 * to fill vector with new values. It's up
-	 * to a caller to provide sane values here
-	 * otherwise user space tools which use this
-	 * vector might be unhappy.
-	 */
-	case PR_SET_MM_AUXV: {
-		unsigned long user_auxv[AT_VECTOR_SIZE];
-
-		if (arg4 > sizeof(user_auxv))
-			goto out;
-		up_read(&mm->mmap_sem);
-
-		if (copy_from_user(user_auxv, (const void __user *)addr, arg4))
-			return -EFAULT;
-
-		/* Make sure the last entry is always AT_NULL */
-		user_auxv[AT_VECTOR_SIZE - 2] = 0;
-		user_auxv[AT_VECTOR_SIZE - 1] = 0;
-
-		BUILD_BUG_ON(sizeof(user_auxv) != sizeof(mm->saved_auxv));
-
-		task_lock(current);
-		memcpy(mm->saved_auxv, user_auxv, arg4);
-		task_unlock(current);
-
-		return 0;
-	}
-	default:
-		goto out;
 	}
 
+	mm->start_code	= prctl_map.start_code;
+	mm->end_code	= prctl_map.end_code;
+	mm->start_data	= prctl_map.start_data;
+	mm->end_data	= prctl_map.end_data;
+	mm->start_brk	= prctl_map.start_brk;
+	mm->brk		= prctl_map.brk;
+	mm->start_stack	= prctl_map.start_stack;
+	mm->arg_start	= prctl_map.arg_start;
+	mm->arg_end	= prctl_map.arg_end;
+	mm->env_start	= prctl_map.env_start;
+	mm->env_end	= prctl_map.env_end;
+
 	error = 0;
 out:
 	up_read(&mm->mmap_sem);



More information about the Devel mailing list