[Devel] [PATCH RHEL7 COMMIT] ms/lib: introduce copy_struct_from_user() helper

Konstantin Khorenko khorenko at virtuozzo.com
Thu Apr 20 21:00:48 MSK 2023


The commit is pushed to "branch-rh7-3.10.0-1160.88.1.vz7.195.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh7-3.10.0-1160.88.1.vz7.195.2
------>
commit 17598999b4144ec888f8bc875637eb40a80bf4c4
Author: Aleksa Sarai <cyphar at cyphar.com>
Date:   Thu Apr 13 18:47:13 2023 +0800

    ms/lib: introduce copy_struct_from_user() helper
    
    A common pattern for syscall extensions is increasing the size of a
    struct passed from userspace, such that the zero-value of the new fields
    result in the old kernel behaviour (allowing for a mix of userspace and
    kernel vintages to operate on one another in most cases).
    
    While this interface exists for communication in both directions, only
    one interface is straightforward to have reasonable semantics for
    (userspace passing a struct to the kernel). For kernel returns to
    userspace, what the correct semantics are (whether there should be an
    error if userspace is unaware of a new extension) is very
    syscall-dependent and thus probably cannot be unified between syscalls
    (a good example of this problem is [1]).
    
    Previously there was no common lib/ function that implemented
    the necessary extension-checking semantics (and different syscalls
    implemented them slightly differently or incompletely[2]). Future
    patches replace common uses of this pattern to make use of
    copy_struct_from_user().
    
    Some in-kernel selftests that insure that the handling of alignment and
    various byte patterns are all handled identically to memchr_inv() usage.
    
    [1]: commit 1251201c0d34 ("sched/core: Fix uclamp ABI bug, clean up and
         robustify sched_read_attr() ABI logic and code")
    
    [2]: For instance {sched_setattr,perf_event_open,clone3}(2) all do do
         similar checks to copy_struct_from_user() while rt_sigprocmask(2)
         always rejects differently-sized struct arguments.
    
    Suggested-by: Rasmus Villemoes <linux at rasmusvillemoes.dk>
    Signed-off-by: Aleksa Sarai <cyphar at cyphar.com>
    Reviewed-by: Kees Cook <keescook at chromium.org>
    Reviewed-by: Christian Brauner <christian.brauner at ubuntu.com>
    Link: https://lore.kernel.org/r/20191001011055.19283-2-cyphar@cyphar.com
    Signed-off-by: Christian Brauner <christian.brauner at ubuntu.com>
    
    - Need this for mount_setxattr syscall.
    - Dropped lib/test_user_copy.c hunks when rebasing.
    - Move aligned_byte_mask to vz_bitops.h else something breaks on boot
    after moving it to bitops.h.
    - Add unsafe_get_user, user_access_begin and user_access_end helpers.
    
    https://jira.vzint.dev/browse/PSBM-144416
    (cherry picked from commit f5a1a536fa14895ccff4e94e6a5af90901ce86aa)
    Signed-off-by: Pavel Tikhomirov <ptikhomirov at virtuozzo.com>
    
    =================
    Patchset description:
    mount: Port move_mount_set_group and mount_setattr
    
    We need this as in Virtuozzo criu after rebase to mainstream criu in u20
    we will switch to this new API for sharing group setting accross mounts.
    
    https://jira.vzint.dev/browse/PSBM-144416
---
 include/linux/uaccess.h   | 70 +++++++++++++++++++++++++++++++++++++++++++
 include/linux/vz_bitops.h | 11 +++++++
 lib/strnlen_user.c        |  8 +----
 lib/usercopy.c            | 76 +++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 158 insertions(+), 7 deletions(-)

diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h
index b28cc68c1896..9f8da4aa31a1 100644
--- a/include/linux/uaccess.h
+++ b/include/linux/uaccess.h
@@ -112,6 +112,76 @@ static inline unsigned long __copy_from_user_nocache(void *to,
 		ret;					\
 	})
 
+extern __must_check int check_zeroed_user(const void __user *from, size_t size);
+
+/**
+ * copy_struct_from_user: copy a struct from userspace
+ * @dst:   Destination address, in kernel space. This buffer must be @ksize
+ *         bytes long.
+ * @ksize: Size of @dst struct.
+ * @src:   Source address, in userspace.
+ * @usize: (Alleged) size of @src struct.
+ *
+ * Copies a struct from userspace to kernel space, in a way that guarantees
+ * backwards-compatibility for struct syscall arguments (as long as future
+ * struct extensions are made such that all new fields are *appended* to the
+ * old struct, and zeroed-out new fields have the same meaning as the old
+ * struct).
+ *
+ * @ksize is just sizeof(*dst), and @usize should've been passed by userspace.
+ * The recommended usage is something like the following:
+ *
+ *   SYSCALL_DEFINE2(foobar, const struct foo __user *, uarg, size_t, usize)
+ *   {
+ *      int err;
+ *      struct foo karg = {};
+ *
+ *      if (usize > PAGE_SIZE)
+ *        return -E2BIG;
+ *      if (usize < FOO_SIZE_VER0)
+ *        return -EINVAL;
+ *
+ *      err = copy_struct_from_user(&karg, sizeof(karg), uarg, usize);
+ *      if (err)
+ *        return err;
+ *
+ *      // ...
+ *   }
+ *
+ * There are three cases to consider:
+ *  * If @usize == @ksize, then it's copied verbatim.
+ *  * If @usize < @ksize, then the userspace has passed an old struct to a
+ *    newer kernel. The rest of the trailing bytes in @dst (@ksize - @usize)
+ *    are to be zero-filled.
+ *  * If @usize > @ksize, then the userspace has passed a new struct to an
+ *    older kernel. The trailing bytes unknown to the kernel (@usize - @ksize)
+ *    are checked to ensure they are zeroed, otherwise -E2BIG is returned.
+ *
+ * Returns (in all cases, some data may have been copied):
+ *  * -E2BIG:  (@usize > @ksize) and there are non-zero trailing bytes in @src.
+ *  * -EFAULT: access to userspace failed.
+ */
+static __always_inline __must_check int
+copy_struct_from_user(void *dst, size_t ksize, const void __user *src,
+		      size_t usize)
+{
+	size_t size = min(ksize, usize);
+	size_t rest = max(ksize, usize) - size;
+
+	/* Deal with trailing bytes. */
+	if (usize < ksize) {
+		memset(dst + size, 0, rest);
+	} else if (usize > ksize) {
+		int ret = check_zeroed_user(src + size, rest);
+		if (ret <= 0)
+			return ret ?: -E2BIG;
+	}
+	/* Copy the interoperable parts of the struct. */
+	if (copy_from_user(dst, src, size))
+		return -EFAULT;
+	return 0;
+}
+
 /*
  * probe_kernel_read(): safely attempt to read from a location
  * @dst: pointer to the buffer that shall take the data
diff --git a/include/linux/vz_bitops.h b/include/linux/vz_bitops.h
new file mode 100644
index 000000000000..b8d29ce0b61d
--- /dev/null
+++ b/include/linux/vz_bitops.h
@@ -0,0 +1,11 @@
+#ifndef _LINUX_VZ_BITOPS_H
+#define _LINUX_VZ_BITOPS_H
+#include <asm/types.h>
+
+/* Set bits in the first 'n' bytes when loaded from memory */
+#ifdef __LITTLE_ENDIAN
+#  define aligned_byte_mask(n) ((1UL << 8*(n))-1)
+#else
+#  define aligned_byte_mask(n) (~0xffUL << (BITS_PER_LONG - 8 - 8*(n)))
+#endif
+#endif
diff --git a/lib/strnlen_user.c b/lib/strnlen_user.c
index 36c15a2889e4..5822d28f0cd7 100644
--- a/lib/strnlen_user.c
+++ b/lib/strnlen_user.c
@@ -1,16 +1,10 @@
 #include <linux/kernel.h>
 #include <linux/export.h>
 #include <linux/uaccess.h>
+#include <linux/vz_bitops.h>
 
 #include <asm/word-at-a-time.h>
 
-/* Set bits in the first 'n' bytes when loaded from memory */
-#ifdef __LITTLE_ENDIAN
-#  define aligned_byte_mask(n) ((1ul << 8*(n))-1)
-#else
-#  define aligned_byte_mask(n) (~0xfful << (BITS_PER_LONG - 8 - 8*(n)))
-#endif
-
 /*
  * Do a strnlen, return length of string *with* final '\0'.
  * 'count' is the user-supplied count, while 'max' is the
diff --git a/lib/usercopy.c b/lib/usercopy.c
index 4f5b1ddbcd25..243e4f7d4bd4 100644
--- a/lib/usercopy.c
+++ b/lib/usercopy.c
@@ -1,9 +1,85 @@
 #include <linux/export.h>
 #include <linux/bug.h>
 #include <linux/uaccess.h>
+#include <linux/vz_bitops.h>
 
 void copy_from_user_overflow(void)
 {
 	WARN(1, "Buffer overflow detected!\n");
 }
 EXPORT_SYMBOL(copy_from_user_overflow);
+
+/*
+ * The "unsafe" user accesses aren't really "unsafe", but the naming
+ * is a big fat warning: you have to not only do the access_ok()
+ * checking before using them, but you have to surround them with the
+ * user_access_begin/end() pair.
+ */
+static __must_check inline bool user_access_begin(const void __user *ptr, size_t len)
+{
+	if (unlikely(!access_ok(0,ptr,len)))
+		return 0;
+	__uaccess_begin();
+	return 1;
+}
+#define user_access_end()      __uaccess_end()
+
+#define unsafe_get_user(x, ptr)                                                \
+({                                                                             \
+	 int __gu_err;                                                           \
+	 unsigned long __gu_val;                                                 \
+	 __get_user_size(__gu_val, (ptr), sizeof(*(ptr)), __gu_err, -EFAULT);    \
+	 (x) = (__force __typeof__(*(ptr)))__gu_val;                             \
+	 __builtin_expect(__gu_err, 0);                                          \
+ })
+
+/**
+ * check_zeroed_user: check if a userspace buffer only contains zero bytes
+ * @from: Source address, in userspace.
+ * @size: Size of buffer.
+ *
+ * This is effectively shorthand for "memchr_inv(from, 0, size) == NULL" for
+ * userspace addresses (and is more efficient because we don't care where the
+ * first non-zero byte is).
+ *
+ * Returns:
+ *  * 0: There were non-zero bytes present in the buffer.
+ *  * 1: The buffer was full of zero bytes.
+ *  * -EFAULT: access to userspace failed.
+ */
+int check_zeroed_user(const void __user *from, size_t size)
+{
+	unsigned long val;
+	uintptr_t align = (uintptr_t) from % sizeof(unsigned long);
+
+	if (unlikely(size == 0))
+		return 1;
+
+	from -= align;
+	size += align;
+
+	if (!user_access_begin(from, size))
+		return -EFAULT;
+
+	unsafe_get_user(val, (unsigned long __user *) from);
+	if (align)
+		val &= ~aligned_byte_mask(align);
+
+	while (size > sizeof(unsigned long)) {
+		if (unlikely(val))
+			goto done;
+
+		from += sizeof(unsigned long);
+		size -= sizeof(unsigned long);
+
+		unsafe_get_user(val, (unsigned long __user *) from);
+	}
+
+	if (size < sizeof(unsigned long))
+		val &= aligned_byte_mask(size);
+
+done:
+	user_access_end();
+	return (val == 0);
+}
+EXPORT_SYMBOL(check_zeroed_user);


More information about the Devel mailing list