[CRIU] [PATCH 3/3] ia32/kdat: Check for 32-bit mmap() bug
Andrei Vagin
avagin at virtuozzo.com
Fri Apr 21 16:42:42 PDT 2017
On Fri, Apr 21, 2017 at 11:28:12PM +0300, Dmitry Safonov wrote:
> There was a kernel bug where 32-bit mmap() could return a 64-bit pointer.
> The fix is in linux-next, will go to v4.12 kernel.
> Checkpointing on v4.9 and later kernels works fine, but restoring will
> result in an application that mmap()s 64-bit addresses, leading to
> segfaults, memory corruption, etc.
> As our policy is to fail on dump if we can't restore on the same target,
> make checkpointing fail with an error for v4.9.
Was the fix for this kernel issue applied to stable kernels (4.9, 4.10)?
If the answer is yes, I don't think that we need to add this check.
Pavel found that we spend a lot of time in kdat code, so we need to
think about how to optimize it instead of adding new things to it.
>
> Signed-off-by: Dmitry Safonov <dsafonov at virtuozzo.com>
> ---
> criu/arch/x86/crtools.c | 142 ++++++++++++++++++++++++++++++++++++++++++++++--
> criu/cr-check.c | 2 +-
> 2 files changed, 138 insertions(+), 6 deletions(-)
>
> diff --git a/criu/arch/x86/crtools.c b/criu/arch/x86/crtools.c
> index d0121b48e308..f353ae6df906 100644
> --- a/criu/arch/x86/crtools.c
> +++ b/criu/arch/x86/crtools.c
> @@ -1,9 +1,12 @@
> +#include <signal.h>
> +#include <stdlib.h>
> #include <string.h>
> #include <unistd.h>
> #include <elf.h>
> #include <sys/mman.h>
> #include <sys/syscall.h>
> #include <sys/auxv.h>
> +#include <sys/wait.h>
>
> #include "types.h"
> #include "log.h"
> @@ -26,17 +29,22 @@
> #include "images/core.pb-c.h"
> #include "images/creds.pb-c.h"
>
> -int kdat_compatible_cr(void)
> -{
> #ifdef CONFIG_COMPAT
> +static int has_arch_map_vdso(void)
> +{
> unsigned long auxval;
> int ret;
>
> errno = 0;
> auxval = getauxval(AT_SYSINFO_EHDR);
> - if (!auxval || errno == ENOENT) {
> - pr_err("Failed to get auxval, err: %lu\n", auxval);
> - return 0;
> + if (!auxval) {
> + if (errno == ENOENT) { /* No vDSO - OK */
> + pr_warn("No SYSINFO_EHDR - no vDSO\n");
> + return 1;
> + } else { /* That can't happen, according to man */
> + pr_err("Failed to get auxval: errno %d\n", errno);
> + return -1;
> + }
> }
> /*
> * Mapping vDSO while have not unmap it yet:
> @@ -45,10 +53,134 @@ int kdat_compatible_cr(void)
> ret = syscall(SYS_arch_prctl, ARCH_MAP_VDSO_32, 1);
> if (ret == -1 && errno == EEXIST)
> return 1;
> + return 0;
> +}
> +
> +#ifndef __NR32_mmap2
> +# define __NR32_mmap2 192
> #endif
> +
> +struct syscall_args32 {
> + uint32_t nr, arg0, arg1, arg2, arg3, arg4, arg5;
> +};
> +
> +static void do_full_int80(struct syscall_args32 *args)
> +{
> + asm volatile ("int $0x80"
> + : "+a" (args->nr),
> + "+b" (args->arg0), "+c" (args->arg1), "+d" (args->arg2),
> + "+S" (args->arg3), "+D" (args->arg4),
> + "+r" (args->arg5)
> + : : "r8", "r9", "r10", "r11");
> +}
> +
> +void *mmap_ia32(void *addr, size_t len, int prot,
> + int flags, int fildes, off_t off)
> +{
> + struct syscall_args32 s;
> +
> + s.nr = __NR32_mmap2;
> + s.arg0 = (uint32_t)(uintptr_t)addr;
> + s.arg1 = (uint32_t)len;
> + s.arg2 = prot;
> + s.arg3 = flags;
> + s.arg4 = fildes;
> + s.arg5 = (uint32_t)off;
> +
> + do_full_int80(&s);
> +
> + return (void *)(uintptr_t)s.nr;
> +}
> +
> +/*
> + * The idea of the test:
> + * From kernel's top-down allocator we assume here that
> + * 1. A = mmap(0, ...); munmap(A);
> + * 2. B = mmap(0, ...);
> + * results in A == B.
> + * ...but if the 32-bit mmap() bug is present, then A will hold only the
> + * lower 4 bytes of the 64-bit address allocated by mmap().
> + * That means that the next mmap() will return B != A
> + * (as munmap(A) hasn't really unmapped the A mapping).
> + *
> + * As mapping with lower 4 bytes of A may really exist, we run
> + * this test under fork().
> + *
> + * Another approach to test for the bug's presence would be to parse
> + * /proc/self/maps before and after the 32-bit mmap(), but that would
> + * be too slow.
> + */
> +static void mmap_bug_test(void)
> +{
> + void *map1, *map2;
> + int err;
> +
> + map1 = mmap_ia32(0, PAGE_SIZE, PROT_NONE, MAP_ANON|MAP_PRIVATE, -1, 0);
> + /* 32-bit error, not sign-extended - can't use IS_ERR_VALUE() here */
> + err = (uintptr_t)map1 % PAGE_SIZE;
> + if (err) {
> + pr_err("ia32 mmap() failed: %d\n", err);
> + exit(1);
> + }
> +
> + if (munmap(map1, PAGE_SIZE)) {
> + pr_err("Failed to unmap() 32-bit mapping: %m\n");
> + exit(1);
> + }
> +
> + map2 = mmap_ia32(0, PAGE_SIZE, PROT_NONE, MAP_ANON|MAP_PRIVATE, -1, 0);
> + err = (uintptr_t)map2 % PAGE_SIZE;
> + if (err) {
> + pr_err("ia32 mmap() failed: %d\n", err);
> + exit(1);
> + }
> +
> + if (map1 != map2)
> + exit(1);
> + exit(0);
> +}
> +
> +/*
> + * Pre-v4.12 kernels have a bug: for a process started as 64-bit,
> + * a 32-bit mmap() may return an 8-byte pointer.
> + * Which is fatal for us: after 32-bit C/R a task will map 64-bit
> + * addresses, cut upper 4 bytes and try to use lower 4 bytes.
> + * This is a check if the bug was fixed in the kernel.
> + */
> +static int has_32bit_mmap_bug(void)
> +{
> + pid_t child = fork();
> + int stat;
> +
> + if (child == 0)
> + mmap_bug_test();
> +
> + if (waitpid(child, &stat, 0) != child) {
> + pr_err("Failed to wait for mmap test");
> + kill(child, SIGKILL);
> + return -1;
> + }
> +
> + if (!WIFEXITED(stat) || WEXITSTATUS(stat) != 0)
> + return 1;
> return 0;
> }
>
> +int kdat_compatible_cr(void)
> +{
> + if (!has_arch_map_vdso())
> + return 0;
> + if (has_32bit_mmap_bug())
> + return 0;
> + return 1;
> +}
> +#else
> +int kdat_compatible_cr(void)
> +{
> + return 0;
> +}
> +#endif
> +
> int save_task_regs(void *x, user_regs_struct_t *regs, user_fpregs_struct_t *fpregs)
> {
> CoreEntry *core = x;
> diff --git a/criu/cr-check.c b/criu/cr-check.c
> index ec0020dba4fc..ac2a7d78a506 100644
> --- a/criu/cr-check.c
> +++ b/criu/cr-check.c
> @@ -1061,7 +1061,7 @@ static int check_compat_cr(void)
> #ifdef CONFIG_COMPAT
> if (kdat_compatible_cr())
> return 0;
> - pr_warn("compat_cr is not supported. Requires kernel >= v4.9\n");
> + pr_warn("compat_cr is not supported. Requires kernel >= v4.12\n");
> #else
> pr_warn("CRIU built without CONFIG_COMPAT - can't C/R ia32\n");
> #endif
> --
> 2.12.2
>
> _______________________________________________
> CRIU mailing list
> CRIU at openvz.org
> https://lists.openvz.org/mailman/listinfo/criu
More information about the CRIU
mailing list