[CRIU] [PATCH] criu: always enable the userfaultfd support
Adrian Reber
areber at redhat.com
Wed Apr 20 11:45:17 PDT 2016
Acked-by: Adrian Reber <areber at redhat.com>
I will send a follow-up to do runtime detection of userfaultfd
availability.
I'm not such a great fan of including kernel headers, btw. I once spent a day
figuring out why some ioctl() didn't work on ppc64le, until I found that
the ioctl number had been copied into the criu code but hardcoded to the
x86_64 value, which didn't work on ppc64le. I later discovered this was
fixed in git, but I was working on a release tarball ;-)
Adrian
On Wed, Apr 20, 2016 at 08:12:54PM +0300, Andrey Vagin wrote:
> From: Andrew Vagin <avagin at virtuozzo.com>
>
> Add linux/userfaultfd.h to the criu sources. This header is part
> of the kernel API and I see nothing wrong with having it in the repo.
>
> Why we want to do this:
> * to check that criu works correctly if a kernel doesn't
> support userfaultfd.
> * to check compilation of the userfaultfd part in travis-ci.
>
> Cc: Mike Rapoport <rppt at linux.vnet.ibm.com>
> Cc: Adrian Reber <areber at redhat.com>
> Signed-off-by: Andrew Vagin <avagin at virtuozzo.com>
> ---
> criu/Makefile.config | 8 --
> criu/arch/arm/syscalls/syscall.def | 1 +
> criu/arch/ppc64/syscalls/syscall-ppc64.tbl | 1 +
> criu/arch/x86/syscalls/syscall_32.tbl | 1 +
> criu/arch/x86/syscalls/syscall_64.tbl | 1 +
> criu/include/linux/userfaultfd.h | 167 +++++++++++++++++++++++++++++
> criu/include/uffd.h | 17 +--
> criu/pie/restorer.c | 11 +-
> criu/uffd.c | 40 +++----
> scripts/feature-tests.mak | 15 ---
> 10 files changed, 194 insertions(+), 68 deletions(-)
> create mode 100644 criu/include/linux/userfaultfd.h
>
> diff --git a/criu/Makefile.config b/criu/Makefile.config
> index 326356e..15af65f 100644
> --- a/criu/Makefile.config
> +++ b/criu/Makefile.config
> @@ -14,10 +14,6 @@ ifeq ($(call pkg-config-check,libselinux),y)
> DEFINES += -DCONFIG_HAS_SELINUX
> endif
>
> -ifeq ($(call try-cc,$(FEATURE_TEST_UFFD)),y)
> - export UFFD := 1
> -endif
> -
> FEATURES_LIST := TCP_REPAIR STRLCPY STRLCAT PTRACE_PEEKSIGINFO \
> SETPROCTITLE_INIT MEMFD
>
> @@ -42,10 +38,6 @@ ifeq ($$(VDSO),y)
> $(Q) @echo '#define CONFIG_VDSO' >> $$@
> $(Q) @echo '' >> $$@
> endif
> -ifeq ($$(UFFD),1)
> - $(Q) @echo '#define CONFIG_HAS_UFFD' >> $$@
> - $(Q) @echo '' >> $$@
> -endif
> ifeq ($$(piegen-y),y)
> $(Q) @echo '#define CONFIG_PIEGEN' >> $$@
> $(Q) @echo '' >> $$@
> diff --git a/criu/arch/arm/syscalls/syscall.def b/criu/arch/arm/syscalls/syscall.def
> index a42c1b7..ffa807d 100644
> --- a/criu/arch/arm/syscalls/syscall.def
> +++ b/criu/arch/arm/syscalls/syscall.def
> @@ -106,3 +106,4 @@ io_setup 0 243 (unsigned nr_events, aio_context_t *ctx)
> io_submit 2 246 (aio_context_t ctx_id, long nr, struct iocb **iocbpp)
> io_getevents 4 245 (aio_context_t ctx, long min_nr, long nr, struct io_event *evs, struct timespec *tmo)
> seccomp 277 383 (unsigned int op, unsigned int flags, const char *uargs)
> +userfaultfd 282 388 (int flags)
> diff --git a/criu/arch/ppc64/syscalls/syscall-ppc64.tbl b/criu/arch/ppc64/syscalls/syscall-ppc64.tbl
> index 5087037..d629d5a 100644
> --- a/criu/arch/ppc64/syscalls/syscall-ppc64.tbl
> +++ b/criu/arch/ppc64/syscalls/syscall-ppc64.tbl
> @@ -103,3 +103,4 @@ __NR_io_setup 227 sys_io_setup (unsigned nr_events, aio_context_t *ctx_idp)
> __NR_io_getevents 229 sys_io_getevents (aio_context_t ctx_id, long min_nr, long nr, struct io_event *events, struct timespec *timeout)
> __NR_io_submit 230 sys_io_submit (aio_context_t ctx_id, long nr, struct iocb **iocbpp)
> __NR_ipc 117 sys_ipc (unsigned int call, int first, unsigned long second, unsigned long third, const void *ptr, long fifth)
> +__NR_userfaultfd 364 sys_userfaultfd (int flags)
> diff --git a/criu/arch/x86/syscalls/syscall_32.tbl b/criu/arch/x86/syscalls/syscall_32.tbl
> index 53970f4..7c4867c 100644
> --- a/criu/arch/x86/syscalls/syscall_32.tbl
> +++ b/criu/arch/x86/syscalls/syscall_32.tbl
> @@ -91,3 +91,4 @@ __NR_setns 346 sys_setns (int fd, int nstype)
> __NR_kcmp 349 sys_kcmp (pid_t pid1, pid_t pid2, int type, unsigned long idx1, unsigned long idx2)
> __NR_seccomp 354 sys_seccomp (unsigned int op, unsigned int flags, const char *uargs)
> __NR_memfd_create 356 sys_memfd_create (const char *name, unsigned int flags)
> +__NR_userfaultfd 374 sys_userfaultfd (int flags)
> diff --git a/criu/arch/x86/syscalls/syscall_64.tbl b/criu/arch/x86/syscalls/syscall_64.tbl
> index 543e851..c0b8d8c 100644
> --- a/criu/arch/x86/syscalls/syscall_64.tbl
> +++ b/criu/arch/x86/syscalls/syscall_64.tbl
> @@ -101,3 +101,4 @@ __NR_open_by_handle_at 304 sys_open_by_handle_at (int mountdirfd, struct file_
> __NR_setns 308 sys_setns (int fd, int nstype)
> __NR_kcmp 312 sys_kcmp (pid_t pid1, pid_t pid2, int type, unsigned long idx1, unsigned long idx2)
> __NR_memfd_create 319 sys_memfd_create (const char *name, unsigned int flags)
> +__NR_userfaultfd 323 sys_userfaultfd (int flags)
> diff --git a/criu/include/linux/userfaultfd.h b/criu/include/linux/userfaultfd.h
> new file mode 100644
> index 0000000..9057d7a
> --- /dev/null
> +++ b/criu/include/linux/userfaultfd.h
> @@ -0,0 +1,167 @@
> +/*
> + * include/linux/userfaultfd.h
> + *
> + * Copyright (C) 2007 Davide Libenzi <davidel at xmailserver.org>
> + * Copyright (C) 2015 Red Hat, Inc.
> + *
> + */
> +
> +#ifndef _LINUX_USERFAULTFD_H
> +#define _LINUX_USERFAULTFD_H
> +
> +#include <linux/types.h>
> +
> +#define UFFD_API ((__u64)0xAA)
> +/*
> + * After implementing the respective features it will become:
> + * #define UFFD_API_FEATURES (UFFD_FEATURE_PAGEFAULT_FLAG_WP | \
> + * UFFD_FEATURE_EVENT_FORK)
> + */
> +#define UFFD_API_FEATURES (0)
> +#define UFFD_API_IOCTLS \
> + ((__u64)1 << _UFFDIO_REGISTER | \
> + (__u64)1 << _UFFDIO_UNREGISTER | \
> + (__u64)1 << _UFFDIO_API)
> +#define UFFD_API_RANGE_IOCTLS \
> + ((__u64)1 << _UFFDIO_WAKE | \
> + (__u64)1 << _UFFDIO_COPY | \
> + (__u64)1 << _UFFDIO_ZEROPAGE)
> +
> +/*
> + * Valid ioctl command number range with this API is from 0x00 to
> + * 0x3F. UFFDIO_API is the fixed number, everything else can be
> + * changed by implementing a different UFFD_API. If sticking to the
> + * same UFFD_API more ioctl can be added and userland will be aware of
> + * which ioctl the running kernel implements through the ioctl command
> + * bitmask written by the UFFDIO_API.
> + */
> +#define _UFFDIO_REGISTER (0x00)
> +#define _UFFDIO_UNREGISTER (0x01)
> +#define _UFFDIO_WAKE (0x02)
> +#define _UFFDIO_COPY (0x03)
> +#define _UFFDIO_ZEROPAGE (0x04)
> +#define _UFFDIO_API (0x3F)
> +
> +/* userfaultfd ioctl ids */
> +#define UFFDIO 0xAA
> +#define UFFDIO_API _IOWR(UFFDIO, _UFFDIO_API, \
> + struct uffdio_api)
> +#define UFFDIO_REGISTER _IOWR(UFFDIO, _UFFDIO_REGISTER, \
> + struct uffdio_register)
> +#define UFFDIO_UNREGISTER _IOR(UFFDIO, _UFFDIO_UNREGISTER, \
> + struct uffdio_range)
> +#define UFFDIO_WAKE _IOR(UFFDIO, _UFFDIO_WAKE, \
> + struct uffdio_range)
> +#define UFFDIO_COPY _IOWR(UFFDIO, _UFFDIO_COPY, \
> + struct uffdio_copy)
> +#define UFFDIO_ZEROPAGE _IOWR(UFFDIO, _UFFDIO_ZEROPAGE, \
> + struct uffdio_zeropage)
> +
> +/* read() structure */
> +struct uffd_msg {
> + __u8 event;
> +
> + __u8 reserved1;
> + __u16 reserved2;
> + __u32 reserved3;
> +
> + union {
> + struct {
> + __u64 flags;
> + __u64 address;
> + } pagefault;
> +
> + struct {
> + /* unused reserved fields */
> + __u64 reserved1;
> + __u64 reserved2;
> + __u64 reserved3;
> + } reserved;
> + } arg;
> +} __packed;
> +
> +/*
> + * Start at 0x12 and not at 0 to be more strict against bugs.
> + */
> +#define UFFD_EVENT_PAGEFAULT 0x12
> +#if 0 /* not available yet */
> +#define UFFD_EVENT_FORK 0x13
> +#endif
> +
> +/* flags for UFFD_EVENT_PAGEFAULT */
> +#define UFFD_PAGEFAULT_FLAG_WRITE (1<<0) /* If this was a write fault */
> +#define UFFD_PAGEFAULT_FLAG_WP (1<<1) /* If reason is VM_UFFD_WP */
> +
> +struct uffdio_api {
> + /* userland asks for an API number and the features to enable */
> + __u64 api;
> + /*
> + * Kernel answers below with the all available features for
> + * the API, this notifies userland of which events and/or
> + * which flags for each event are enabled in the current
> + * kernel.
> + *
> + * Note: UFFD_EVENT_PAGEFAULT and UFFD_PAGEFAULT_FLAG_WRITE
> + * are to be considered implicitly always enabled in all kernels as
> + * long as the uffdio_api.api requested matches UFFD_API.
> + */
> +#if 0 /* not available yet */
> +#define UFFD_FEATURE_PAGEFAULT_FLAG_WP (1<<0)
> +#define UFFD_FEATURE_EVENT_FORK (1<<1)
> +#endif
> + __u64 features;
> +
> + __u64 ioctls;
> +};
> +
> +struct uffdio_range {
> + __u64 start;
> + __u64 len;
> +};
> +
> +struct uffdio_register {
> + struct uffdio_range range;
> +#define UFFDIO_REGISTER_MODE_MISSING ((__u64)1<<0)
> +#define UFFDIO_REGISTER_MODE_WP ((__u64)1<<1)
> + __u64 mode;
> +
> + /*
> + * kernel answers which ioctl commands are available for the
> + * range, keep at the end as the last 8 bytes aren't read.
> + */
> + __u64 ioctls;
> +};
> +
> +struct uffdio_copy {
> + __u64 dst;
> + __u64 src;
> + __u64 len;
> + /*
> + * There will be a wrprotection flag later that allows to map
> + * pages wrprotected on the fly. And such a flag will be
> + * available if the wrprotection ioctl are implemented for the
> + * range according to the uffdio_register.ioctls.
> + */
> +#define UFFDIO_COPY_MODE_DONTWAKE ((__u64)1<<0)
> + __u64 mode;
> +
> + /*
> + * "copy" is written by the ioctl and must be at the end: the
> + * copy_from_user will not read the last 8 bytes.
> + */
> + __s64 copy;
> +};
> +
> +struct uffdio_zeropage {
> + struct uffdio_range range;
> +#define UFFDIO_ZEROPAGE_MODE_DONTWAKE ((__u64)1<<0)
> + __u64 mode;
> +
> + /*
> + * "zeropage" is written by the ioctl and must be at the end:
> + * the copy_from_user will not read the last 8 bytes.
> + */
> + __s64 zeropage;
> +};
> +
> +#endif /* _LINUX_USERFAULTFD_H */
> diff --git a/criu/include/uffd.h b/criu/include/uffd.h
> index 6c931e2..4e1ba4a 100644
> --- a/criu/include/uffd.h
> +++ b/criu/include/uffd.h
> @@ -1,22 +1,7 @@
> #ifndef __CR_UFFD_H_
> #define __CR_UFFD_H_
>
> -#include "config.h"
> -#include "restorer.h"
> -
> -#ifdef CONFIG_HAS_UFFD
> -
> -#include <syscall.h>
> -#include <linux/userfaultfd.h>
> -
> -#ifndef __NR_userfaultfd
> -#error "missing __NR_userfaultfd definition"
> -#endif
> -
> +struct task_restore_args;
> extern int setup_uffd(struct task_restore_args *task_args, int pid);
> -#else
> -static inline int setup_uffd(struct task_restore_args *task_args, int pid) { return 0; }
> -
> -#endif /* CONFIG_HAS_UFFD */
>
> #endif /* __CR_UFFD_H_ */
> diff --git a/criu/pie/restorer.c b/criu/pie/restorer.c
> index 59e1723..5fc8949 100644
> --- a/criu/pie/restorer.c
> +++ b/criu/pie/restorer.c
> @@ -16,6 +16,8 @@
> #include <sys/resource.h>
> #include <signal.h>
>
> +#include "linux/userfaultfd.h"
> +
> #include "compiler.h"
> #include "asm/string.h"
> #include "asm/types.h"
> @@ -704,16 +706,16 @@ static void rst_tcp_socks_all(struct task_restore_args *ta)
>
> static int enable_uffd(int uffd, unsigned long addr, unsigned long len)
> {
> + int rc;
> + struct uffdio_register uffdio_register;
> + unsigned long expected_ioctls;
> +
> /*
> * If uffd == -1, this means that userfaultfd is not enabled
> * or it is not available.
> */
> if (uffd == -1)
> return 0;
> -#ifdef CONFIG_HAS_UFFD
> - int rc;
> - struct uffdio_register uffdio_register;
> - unsigned long expected_ioctls;
>
> uffdio_register.range.start = addr;
> uffdio_register.range.len = len;
> @@ -733,7 +735,6 @@ static int enable_uffd(int uffd, unsigned long addr, unsigned long len)
> pr_err("lazy-pages: unexpected missing uffd ioctl for anon memory\n");
> }
>
> -#endif
> return 0;
> }
>
> diff --git a/criu/uffd.c b/criu/uffd.c
> index 254a5b3..6f5e774 100644
> --- a/criu/uffd.c
> +++ b/criu/uffd.c
> @@ -15,21 +15,23 @@
> #include <sys/un.h>
> #include <sys/socket.h>
>
> +#include "linux/userfaultfd.h"
> +
> #include "asm/page.h"
> -#include "include/log.h"
> -#include "include/criu-plugin.h"
> -#include "include/page-read.h"
> -#include "include/files-reg.h"
> -#include "include/kerndat.h"
> -#include "include/mem.h"
> -#include "include/uffd.h"
> -#include "include/util-pie.h"
> -#include "include/pstree.h"
> -#include "include/crtools.h"
> -#include "include/cr_options.h"
> +#include "log.h"
> +#include "criu-plugin.h"
> +#include "page-read.h"
> +#include "files-reg.h"
> +#include "kerndat.h"
> +#include "mem.h"
> +#include "uffd.h"
> +#include "util-pie.h"
> +#include "pstree.h"
> +#include "crtools.h"
> +#include "cr_options.h"
> #include "xmalloc.h"
> -
> -#ifdef CONFIG_HAS_UFFD
> +#include "syscall-codes.h"
> +#include "restorer.h"
>
> #undef LOG_PREFIX
> #define LOG_PREFIX "lazy-pages: "
> @@ -94,7 +96,7 @@ int setup_uffd(struct task_restore_args *task_args, int pid)
> * Open userfaulfd FD which is passed to the restorer blob and
> * to a second process handling the userfaultfd page faults.
> */
> - task_args->uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
> + task_args->uffd = syscall(SYS_userfaultfd, O_CLOEXEC | O_NONBLOCK);
>
> /*
> * Check if the UFFD_API is the one which is expected
> @@ -680,13 +682,3 @@ int cr_lazy_pages()
>
> return ret;
> }
> -
> -#else /* CONFIG_HAS_UFFD */
> -
> -int cr_lazy_pages()
> -{
> - pr_msg("userfaultfd system call is not supported, cannot start lazy-pages daemon\n");
> - return -1;
> -}
> -
> -#endif /* CONFIG_HAS_UFFD */
> diff --git a/scripts/feature-tests.mak b/scripts/feature-tests.mak
> index c48b52e..c3dc85e 100644
> --- a/scripts/feature-tests.mak
> +++ b/scripts/feature-tests.mak
> @@ -90,18 +90,3 @@ int main(void)
> }
>
> endef
> -
> -define FEATURE_TEST_UFFD
> -
> -#include <syscall.h>
> -#include <linux/userfaultfd.h>
> -
> -int main(void)
> -{
> -#ifndef __NR_userfaultfd
> -#error "missing __NR_userfaultfd definition"
> -#endif
> - return 0;
> -}
> -
> -endef
> --
> 2.5.0
>
More information about the CRIU
mailing list