[CRIU] [PATCH] criu: always enable the userfaultfd support

Adrian Reber areber at redhat.com
Wed Apr 20 11:45:17 PDT 2016


Acked-by: Adrian Reber <areber at redhat.com>

I will send a follow-up to do runtime detection of userfaultfd
availability.

I'm not a great fan of including kernel headers, by the way. I once spent a day
figuring out why some ioctl() didn't work on ppc64le, until I found that
the ioctl number had been copied into the criu code but hardcoded to the
x86_64 value, which doesn't work on ppc64le. I later discovered this had been
fixed in git, but I was working from a release tarball ;-)

		Adrian

On Wed, Apr 20, 2016 at 08:12:54PM +0300, Andrey Vagin wrote:
> From: Andrew Vagin <avagin at virtuozzo.com>
> 
> Add linux/userfaultfd.h to criu sources. This header is a part
> of the kernel API and I see nothing wrong with having it in the repo.
> 
> Why we want to do this:
> * to check that criu works correctly if a kernel doesn't
>   support userfaultfd.
> * to check compilation of the userfaultfd part in travis-ci.
> 
> Cc: Mike Rapoport <rppt at linux.vnet.ibm.com>
> Cc: Adrian Reber <areber at redhat.com>
> Signed-off-by: Andrew Vagin <avagin at virtuozzo.com>
> ---
>  criu/Makefile.config                       |   8 --
>  criu/arch/arm/syscalls/syscall.def         |   1 +
>  criu/arch/ppc64/syscalls/syscall-ppc64.tbl |   1 +
>  criu/arch/x86/syscalls/syscall_32.tbl      |   1 +
>  criu/arch/x86/syscalls/syscall_64.tbl      |   1 +
>  criu/include/linux/userfaultfd.h           | 167 +++++++++++++++++++++++++++++
>  criu/include/uffd.h                        |  17 +--
>  criu/pie/restorer.c                        |  11 +-
>  criu/uffd.c                                |  40 +++----
>  scripts/feature-tests.mak                  |  15 ---
>  10 files changed, 194 insertions(+), 68 deletions(-)
>  create mode 100644 criu/include/linux/userfaultfd.h
> 
> diff --git a/criu/Makefile.config b/criu/Makefile.config
> index 326356e..15af65f 100644
> --- a/criu/Makefile.config
> +++ b/criu/Makefile.config
> @@ -14,10 +14,6 @@ ifeq ($(call pkg-config-check,libselinux),y)
>          DEFINES	+= -DCONFIG_HAS_SELINUX
>  endif
>  
> -ifeq ($(call try-cc,$(FEATURE_TEST_UFFD)),y)
> -	export UFFD := 1
> -endif
> -
>  FEATURES_LIST	:= TCP_REPAIR STRLCPY STRLCAT PTRACE_PEEKSIGINFO \
>  	SETPROCTITLE_INIT MEMFD
>  
> @@ -42,10 +38,6 @@ ifeq ($$(VDSO),y)
>  	$(Q) @echo '#define CONFIG_VDSO'				>> $$@
>  	$(Q) @echo ''							>> $$@
>  endif
> -ifeq ($$(UFFD),1)
> -	$(Q) @echo '#define CONFIG_HAS_UFFD'				>> $$@
> -	$(Q) @echo ''							>> $$@
> -endif
>  ifeq ($$(piegen-y),y)
>  	$(Q) @echo '#define CONFIG_PIEGEN'				>> $$@
>  	$(Q) @echo ''							>> $$@
> diff --git a/criu/arch/arm/syscalls/syscall.def b/criu/arch/arm/syscalls/syscall.def
> index a42c1b7..ffa807d 100644
> --- a/criu/arch/arm/syscalls/syscall.def
> +++ b/criu/arch/arm/syscalls/syscall.def
> @@ -106,3 +106,4 @@ io_setup			0	243	(unsigned nr_events, aio_context_t *ctx)
>  io_submit			2	246	(aio_context_t ctx_id, long nr, struct iocb **iocbpp)
>  io_getevents			4	245	(aio_context_t ctx, long min_nr, long nr, struct io_event *evs, struct timespec *tmo)
>  seccomp				277	383	(unsigned int op, unsigned int flags, const char *uargs)
> +userfaultfd			282	388	(int flags)
> diff --git a/criu/arch/ppc64/syscalls/syscall-ppc64.tbl b/criu/arch/ppc64/syscalls/syscall-ppc64.tbl
> index 5087037..d629d5a 100644
> --- a/criu/arch/ppc64/syscalls/syscall-ppc64.tbl
> +++ b/criu/arch/ppc64/syscalls/syscall-ppc64.tbl
> @@ -103,3 +103,4 @@ __NR_io_setup		227		sys_io_setup		(unsigned nr_events, aio_context_t *ctx_idp)
>  __NR_io_getevents	229		sys_io_getevents	(aio_context_t ctx_id, long min_nr, long nr, struct io_event *events, struct timespec *timeout)
>  __NR_io_submit		230		sys_io_submit		(aio_context_t ctx_id, long nr, struct iocb **iocbpp)
>  __NR_ipc		117		sys_ipc			(unsigned int call, int first, unsigned long second, unsigned long third, const void *ptr, long fifth)
> +__NR_userfaultfd	364		sys_userfaultfd		(int flags)
> diff --git a/criu/arch/x86/syscalls/syscall_32.tbl b/criu/arch/x86/syscalls/syscall_32.tbl
> index 53970f4..7c4867c 100644
> --- a/criu/arch/x86/syscalls/syscall_32.tbl
> +++ b/criu/arch/x86/syscalls/syscall_32.tbl
> @@ -91,3 +91,4 @@ __NR_setns		346		sys_setns		(int fd, int nstype)
>  __NR_kcmp		349		sys_kcmp		(pid_t pid1, pid_t pid2, int type, unsigned long idx1, unsigned long idx2)
>  __NR_seccomp		354		sys_seccomp		(unsigned int op, unsigned int flags, const char *uargs)
>  __NR_memfd_create	356		sys_memfd_create	(const char *name, unsigned int flags)
> +__NR_userfaultfd	374		sys_userfaultfd		(int flags)
> diff --git a/criu/arch/x86/syscalls/syscall_64.tbl b/criu/arch/x86/syscalls/syscall_64.tbl
> index 543e851..c0b8d8c 100644
> --- a/criu/arch/x86/syscalls/syscall_64.tbl
> +++ b/criu/arch/x86/syscalls/syscall_64.tbl
> @@ -101,3 +101,4 @@ __NR_open_by_handle_at		304		sys_open_by_handle_at	(int mountdirfd, struct file_
>  __NR_setns			308		sys_setns		(int fd, int nstype)
>  __NR_kcmp			312		sys_kcmp		(pid_t pid1, pid_t pid2, int type, unsigned long idx1, unsigned long idx2)
>  __NR_memfd_create		319		sys_memfd_create	(const char *name, unsigned int flags)
> +__NR_userfaultfd		323		sys_userfaultfd		(int flags)
> diff --git a/criu/include/linux/userfaultfd.h b/criu/include/linux/userfaultfd.h
> new file mode 100644
> index 0000000..9057d7a
> --- /dev/null
> +++ b/criu/include/linux/userfaultfd.h
> @@ -0,0 +1,167 @@
> +/*
> + *  include/linux/userfaultfd.h
> + *
> + *  Copyright (C) 2007  Davide Libenzi <davidel at xmailserver.org>
> + *  Copyright (C) 2015  Red Hat, Inc.
> + *
> + */
> +
> +#ifndef _LINUX_USERFAULTFD_H
> +#define _LINUX_USERFAULTFD_H
> +
> +#include <linux/types.h>
> +
> +#define UFFD_API ((__u64)0xAA)
> +/*
> + * After implementing the respective features it will become:
> + * #define UFFD_API_FEATURES (UFFD_FEATURE_PAGEFAULT_FLAG_WP | \
> + *			      UFFD_FEATURE_EVENT_FORK)
> + */
> +#define UFFD_API_FEATURES (0)
> +#define UFFD_API_IOCTLS				\
> +	((__u64)1 << _UFFDIO_REGISTER |		\
> +	 (__u64)1 << _UFFDIO_UNREGISTER |	\
> +	 (__u64)1 << _UFFDIO_API)
> +#define UFFD_API_RANGE_IOCTLS			\
> +	((__u64)1 << _UFFDIO_WAKE |		\
> +	 (__u64)1 << _UFFDIO_COPY |		\
> +	 (__u64)1 << _UFFDIO_ZEROPAGE)
> +
> +/*
> + * Valid ioctl command number range with this API is from 0x00 to
> + * 0x3F.  UFFDIO_API is the fixed number, everything else can be
> + * changed by implementing a different UFFD_API. If sticking to the
> + * same UFFD_API more ioctl can be added and userland will be aware of
> + * which ioctl the running kernel implements through the ioctl command
> + * bitmask written by the UFFDIO_API.
> + */
> +#define _UFFDIO_REGISTER		(0x00)
> +#define _UFFDIO_UNREGISTER		(0x01)
> +#define _UFFDIO_WAKE			(0x02)
> +#define _UFFDIO_COPY			(0x03)
> +#define _UFFDIO_ZEROPAGE		(0x04)
> +#define _UFFDIO_API			(0x3F)
> +
> +/* userfaultfd ioctl ids */
> +#define UFFDIO 0xAA
> +#define UFFDIO_API		_IOWR(UFFDIO, _UFFDIO_API,	\
> +				      struct uffdio_api)
> +#define UFFDIO_REGISTER		_IOWR(UFFDIO, _UFFDIO_REGISTER, \
> +				      struct uffdio_register)
> +#define UFFDIO_UNREGISTER	_IOR(UFFDIO, _UFFDIO_UNREGISTER,	\
> +				     struct uffdio_range)
> +#define UFFDIO_WAKE		_IOR(UFFDIO, _UFFDIO_WAKE,	\
> +				     struct uffdio_range)
> +#define UFFDIO_COPY		_IOWR(UFFDIO, _UFFDIO_COPY,	\
> +				      struct uffdio_copy)
> +#define UFFDIO_ZEROPAGE		_IOWR(UFFDIO, _UFFDIO_ZEROPAGE,	\
> +				      struct uffdio_zeropage)
> +
> +/* read() structure */
> +struct uffd_msg {
> +	__u8	event;
> +
> +	__u8	reserved1;
> +	__u16	reserved2;
> +	__u32	reserved3;
> +
> +	union {
> +		struct {
> +			__u64	flags;
> +			__u64	address;
> +		} pagefault;
> +
> +		struct {
> +			/* unused reserved fields */
> +			__u64	reserved1;
> +			__u64	reserved2;
> +			__u64	reserved3;
> +		} reserved;
> +	} arg;
> +} __packed;
> +
> +/*
> + * Start at 0x12 and not at 0 to be more strict against bugs.
> + */
> +#define UFFD_EVENT_PAGEFAULT	0x12
> +#if 0 /* not available yet */
> +#define UFFD_EVENT_FORK		0x13
> +#endif
> +
> +/* flags for UFFD_EVENT_PAGEFAULT */
> +#define UFFD_PAGEFAULT_FLAG_WRITE	(1<<0)	/* If this was a write fault */
> +#define UFFD_PAGEFAULT_FLAG_WP		(1<<1)	/* If reason is VM_UFFD_WP */
> +
> +struct uffdio_api {
> +	/* userland asks for an API number and the features to enable */
> +	__u64 api;
> +	/*
> +	 * Kernel answers below with the all available features for
> +	 * the API, this notifies userland of which events and/or
> +	 * which flags for each event are enabled in the current
> +	 * kernel.
> +	 *
> +	 * Note: UFFD_EVENT_PAGEFAULT and UFFD_PAGEFAULT_FLAG_WRITE
> +	 * are to be considered implicitly always enabled in all kernels as
> +	 * long as the uffdio_api.api requested matches UFFD_API.
> +	 */
> +#if 0 /* not available yet */
> +#define UFFD_FEATURE_PAGEFAULT_FLAG_WP		(1<<0)
> +#define UFFD_FEATURE_EVENT_FORK			(1<<1)
> +#endif
> +	__u64 features;
> +
> +	__u64 ioctls;
> +};
> +
> +struct uffdio_range {
> +	__u64 start;
> +	__u64 len;
> +};
> +
> +struct uffdio_register {
> +	struct uffdio_range range;
> +#define UFFDIO_REGISTER_MODE_MISSING	((__u64)1<<0)
> +#define UFFDIO_REGISTER_MODE_WP		((__u64)1<<1)
> +	__u64 mode;
> +
> +	/*
> +	 * kernel answers which ioctl commands are available for the
> +	 * range, keep at the end as the last 8 bytes aren't read.
> +	 */
> +	__u64 ioctls;
> +};
> +
> +struct uffdio_copy {
> +	__u64 dst;
> +	__u64 src;
> +	__u64 len;
> +	/*
> +	 * There will be a wrprotection flag later that allows to map
> +	 * pages wrprotected on the fly. And such a flag will be
> +	 * available if the wrprotection ioctl are implemented for the
> +	 * range according to the uffdio_register.ioctls.
> +	 */
> +#define UFFDIO_COPY_MODE_DONTWAKE		((__u64)1<<0)
> +	__u64 mode;
> +
> +	/*
> +	 * "copy" is written by the ioctl and must be at the end: the
> +	 * copy_from_user will not read the last 8 bytes.
> +	 */
> +	__s64 copy;
> +};
> +
> +struct uffdio_zeropage {
> +	struct uffdio_range range;
> +#define UFFDIO_ZEROPAGE_MODE_DONTWAKE		((__u64)1<<0)
> +	__u64 mode;
> +
> +	/*
> +	 * "zeropage" is written by the ioctl and must be at the end:
> +	 * the copy_from_user will not read the last 8 bytes.
> +	 */
> +	__s64 zeropage;
> +};
> +
> +#endif /* _LINUX_USERFAULTFD_H */
> diff --git a/criu/include/uffd.h b/criu/include/uffd.h
> index 6c931e2..4e1ba4a 100644
> --- a/criu/include/uffd.h
> +++ b/criu/include/uffd.h
> @@ -1,22 +1,7 @@
>  #ifndef __CR_UFFD_H_
>  #define __CR_UFFD_H_
>  
> -#include "config.h"
> -#include "restorer.h"
> -
> -#ifdef CONFIG_HAS_UFFD
> -
> -#include <syscall.h>
> -#include <linux/userfaultfd.h>
> -
> -#ifndef __NR_userfaultfd
> -#error "missing __NR_userfaultfd definition"
> -#endif
> -
> +struct task_restore_args;
>  extern int setup_uffd(struct task_restore_args *task_args, int pid);
> -#else
> -static inline int setup_uffd(struct task_restore_args *task_args, int pid) { return 0; }
> -
> -#endif /* CONFIG_HAS_UFFD */
>  
>  #endif /* __CR_UFFD_H_ */
> diff --git a/criu/pie/restorer.c b/criu/pie/restorer.c
> index 59e1723..5fc8949 100644
> --- a/criu/pie/restorer.c
> +++ b/criu/pie/restorer.c
> @@ -16,6 +16,8 @@
>  #include <sys/resource.h>
>  #include <signal.h>
>  
> +#include "linux/userfaultfd.h"
> +
>  #include "compiler.h"
>  #include "asm/string.h"
>  #include "asm/types.h"
> @@ -704,16 +706,16 @@ static void rst_tcp_socks_all(struct task_restore_args *ta)
>  
>  static int enable_uffd(int uffd, unsigned long addr, unsigned long len)
>  {
> +	int rc;
> +	struct uffdio_register uffdio_register;
> +	unsigned long expected_ioctls;
> +
>  	/*
>  	 * If uffd == -1, this means that userfaultfd is not enabled
>  	 * or it is not available.
>  	 */
>  	if (uffd == -1)
>  		return 0;
> -#ifdef CONFIG_HAS_UFFD
> -	int rc;
> -	struct uffdio_register uffdio_register;
> -	unsigned long expected_ioctls;
>  
>  	uffdio_register.range.start = addr;
>  	uffdio_register.range.len = len;
> @@ -733,7 +735,6 @@ static int enable_uffd(int uffd, unsigned long addr, unsigned long len)
>  		pr_err("lazy-pages: unexpected missing uffd ioctl for anon memory\n");
>  	}
>  
> -#endif
>  	return 0;
>  }
>  
> diff --git a/criu/uffd.c b/criu/uffd.c
> index 254a5b3..6f5e774 100644
> --- a/criu/uffd.c
> +++ b/criu/uffd.c
> @@ -15,21 +15,23 @@
>  #include <sys/un.h>
>  #include <sys/socket.h>
>  
> +#include "linux/userfaultfd.h"
> +
>  #include "asm/page.h"
> -#include "include/log.h"
> -#include "include/criu-plugin.h"
> -#include "include/page-read.h"
> -#include "include/files-reg.h"
> -#include "include/kerndat.h"
> -#include "include/mem.h"
> -#include "include/uffd.h"
> -#include "include/util-pie.h"
> -#include "include/pstree.h"
> -#include "include/crtools.h"
> -#include "include/cr_options.h"
> +#include "log.h"
> +#include "criu-plugin.h"
> +#include "page-read.h"
> +#include "files-reg.h"
> +#include "kerndat.h"
> +#include "mem.h"
> +#include "uffd.h"
> +#include "util-pie.h"
> +#include "pstree.h"
> +#include "crtools.h"
> +#include "cr_options.h"
>  #include "xmalloc.h"
> -
> -#ifdef CONFIG_HAS_UFFD
> +#include "syscall-codes.h"
> +#include "restorer.h"
>  
>  #undef  LOG_PREFIX
>  #define LOG_PREFIX "lazy-pages: "
> @@ -94,7 +96,7 @@ int setup_uffd(struct task_restore_args *task_args, int pid)
>  	 * Open userfaulfd FD which is passed to the restorer blob and
>  	 * to a second process handling the userfaultfd page faults.
>  	 */
> -	task_args->uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
> +	task_args->uffd = syscall(SYS_userfaultfd, O_CLOEXEC | O_NONBLOCK);
>  
>  	/*
>  	 * Check if the UFFD_API is the one which is expected
> @@ -680,13 +682,3 @@ int cr_lazy_pages()
>  
>  	return ret;
>  }
> -
> -#else /* CONFIG_HAS_UFFD */
> -
> -int cr_lazy_pages()
> -{
> -	pr_msg("userfaultfd system call is not supported, cannot start lazy-pages daemon\n");
> -	return -1;
> -}
> -
> -#endif /* CONFIG_HAS_UFFD */
> diff --git a/scripts/feature-tests.mak b/scripts/feature-tests.mak
> index c48b52e..c3dc85e 100644
> --- a/scripts/feature-tests.mak
> +++ b/scripts/feature-tests.mak
> @@ -90,18 +90,3 @@ int main(void)
>  }
>  
>  endef
> -
> -define FEATURE_TEST_UFFD
> -
> -#include <syscall.h>
> -#include <linux/userfaultfd.h>
> -
> -int main(void)
> -{
> -#ifndef __NR_userfaultfd
> -#error "missing __NR_userfaultfd definition"
> -#endif
> -	return 0;
> -}
> -
> -endef
> -- 
> 2.5.0
> 


More information about the CRIU mailing list