[CRIU] [PATCH] criu: always enable the userfaultfd support
Andrey Vagin
avagin at openvz.org
Mon Apr 25 22:38:53 PDT 2016
From: Andrew Vagin <avagin at virtuozzo.com>
Add linux/userfaultfd.h to the criu sources. This header is part
of the kernel API, and I see nothing wrong with having it in the repo.
Why we want to do this:
* to check that criu works correctly when the kernel doesn't
support userfaultfd.
* to check compilation of the userfaultfd part in travis-ci.
Acked-by: Mike Rapoport <rppt at linux.vnet.ibm.com>
Acked-by: Adrian Reber <areber at redhat.com>
Cc: Mike Rapoport <rppt at linux.vnet.ibm.com>
Cc: Adrian Reber <areber at redhat.com>
Signed-off-by: Andrew Vagin <avagin at virtuozzo.com>
---
criu/arch/arm/syscalls/syscall.def | 1 +
criu/arch/ppc64/syscalls/syscall-ppc64.tbl | 1 +
criu/arch/x86/syscalls/syscall_32.tbl | 1 +
criu/arch/x86/syscalls/syscall_64.tbl | 1 +
criu/include/linux/userfaultfd.h | 167 +++++++++++++++++++++++++++++
criu/include/uffd.h | 17 +--
criu/pie/restorer.c | 11 +-
criu/uffd.c | 40 +++----
scripts/feature-tests.mak | 15 ---
9 files changed, 194 insertions(+), 60 deletions(-)
create mode 100644 criu/include/linux/userfaultfd.h
diff --git a/criu/arch/arm/syscalls/syscall.def b/criu/arch/arm/syscalls/syscall.def
index a42c1b7..ffa807d 100644
--- a/criu/arch/arm/syscalls/syscall.def
+++ b/criu/arch/arm/syscalls/syscall.def
@@ -106,3 +106,4 @@ io_setup 0 243 (unsigned nr_events, aio_context_t *ctx)
io_submit 2 246 (aio_context_t ctx_id, long nr, struct iocb **iocbpp)
io_getevents 4 245 (aio_context_t ctx, long min_nr, long nr, struct io_event *evs, struct timespec *tmo)
seccomp 277 383 (unsigned int op, unsigned int flags, const char *uargs)
+userfaultfd 282 388 (int flags)
diff --git a/criu/arch/ppc64/syscalls/syscall-ppc64.tbl b/criu/arch/ppc64/syscalls/syscall-ppc64.tbl
index 5087037..d629d5a 100644
--- a/criu/arch/ppc64/syscalls/syscall-ppc64.tbl
+++ b/criu/arch/ppc64/syscalls/syscall-ppc64.tbl
@@ -103,3 +103,4 @@ __NR_io_setup 227 sys_io_setup (unsigned nr_events, aio_context_t *ctx_idp)
__NR_io_getevents 229 sys_io_getevents (aio_context_t ctx_id, long min_nr, long nr, struct io_event *events, struct timespec *timeout)
__NR_io_submit 230 sys_io_submit (aio_context_t ctx_id, long nr, struct iocb **iocbpp)
__NR_ipc 117 sys_ipc (unsigned int call, int first, unsigned long second, unsigned long third, const void *ptr, long fifth)
+__NR_userfaultfd 364 sys_userfaultfd (int flags)
diff --git a/criu/arch/x86/syscalls/syscall_32.tbl b/criu/arch/x86/syscalls/syscall_32.tbl
index 53970f4..7c4867c 100644
--- a/criu/arch/x86/syscalls/syscall_32.tbl
+++ b/criu/arch/x86/syscalls/syscall_32.tbl
@@ -91,3 +91,4 @@ __NR_setns 346 sys_setns (int fd, int nstype)
__NR_kcmp 349 sys_kcmp (pid_t pid1, pid_t pid2, int type, unsigned long idx1, unsigned long idx2)
__NR_seccomp 354 sys_seccomp (unsigned int op, unsigned int flags, const char *uargs)
__NR_memfd_create 356 sys_memfd_create (const char *name, unsigned int flags)
+__NR_userfaultfd 374 sys_userfaultfd (int flags)
diff --git a/criu/arch/x86/syscalls/syscall_64.tbl b/criu/arch/x86/syscalls/syscall_64.tbl
index 543e851..c0b8d8c 100644
--- a/criu/arch/x86/syscalls/syscall_64.tbl
+++ b/criu/arch/x86/syscalls/syscall_64.tbl
@@ -101,3 +101,4 @@ __NR_open_by_handle_at 304 sys_open_by_handle_at (int mountdirfd, struct file_
__NR_setns 308 sys_setns (int fd, int nstype)
__NR_kcmp 312 sys_kcmp (pid_t pid1, pid_t pid2, int type, unsigned long idx1, unsigned long idx2)
__NR_memfd_create 319 sys_memfd_create (const char *name, unsigned int flags)
+__NR_userfaultfd 323 sys_userfaultfd (int flags)
diff --git a/criu/include/linux/userfaultfd.h b/criu/include/linux/userfaultfd.h
new file mode 100644
index 0000000..9057d7a
--- /dev/null
+++ b/criu/include/linux/userfaultfd.h
@@ -0,0 +1,167 @@
+/*
+ * include/linux/userfaultfd.h
+ *
+ * Copyright (C) 2007 Davide Libenzi <davidel at xmailserver.org>
+ * Copyright (C) 2015 Red Hat, Inc.
+ *
+ */
+
+#ifndef _LINUX_USERFAULTFD_H
+#define _LINUX_USERFAULTFD_H
+
+#include <linux/types.h>
+
+#define UFFD_API ((__u64)0xAA)
+/*
+ * After implementing the respective features it will become:
+ * #define UFFD_API_FEATURES (UFFD_FEATURE_PAGEFAULT_FLAG_WP | \
+ * UFFD_FEATURE_EVENT_FORK)
+ */
+#define UFFD_API_FEATURES (0)
+#define UFFD_API_IOCTLS \
+ ((__u64)1 << _UFFDIO_REGISTER | \
+ (__u64)1 << _UFFDIO_UNREGISTER | \
+ (__u64)1 << _UFFDIO_API)
+#define UFFD_API_RANGE_IOCTLS \
+ ((__u64)1 << _UFFDIO_WAKE | \
+ (__u64)1 << _UFFDIO_COPY | \
+ (__u64)1 << _UFFDIO_ZEROPAGE)
+
+/*
+ * Valid ioctl command number range with this API is from 0x00 to
+ * 0x3F. UFFDIO_API is the fixed number, everything else can be
+ * changed by implementing a different UFFD_API. If sticking to the
+ * same UFFD_API more ioctl can be added and userland will be aware of
+ * which ioctl the running kernel implements through the ioctl command
+ * bitmask written by the UFFDIO_API.
+ */
+#define _UFFDIO_REGISTER (0x00)
+#define _UFFDIO_UNREGISTER (0x01)
+#define _UFFDIO_WAKE (0x02)
+#define _UFFDIO_COPY (0x03)
+#define _UFFDIO_ZEROPAGE (0x04)
+#define _UFFDIO_API (0x3F)
+
+/* userfaultfd ioctl ids */
+#define UFFDIO 0xAA
+#define UFFDIO_API _IOWR(UFFDIO, _UFFDIO_API, \
+ struct uffdio_api)
+#define UFFDIO_REGISTER _IOWR(UFFDIO, _UFFDIO_REGISTER, \
+ struct uffdio_register)
+#define UFFDIO_UNREGISTER _IOR(UFFDIO, _UFFDIO_UNREGISTER, \
+ struct uffdio_range)
+#define UFFDIO_WAKE _IOR(UFFDIO, _UFFDIO_WAKE, \
+ struct uffdio_range)
+#define UFFDIO_COPY _IOWR(UFFDIO, _UFFDIO_COPY, \
+ struct uffdio_copy)
+#define UFFDIO_ZEROPAGE _IOWR(UFFDIO, _UFFDIO_ZEROPAGE, \
+ struct uffdio_zeropage)
+
+/* read() structure */
+struct uffd_msg {
+ __u8 event;
+
+ __u8 reserved1;
+ __u16 reserved2;
+ __u32 reserved3;
+
+ union {
+ struct {
+ __u64 flags;
+ __u64 address;
+ } pagefault;
+
+ struct {
+ /* unused reserved fields */
+ __u64 reserved1;
+ __u64 reserved2;
+ __u64 reserved3;
+ } reserved;
+ } arg;
+} __packed;
+
+/*
+ * Start at 0x12 and not at 0 to be more strict against bugs.
+ */
+#define UFFD_EVENT_PAGEFAULT 0x12
+#if 0 /* not available yet */
+#define UFFD_EVENT_FORK 0x13
+#endif
+
+/* flags for UFFD_EVENT_PAGEFAULT */
+#define UFFD_PAGEFAULT_FLAG_WRITE (1<<0) /* If this was a write fault */
+#define UFFD_PAGEFAULT_FLAG_WP (1<<1) /* If reason is VM_UFFD_WP */
+
+struct uffdio_api {
+ /* userland asks for an API number and the features to enable */
+ __u64 api;
+ /*
+ * Kernel answers below with the all available features for
+ * the API, this notifies userland of which events and/or
+ * which flags for each event are enabled in the current
+ * kernel.
+ *
+ * Note: UFFD_EVENT_PAGEFAULT and UFFD_PAGEFAULT_FLAG_WRITE
+ * are to be considered implicitly always enabled in all kernels as
+ * long as the uffdio_api.api requested matches UFFD_API.
+ */
+#if 0 /* not available yet */
+#define UFFD_FEATURE_PAGEFAULT_FLAG_WP (1<<0)
+#define UFFD_FEATURE_EVENT_FORK (1<<1)
+#endif
+ __u64 features;
+
+ __u64 ioctls;
+};
+
+struct uffdio_range {
+ __u64 start;
+ __u64 len;
+};
+
+struct uffdio_register {
+ struct uffdio_range range;
+#define UFFDIO_REGISTER_MODE_MISSING ((__u64)1<<0)
+#define UFFDIO_REGISTER_MODE_WP ((__u64)1<<1)
+ __u64 mode;
+
+ /*
+ * kernel answers which ioctl commands are available for the
+ * range, keep at the end as the last 8 bytes aren't read.
+ */
+ __u64 ioctls;
+};
+
+struct uffdio_copy {
+ __u64 dst;
+ __u64 src;
+ __u64 len;
+ /*
+ * There will be a wrprotection flag later that allows to map
+ * pages wrprotected on the fly. And such a flag will be
+ * available if the wrprotection ioctl are implemented for the
+ * range according to the uffdio_register.ioctls.
+ */
+#define UFFDIO_COPY_MODE_DONTWAKE ((__u64)1<<0)
+ __u64 mode;
+
+ /*
+ * "copy" is written by the ioctl and must be at the end: the
+ * copy_from_user will not read the last 8 bytes.
+ */
+ __s64 copy;
+};
+
+struct uffdio_zeropage {
+ struct uffdio_range range;
+#define UFFDIO_ZEROPAGE_MODE_DONTWAKE ((__u64)1<<0)
+ __u64 mode;
+
+ /*
+ * "zeropage" is written by the ioctl and must be at the end:
+ * the copy_from_user will not read the last 8 bytes.
+ */
+ __s64 zeropage;
+};
+
+#endif /* _LINUX_USERFAULTFD_H */
diff --git a/criu/include/uffd.h b/criu/include/uffd.h
index 6c931e2..4e1ba4a 100644
--- a/criu/include/uffd.h
+++ b/criu/include/uffd.h
@@ -1,22 +1,7 @@
#ifndef __CR_UFFD_H_
#define __CR_UFFD_H_
-#include "config.h"
-#include "restorer.h"
-
-#ifdef CONFIG_HAS_UFFD
-
-#include <syscall.h>
-#include <linux/userfaultfd.h>
-
-#ifndef __NR_userfaultfd
-#error "missing __NR_userfaultfd definition"
-#endif
-
+struct task_restore_args;
extern int setup_uffd(struct task_restore_args *task_args, int pid);
-#else
-static inline int setup_uffd(struct task_restore_args *task_args, int pid) { return 0; }
-
-#endif /* CONFIG_HAS_UFFD */
#endif /* __CR_UFFD_H_ */
diff --git a/criu/pie/restorer.c b/criu/pie/restorer.c
index 59e1723..5fc8949 100644
--- a/criu/pie/restorer.c
+++ b/criu/pie/restorer.c
@@ -16,6 +16,8 @@
#include <sys/resource.h>
#include <signal.h>
+#include "linux/userfaultfd.h"
+
#include "compiler.h"
#include "asm/string.h"
#include "asm/types.h"
@@ -704,16 +706,16 @@ static void rst_tcp_socks_all(struct task_restore_args *ta)
static int enable_uffd(int uffd, unsigned long addr, unsigned long len)
{
+ int rc;
+ struct uffdio_register uffdio_register;
+ unsigned long expected_ioctls;
+
/*
* If uffd == -1, this means that userfaultfd is not enabled
* or it is not available.
*/
if (uffd == -1)
return 0;
-#ifdef CONFIG_HAS_UFFD
- int rc;
- struct uffdio_register uffdio_register;
- unsigned long expected_ioctls;
uffdio_register.range.start = addr;
uffdio_register.range.len = len;
@@ -733,7 +735,6 @@ static int enable_uffd(int uffd, unsigned long addr, unsigned long len)
pr_err("lazy-pages: unexpected missing uffd ioctl for anon memory\n");
}
-#endif
return 0;
}
diff --git a/criu/uffd.c b/criu/uffd.c
index 15cfa56..c17f003 100644
--- a/criu/uffd.c
+++ b/criu/uffd.c
@@ -16,21 +16,23 @@
#include <sys/socket.h>
#include <sys/epoll.h>
+#include "linux/userfaultfd.h"
+
#include "asm/page.h"
-#include "include/log.h"
-#include "include/criu-plugin.h"
-#include "include/page-read.h"
-#include "include/files-reg.h"
-#include "include/kerndat.h"
-#include "include/mem.h"
-#include "include/uffd.h"
-#include "include/util-pie.h"
-#include "include/pstree.h"
-#include "include/crtools.h"
-#include "include/cr_options.h"
+#include "log.h"
+#include "criu-plugin.h"
+#include "page-read.h"
+#include "files-reg.h"
+#include "kerndat.h"
+#include "mem.h"
+#include "uffd.h"
+#include "util-pie.h"
+#include "pstree.h"
+#include "crtools.h"
+#include "cr_options.h"
#include "xmalloc.h"
-
-#ifdef CONFIG_HAS_UFFD
+#include "syscall-codes.h"
+#include "restorer.h"
#undef LOG_PREFIX
#define LOG_PREFIX "lazy-pages: "
@@ -166,7 +168,7 @@ int setup_uffd(struct task_restore_args *task_args, int pid)
* Open userfaulfd FD which is passed to the restorer blob and
* to a second process handling the userfaultfd page faults.
*/
- task_args->uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
+ task_args->uffd = syscall(SYS_userfaultfd, O_CLOEXEC | O_NONBLOCK);
/*
* Check if the UFFD_API is the one which is expected
@@ -833,13 +835,3 @@ int cr_lazy_pages()
return ret;
}
-
-#else /* CONFIG_HAS_UFFD */
-
-int cr_lazy_pages()
-{
- pr_msg("userfaultfd system call is not supported, cannot start lazy-pages daemon\n");
- return -1;
-}
-
-#endif /* CONFIG_HAS_UFFD */
diff --git a/scripts/feature-tests.mak b/scripts/feature-tests.mak
index c48b52e..c3dc85e 100644
--- a/scripts/feature-tests.mak
+++ b/scripts/feature-tests.mak
@@ -90,18 +90,3 @@ int main(void)
}
endef
-
-define FEATURE_TEST_UFFD
-
-#include <syscall.h>
-#include <linux/userfaultfd.h>
-
-int main(void)
-{
-#ifndef __NR_userfaultfd
-#error "missing __NR_userfaultfd definition"
-#endif
- return 0;
-}
-
-endef
--
2.5.5
More information about the CRIU
mailing list