[CRIU] [PATCH v3 3/6] kerndat: add test for availability of PR_SET_THP_DISABLE prctl
Mike Rapoport
rppt at linux.vnet.ibm.com
Wed Jun 21 21:22:43 MSK 2017
The PR_SET_THP_DISABLE prctl allows control of transparent huge pages on a
per-process basis. It has been available since Linux 3.15, but until recently
it set VM_NOHUGEPAGE on all VMAs created after the prctl() call, which prevents
proper restore for a combination of pre- and post-copy. A recent change to the
prctl(PR_SET_THP_DISABLE) behavior eliminates the use of per-VMA flags, so
we can use the new version of the prctl() to disable THP.
Signed-off-by: Mike Rapoport <rppt at linux.vnet.ibm.com>
---
criu/include/kerndat.h | 1 +
criu/include/prctl.h | 8 ++++++
criu/kerndat.c | 74 ++++++++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 83 insertions(+)
diff --git a/criu/include/kerndat.h b/criu/include/kerndat.h
index a6dac92..bf751bb 100644
--- a/criu/include/kerndat.h
+++ b/criu/include/kerndat.h
@@ -57,6 +57,7 @@ struct kerndat_s {
unsigned int sysctl_nr_open;
unsigned long files_stat_max_files;
bool has_pid_for_children_ns;
+ bool has_thp_disable;
};
extern struct kerndat_s kdat;
diff --git a/criu/include/prctl.h b/criu/include/prctl.h
index 79c3bd8..8e7fef3 100644
--- a/criu/include/prctl.h
+++ b/criu/include/prctl.h
@@ -74,4 +74,12 @@ struct prctl_mm_map {
# define PR_GET_TID_ADDRESS 40
#endif
+#ifndef PR_SET_THP_DISABLE /* fallback for build environments whose headers lack this prctl option */
+# define PR_SET_THP_DISABLE 41
+#endif
+
+#ifndef PR_GET_THP_DISABLE /* fallback for build environments whose headers lack this prctl option */
+# define PR_GET_THP_DISABLE 42
+#endif
+
#endif /* __CR_PRCTL_H__ */
diff --git a/criu/kerndat.c b/criu/kerndat.c
index 5bca4bf..cdf8a93 100644
--- a/criu/kerndat.c
+++ b/criu/kerndat.c
@@ -12,6 +12,7 @@
#include <stdint.h>
#include <sys/socket.h>
#include <arpa/inet.h> /* for sockaddr_in and inet_ntoa() */
+#include <sys/prctl.h>
#include "int.h"
#include "log.h"
@@ -34,6 +35,7 @@
#include "netfilter.h"
#include "linux/userfaultfd.h"
#include "netfilter.h"
+#include "prctl.h"
struct kerndat_s kdat = {
};
@@ -854,6 +856,76 @@ close:
bclose(&f);
return ret;
}
+
+/*
+ * Probe whether prctl(PR_SET_THP_DISABLE) works without tagging VMAs.
+ *
+ * The test disables THP for the process, maps one anonymous page, re-enables
+ * THP and then looks up that mapping's "VmFlags: " line in /proc/self/smaps.
+ * kdat.has_thp_disable is set to true when the MADV_NOHUGEPAGE bit was not
+ * reported for the mapping, and to false when it was.
+ *
+ * Returns 0 when the probe completed (this includes the case where the
+ * prctl is rejected with EINVAL and kdat.has_thp_disable is left untouched),
+ * -1 on any error.
+ */
+int kerndat_has_thp_disable(void)
+{
+	struct bfd f;
+	void *addr;
+	char *str;
+	int ret = -1;
+	bool vma_match = false;
+
+	if (prctl(PR_SET_THP_DISABLE, 1, 0, 0, 0)) {
+		if (errno != EINVAL) {
+			pr_perror("prctl(PR_SET_THP_DISABLE) failed");
+			return -1;
+		}
+		pr_info("PR_SET_THP_DISABLE is not available\n");
+		return 0;
+	}
+
+	/* Anonymous mappings have no backing file; -1 is the portable fd. */
+	addr = mmap(NULL, PAGE_SIZE, PROT_READ | PROT_WRITE,
+		    MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+	if (addr == MAP_FAILED) {
+		pr_perror("Can't mmap memory for THP disable test");
+		return -1;
+	}
+
+	/* The test mapping exists now, so every exit below must unmap it. */
+	if (prctl(PR_SET_THP_DISABLE, 0, 0, 0, 0)) {
+		pr_perror("Can't re-enable THP after the test mapping");
+		goto out_unmap;
+	}
+
+	f.fd = open("/proc/self/smaps", O_RDONLY);
+	if (f.fd < 0) {
+		pr_perror("Can't open /proc/self/smaps");
+		/* Nothing was opened, so there is nothing to bclose(). */
+		goto out_unmap;
+	}
+	/*
+	 * NOTE(review): bfdopenr() is expected to release the descriptor
+	 * itself when it fails, leaving nothing valid to bclose() - confirm
+	 * against the bfd implementation.
+	 */
+	if (bfdopenr(&f))
+		goto out_unmap;
+
+	while ((str = breadline(&f)) != NULL) {
+		if (IS_ERR(str))
+			goto out;
+
+		if (is_vma_range_fmt(str)) {
+			unsigned long vma_addr;
+
+			if (sscanf(str, "%lx-", &vma_addr) != 1) {
+				pr_err("Can't parse: %s\n", str);
+				goto out;
+			}
+
+			/* Only the flags of the test mapping are of interest. */
+			if (vma_addr == (unsigned long)addr)
+				vma_match = true;
+		}
+
+		if (vma_match && !strncmp(str, "VmFlags: ", 9)) {
+			u32 flags = 0;
+			u64 madv = 0;
+			int io_pf = 0;
+
+			parse_vmflags(str, &flags, &madv, &io_pf);
+			/* madv is 64 bits wide, so build the mask in 64 bits too. */
+			kdat.has_thp_disable = !(madv & (1ULL << MADV_NOHUGEPAGE));
+			break;
+		}
+	}
+
+	ret = 0;
+
+out:
+	bclose(&f);
+out_unmap:
+	munmap(addr, PAGE_SIZE);
+
+	return ret;
+}
+
int kerndat_init(void)
{
int ret;
@@ -904,6 +976,8 @@ int kerndat_init(void)
ret = kerndat_has_ns_get_parent();
if (!ret)
ret = kerndat_has_pid_for_children_ns();
+ if (!ret)
+ ret = kerndat_has_thp_disable();
kerndat_lsm();
kerndat_mmap_min_addr();
--
2.7.4
More information about the CRIU
mailing list