[CRIU] [PATCH 3/5] kerndat: add test for availability of PR_SET_THP_DISABLE prctl

Mike Rapoport rppt at linux.vnet.ibm.com
Wed Jun 21 09:33:24 MSK 2017


The PR_SET_THP_DISABLE prctl allows control of transparent huge pages on
a per-process basis. It has been available since Linux 3.15, but until
recently it set VM_NOHUGEPAGE on all VMAs created after the prctl() call,
which prevents proper restore for a combination of pre- and post-copy. A
recent change to prctl(PR_SET_THP_DISABLE) behavior eliminates the use of
per-VMA flags, so we can use the new version of the prctl() to disable THP.
Add a kerndat test that detects the new behavior by mapping a page while
THP is disabled and checking its VmFlags in /proc/self/smaps.

Signed-off-by: Mike Rapoport <rppt at linux.vnet.ibm.com>
---
 criu/include/kerndat.h |  1 +
 criu/include/prctl.h   |  8 ++++++
 criu/kerndat.c         | 74 ++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 83 insertions(+)

diff --git a/criu/include/kerndat.h b/criu/include/kerndat.h
index a6dac92..bf751bb 100644
--- a/criu/include/kerndat.h
+++ b/criu/include/kerndat.h
@@ -57,6 +57,7 @@ struct kerndat_s {
 	unsigned int sysctl_nr_open;
 	unsigned long files_stat_max_files;
 	bool has_pid_for_children_ns;
+	bool has_thp_disable;
 };
 
 extern struct kerndat_s kdat;
diff --git a/criu/include/prctl.h b/criu/include/prctl.h
index 79c3bd8..8e7fef3 100644
--- a/criu/include/prctl.h
+++ b/criu/include/prctl.h
@@ -74,4 +74,12 @@ struct prctl_mm_map {
 # define PR_GET_TID_ADDRESS	40
 #endif
 
+#ifndef PR_SET_THP_DISABLE	/* defined here only if not already provided */
+# define PR_SET_THP_DISABLE	41	/* prctl option: set per-process THP disable */
+#endif
+
+#ifndef PR_GET_THP_DISABLE	/* defined here only if not already provided */
+# define PR_GET_THP_DISABLE	42	/* prctl option: query per-process THP disable */
+#endif
+
 #endif /* __CR_PRCTL_H__ */
diff --git a/criu/kerndat.c b/criu/kerndat.c
index 5bca4bf..28dbd48 100644
--- a/criu/kerndat.c
+++ b/criu/kerndat.c
@@ -12,6 +12,7 @@
 #include <stdint.h>
 #include <sys/socket.h>
 #include <arpa/inet.h>  /* for sockaddr_in and inet_ntoa() */
+#include <sys/prctl.h>
 
 #include "int.h"
 #include "log.h"
@@ -34,6 +35,7 @@
 #include "netfilter.h"
 #include "linux/userfaultfd.h"
 #include "netfilter.h"
+#include "prctl.h"
 
 struct kerndat_s kdat = {
 };
@@ -854,6 +856,76 @@ close:
 	bclose(&f);
 	return ret;
 }
+
+/*
+ * Set kdat.has_thp_disable if a VMA mapped under PR_SET_THP_DISABLE is not
+ * flagged nohugepage. Returns 0 (also when the prctl is missing), -1 on error.
+ */
+int kerndat_has_thp_disable(void)
+{
+	struct bfd f;
+	void *addr;
+	char *str;
+	int ret = -1;
+	bool vma_match = false;	/* true once the test mapping's smaps entry is seen */
+
+	if (prctl(PR_SET_THP_DISABLE, 1, 0, 0, 0)) {
+		if (errno != EINVAL)
+			return -1;
+		pr_info("PR_SET_THP_DISABLE is not available\n");
+		return 0;
+	}
+
+	addr = mmap(NULL, PAGE_SIZE, PROT_READ | PROT_WRITE,
+		    MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+	if (addr == MAP_FAILED) {
+		pr_perror("Can't mmap memory for THP disable test");
+		return -1;
+	}
+
+	if (prctl(PR_SET_THP_DISABLE, 0, 0, 0, 0))
+		goto out_unmap;	/* f is not set up yet: only unmap */
+
+	f.fd = open("/proc/self/smaps", O_RDONLY);
+	if (f.fd < 0) {
+		pr_perror("Can't open /proc/self/smaps");
+		goto out_unmap;	/* nothing for bclose() to release */
+	}
+	if (bfdopenr(&f))
+		goto out_unmap;	/* NOTE(review): assumes bfdopenr() drops f.fd on failure */
+
+	while ((str = breadline(&f)) != NULL) {
+		if (IS_ERR(str))
+			goto out;
+		if (is_vma_range_fmt(str)) {
+			unsigned long vma_addr;
+
+			if (sscanf(str, "%lx-", &vma_addr) != 1) {
+				pr_err("Can't parse: %s\n", str);
+				goto out;
+			}
+			if (vma_addr == (unsigned long)addr)
+				vma_match = true;	/* next VmFlags line is ours */
+		}
+
+		if (vma_match && !strncmp(str, "VmFlags: ", 9)) {
+			u32 flags = 0;
+			u64 madv = 0;
+			int io_pf = 0;
+
+			parse_vmflags(str, &flags, &madv, &io_pf);
+			kdat.has_thp_disable = !test_bit(MADV_NOHUGEPAGE, &madv);
+			break;
+		}
+	}
+	ret = 0;
+out:
+	bclose(&f);
+out_unmap:
+	munmap(addr, PAGE_SIZE);
+	return ret;
+}
+
 int kerndat_init(void)
 {
 	int ret;
@@ -904,6 +976,8 @@ int kerndat_init(void)
 		ret = kerndat_has_ns_get_parent();
 	if (!ret)
 		ret = kerndat_has_pid_for_children_ns();
+	if (!ret)
+		ret = kerndat_has_thp_disable();
 
 	kerndat_lsm();
 	kerndat_mmap_min_addr();
-- 
2.7.4



More information about the CRIU mailing list