[CRIU] [PATCH 4/5] kdat: Cache kdat object into /run/criu.kdat (v3)

Pavel Emelyanov xemul at virtuozzo.com
Thu Apr 27 07:58:15 PDT 2017


Doing kerndat checks on every criu start is way too slow. We
need some way to speed this checks up on a particular box.

As suggested by Andre, Dima and Mike let's try to keep the
collected kdat bits into some tmpfs file. Keeping it on
tmpfs would invaludate this cache on every machine reboot.

There have been many suggestions how to generate this file,
my proposal is to create it once the kdat object is filled
by criu, w/o any explicit command. Optionally we can add
'criu kdat --save|--drop' actions to manage this file.

v2:
* don't ignore return code of write() (some glibcs complain)
* unlink tmp file in case rename failed

v3:
* add one more magic into kerndat_s which is the 'date +%s'
* ignore any errors opening or saving cache. Only size/magic
  mismatch matters (and result in dropping the cache)
* cache file path is Makefile-configurable (RUNDIR)
* don't save cache if kerndat auto-detection failed

Signed-off-by: Pavel Emelyanov <xemul at virtuozzo.com>
---
 Makefile.install       |  3 +-
 criu/Makefile.crtools  |  1 +
 criu/include/kerndat.h |  1 +
 criu/include/magic.h   |  6 ++++
 criu/kerndat.c         | 85 ++++++++++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 95 insertions(+), 1 deletion(-)

diff --git a/Makefile.install b/Makefile.install
index f1e5415..ddde93d 100644
--- a/Makefile.install
+++ b/Makefile.install
@@ -7,6 +7,7 @@ MANDIR		:= $(PREFIX)/share/man
 LIBDIR		:= $(PREFIX)/lib
 INCLUDEDIR	:= $(PREFIX)/include
 LIBEXECDIR	:= $(PREFIX)/libexec
+RUNDIR		:= /run
 
 #
 # For recent Debian/Ubuntu with multiarch support.
@@ -21,7 +22,7 @@ else
         endif
 endif
 
-export PREFIX BINDIR SBINDIR MANDIR
+export PREFIX BINDIR SBINDIR MANDIR RUNDIR
 export LIBDIR INCLUDEDIR LIBEXECDIR
 
 install-man:
diff --git a/criu/Makefile.crtools b/criu/Makefile.crtools
index 03756ae..9ce0652 100644
--- a/criu/Makefile.crtools
+++ b/criu/Makefile.crtools
@@ -1,6 +1,7 @@
 ccflags-y		+= -iquote criu/$(ARCH)
 ccflags-y		+= $(COMPEL_UAPI_INCLUDES)
 CFLAGS_REMOVE_clone-noasan.o += $(CFLAGS-ASAN)
+CFLAGS_kerndat.o	+= -DKDAT_MAGIC_2=${shell date +%s} -DKDAT_RUNDIR=\"$(RUNDIR)\"
 ldflags-y		+= -r
 
 obj-y			+= action-scripts.o
diff --git a/criu/include/kerndat.h b/criu/include/kerndat.h
index f423103..5be04c6 100644
--- a/criu/include/kerndat.h
+++ b/criu/include/kerndat.h
@@ -29,6 +29,7 @@ enum loginuid_func {
 };
 
 struct kerndat_s {
+	u32 magic1, magic2;
 	dev_t shmem_dev;
 	int last_cap;
 	u64 zero_page_pfn;
diff --git a/criu/include/magic.h b/criu/include/magic.h
index 059d005..6fb241a 100644
--- a/criu/include/magic.h
+++ b/criu/include/magic.h
@@ -116,4 +116,10 @@
 #define STATS_MAGIC		0x57093306 /* Ostashkov */
 #define IRMAP_CACHE_MAGIC	0x57004059 /* Ivanovo */
 
+/*
+ * Main magic for kerndat_s structure.
+ */
+
+#define KDAT_MAGIC		0x57023458 /* Torzhok */
+
 #endif /* __CR_MAGIC_H__ */
diff --git a/criu/kerndat.c b/criu/kerndat.c
index 4b625c5..8264edc 100644
--- a/criu/kerndat.c
+++ b/criu/kerndat.c
@@ -652,10 +652,92 @@ close:
 	bclose(&f);
 	return ret;
 }
+
+#define KERNDAT_CACHE_FILE	KDAT_RUNDIR"/criu.kdat"
+#define KERNDAT_CACHE_FILE_TMP	KDAT_RUNDIR"/.criu.kdat"
+
+static int kerndat_try_load_cache(void)
+{
+	int fd, ret;
+
+	fd = open(KERNDAT_CACHE_FILE, O_RDONLY);
+	if (fd < 0) {
+		pr_warn("Can't load %s\n", KERNDAT_CACHE_FILE);
+		return 1;
+	}
+
+	ret = read(fd, &kdat, sizeof(kdat));
+	if (ret < 0) {
+		pr_perror("Can't read kdat cache");
+		return -1;
+	}
+
+	close(fd);
+
+	if (ret != sizeof(kdat) ||
+			kdat.magic1 != KDAT_MAGIC ||
+			kdat.magic2 != KDAT_MAGIC_2) {
+		pr_warn("Stale %s file\n", KERNDAT_CACHE_FILE);
+		unlink(KERNDAT_CACHE_FILE);
+		return 1;
+	}
+
+	pr_info("Loaded kdat cache from %s\n", KERNDAT_CACHE_FILE);
+	return 0;
+}
+
+static void kerndat_save_cache(void)
+{
+	int fd, ret;
+	struct statfs s;
+
+	fd = open(KERNDAT_CACHE_FILE_TMP, O_CREAT | O_EXCL | O_WRONLY, 0600);
+	if (fd < 0)
+		/*
+		 * It can happen that we race with some other criu
+		 * instance. That's OK, just ignore this error and
+		 * proceed.
+		 */
+		return;
+
+	if (fstatfs(fd, &s) < 0 || s.f_type != TMPFS_MAGIC) {
+		pr_warn("Can't keep kdat cache on non-tempfs\n");
+		close(fd);
+		goto unl;
+	}
+
+	/*
+	 * One magic to make sure we're reading the kdat file.
+	 * One more magic to make somehow sure we don't read kdat
+	 * from some other criu
+	 */
+	kdat.magic1 = KDAT_MAGIC;
+	kdat.magic2 = KDAT_MAGIC_2;
+	ret = write(fd, &kdat, sizeof(kdat));
+	close(fd);
+
+	if (ret == sizeof(kdat))
+		ret = rename(KERNDAT_CACHE_FILE_TMP, KERNDAT_CACHE_FILE);
+	else {
+		ret = -1;
+		errno = EIO;
+	}
+
+	if (ret < 0) {
+		pr_perror("Couldn't save %s", KERNDAT_CACHE_FILE);
+unl:
+		unlink(KERNDAT_CACHE_FILE_TMP);
+	}
+}
+
 int kerndat_init(void)
 {
 	int ret;
 
+	ret = kerndat_try_load_cache();
+	if (ret <= 0)
+		return ret;
+
 	ret = check_pagemap();
 	if (!ret)
 		ret = kerndat_get_shmemdev();
@@ -693,5 +775,8 @@ int kerndat_init(void)
 	kerndat_lsm();
 	kerndat_mmap_min_addr();
 
+	if (!ret)
+		kerndat_save_cache();
+
 	return ret;
 }
-- 
2.5.5



More information about the CRIU mailing list