[CRIU] [PATCH] fs: Add binfmt_misc support

Kirill Tkhai ktkhai at odin.com
Thu Dec 3 03:21:03 PST 2015


This patch implements checkpoint/restore functionality
for binfmt_misc mounts. Both magic and extension entry types,
as well as the "disabled" state, are supported.

Signed-off-by: Kirill Tkhai <ktkhai at virtuozzo.com>
---
 image-desc.c               |    1 
 include/image-desc.h       |    1 
 include/magic.h            |    1 
 include/protobuf-desc.h    |    3 -
 mount.c                    |  223 ++++++++++++++++++++++++++++++++++++++++++++
 protobuf-desc.c            |    1 
 protobuf/Makefile          |    1 
 protobuf/binfmt-misc.proto |   10 ++
 8 files changed, 238 insertions(+), 3 deletions(-)
 create mode 100644 protobuf/binfmt-misc.proto

diff --git a/image-desc.c b/image-desc.c
index f95b218..9fb96c8 100644
--- a/image-desc.c
+++ b/image-desc.c
@@ -79,6 +79,7 @@ struct cr_fd_desc_tmpl imgset_template[CR_FD_MAX] = {
 	FD_ENTRY_F(IP6TABLES,	"ip6tables-%d", O_NOBUF),
 	FD_ENTRY_F(TMPFS_IMG,	"tmpfs-%d.tar.gz", O_NOBUF),
 	FD_ENTRY_F(TMPFS_DEV,	"tmpfs-dev-%d.tar.gz", O_NOBUF),
+	FD_ENTRY(BINFMT_MISC,	"binfmt-misc-%d"),
 	FD_ENTRY(TTY_FILES,	"tty"),
 	FD_ENTRY(TTY_INFO,	"tty-info"),
 	FD_ENTRY(FILE_LOCKS,	"filelocks"),
diff --git a/include/image-desc.h b/include/image-desc.h
index ae8ae16..90933e9 100644
--- a/include/image-desc.h
+++ b/include/image-desc.h
@@ -82,6 +82,7 @@ enum {
 
 	CR_FD_TMPFS_IMG,
 	CR_FD_TMPFS_DEV,
+	CR_FD_BINFMT_MISC,
 	CR_FD_PAGES,
 
 	CR_FD_VMAS,
diff --git a/include/magic.h b/include/magic.h
index 25cfb47..3cb3766 100644
--- a/include/magic.h
+++ b/include/magic.h
@@ -90,6 +90,7 @@
 #define CPUINFO_MAGIC		0x61404013 /* Nyandoma */
 #define USERNS_MAGIC		0x55474906 /* Kazan */
 #define SECCOMP_MAGIC		0x64413049 /* Kostomuksha */
+#define BINFMT_MISC_MAGIC	0x67343323 /* Apatity */
 
 #define IFADDR_MAGIC		RAW_IMAGE_MAGIC
 #define ROUTE_MAGIC		RAW_IMAGE_MAGIC
diff --git a/include/protobuf-desc.h b/include/protobuf-desc.h
index cc78208..bb66a86 100644
--- a/include/protobuf-desc.h
+++ b/include/protobuf-desc.h
@@ -56,12 +56,13 @@ enum {
 	PB_CPUINFO,
 	PB_USERNS,
 	PB_NETNS,
+	PB_BINFMT_MISC,		/* 50 */
 
 	/* PB_AUTOGEN_STOP */
 
 	PB_PAGEMAP_HEAD,
 	PB_IDS,
-	PB_SIGACT,		/* 50 */
+	PB_SIGACT,
 	PB_NETDEV,
 	PB_REMAP_FPATH,
 	PB_SK_QUEUES,
diff --git a/mount.c b/mount.c
index ca10d43..5c6b72d 100644
--- a/mount.c
+++ b/mount.c
@@ -29,6 +29,7 @@
 #include "sysfs_parse.h"
 
 #include "protobuf/mnt.pb-c.h"
+#include "protobuf/binfmt-misc.pb-c.h"
 
 #define AUTODETECTED_MOUNT "CRIU:AUTOGENERATED"
 #define MS_PROPAGATE (MS_SHARED | MS_PRIVATE | MS_UNBINDABLE | MS_SLAVE)
@@ -1282,8 +1283,90 @@ static int devtmpfs_restore(struct mount_info *pm)
 	return ret;
 }
 
+/*
+ * Parse one binfmt_misc entry file, read line by line from @f, into @bme.
+ * Strings stored in @bme are xstrdup()-ed and left for the caller to free,
+ * even on failure. Returns 0 once breadline() yields NULL, -1 on error.
+ */
+static int parse_binfmt_misc_entry(struct bfd *f, BinfmtMiscEntry *bme)
+{
+	while (1) {
+		char *str = breadline(f);
+
+		if (IS_ERR(str))
+			return -1;
+		if (!str)
+			break;
+
+		if (!strncmp(str, "enabled", 7)) {
+			bme->enabled = true;
+			continue;
+		}
+		if (!strncmp(str, "disabled", 8))
+			continue;
+		if (!strncmp(str, "offset ", 7)) {
+			if (sscanf(str + 7, "%i", &bme->offset) != 1)
+				return -1;
+			bme->has_offset = true;
+			continue;
+		}
+#define DUP_EQUAL_AS(key, member)					\
+		if (!strncmp(str, key, strlen(key))) {			\
+			bme->member = xstrdup(str + strlen(key));	\
+			if (!bme->member)				\
+				return -1;				\
+			continue;					\
+		}
+		DUP_EQUAL_AS("interpreter ", interpreter)
+		DUP_EQUAL_AS("flags: ", flags)
+		DUP_EQUAL_AS("extension .", extension)
+		DUP_EQUAL_AS("magic ", magic)
+		DUP_EQUAL_AS("mask ", mask)
+#undef DUP_EQUAL_AS
+		pr_err("binfmt_misc: unsupported feature %s\n", str);
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * Dump a single binfmt_misc entry: open @name relative to @dfd, parse it
+ * and write the resulting BinfmtMiscEntry to @img. Returns 0 or -1.
+ */
+static int dump_binfmt_misc_entry(int dfd, char *name, struct cr_img *img)
+{
+	BinfmtMiscEntry entry = BINFMT_MISC_ENTRY__INIT;
+	struct bfd in;
+	int exit_code = -1;
+
+	in.fd = openat(dfd, name, O_RDONLY);
+	if (in.fd < 0) {
+		pr_perror("binfmt_misc: can't open %s", name);
+		return -1;
+	}
+	if (bfdopenr(&in))
+		return -1;
+
+	if (!parse_binfmt_misc_entry(&in, &entry)) {
+		/* @name is borrowed from the caller, hence never freed below */
+		entry.name = name;
+		if (!pb_write_one(img, &entry, PB_BINFMT_MISC))
+			exit_code = 0;
+	}
+
+	free(entry.interpreter);
+	free(entry.flags);
+	free(entry.extension);
+	free(entry.magic);
+	free(entry.mask);
+	bclose(&in);
+	return exit_code;
+}
+
 static int binfmt_misc_dump(struct mount_info *pm)
 {
+	struct cr_img *img;
 	int fd, ret = -1;
 	struct dirent *de;
 	DIR *fdir = NULL;
@@ -1298,6 +1381,10 @@ static int binfmt_misc_dump(struct mount_info *pm)
 		return -1;
 	}
 
+	img = open_image(CR_FD_BINFMT_MISC, O_DUMP, pm->s_dev);
+	if (!img)
+		goto out;
+
 	while ((de = readdir(fdir))) {
 		if (dir_dots(de))
 			continue;
@@ -1306,16 +1393,147 @@ static int binfmt_misc_dump(struct mount_info *pm)
 		if (!strcmp(de->d_name, "status"))
 			continue;
 
-		pr_err("binfmt_misc isn't empty: %s\n", de->d_name);
-		goto out;
+		if (dump_binfmt_misc_entry(fd, de->d_name, img))
+			goto out;
 	}
 
 	ret = 0;
 out:
+	if (img)
+		close_image(img);
 	closedir(fdir);
 	return ret;
 }
 
+/*
+ * Register one entry by writing the prepared string @buf to <mp>/register.
+ * If @bme is not enabled, also write "0" to the new <mp>/<name> file.
+ * Returns 0 on success, -1 on failure.
+ */
+static int restore_binfmt_misc_entry(char *mp, char *buf, BinfmtMiscEntry *bme)
+{
+	int fd, len, ret = -1;
+	char path[PATH_MAX+1];
+
+	snprintf(path, PATH_MAX, "%s/register", mp);
+	fd = open(path, O_WRONLY);
+	if (fd < 0) {
+		pr_perror("binfmt_misc: can't open %s", path);
+		return -1;
+	}
+
+	len = strlen(buf);
+	if (write(fd, buf, len) != len) {
+		pr_perror("binfmt_misc: can't write to %s", path);
+		goto close;
+	}
+
+	if (!bme->enabled) {
+		close(fd);
+		snprintf(path, PATH_MAX, "%s/%s", mp, bme->name);
+		fd = open(path, O_WRONLY);
+		if (fd < 0) {	/* open() fails with -1; 0 is a valid fd */
+			pr_perror("binfmt_misc: can't open %s", path);
+			return -1;
+		}
+		if (write(fd, "0", 1) != 1) {
+			pr_perror("binfmt_misc: can't write to %s", path);
+			goto close;
+		}
+	}
+	ret = 0;
+close:
+	close(fd);
+	return ret;
+}
+
+#define BINFMT_MISC_STR (1920 + 1)
+/*
+ * Build ":name:M:offset:magic:mask:interpreter:flags" in @buf, which must
+ * hold BINFMT_MISC_STR bytes. The image keeps magic and mask as plain hex
+ * digits (abc -> 616263), the way the kernel prints them, while the
+ * register file wants each byte as \xNN (\x61\x62\x63).
+ * Returns 1 on success, -1 if the entry is malformed or does not fit.
+ */
+static int make_bfmtm_magic_str(char *buf, BinfmtMiscEntry *bme)
+{
+	int i, len, magic_len, mask_len;
+
+	magic_len = strlen(bme->magic);
+	mask_len = bme->mask ? strlen(bme->mask) : 0;
+	/* Each pair of hex digits expands to 4 chars; 'M' counts as well */
+	len = strlen(bme->name) + 3 /* offset <= 128 */ + 2 * magic_len
+	    + 2 * mask_len + strlen(bme->interpreter)
+	    + (bme->flags ? strlen(bme->flags) : 0) + strlen(":M::::::");
+
+	if (len > BINFMT_MISC_STR - 1 || bme->offset < 0 || bme->offset > 128)
+		return -1;
+	/* An odd digit count would make the loops emit the trailing NUL */
+	if ((magic_len | mask_len) & 1)
+		return -1;
+
+	buf += sprintf(buf, ":%s:M:%d:", bme->name, bme->offset);
+	for (i = 0; i < magic_len; i += 2)
+		buf += sprintf(buf, "\\x%c%c", bme->magic[i], bme->magic[i + 1]);
+
+	buf += sprintf(buf, ":");
+	for (i = 0; i < mask_len; i += 2)
+		buf += sprintf(buf, "\\x%c%c", bme->mask[i], bme->mask[i + 1]);
+
+	sprintf(buf, ":%s:%s", bme->interpreter, bme->flags ? : "\0");
+
+	return 1;
+}
+
+/*
+ * Re-register every entry stored in the binfmt-misc image of mount @mi; an
+ * empty image means nothing to register. Returns 0 or a negative error.
+ */
+static int binfmt_misc_restore(struct mount_info *mi)
+{
+	struct cr_img *img;
+	char *buf = xmalloc(BINFMT_MISC_STR);
+	int ret = -1;
+
+	if (!buf)
+		return -1;
+	img = open_image(CR_FD_BINFMT_MISC, O_RSTR, mi->s_dev);
+	if (!img)
+		goto free_buf;
+
+	/* An empty image skips the loop but is still closed below */
+	ret = 0;
+	while (ret == 0 && !empty_image(img)) {
+		BinfmtMiscEntry *bme;
+
+		ret = pb_read_one_eof(img, &bme, PB_BINFMT_MISC);
+		if (ret <= 0)
+			break;
+		/* :name:type:offset:magic/extension:mask:interpreter:flags */
+		if ((!bme->magic && !bme->extension) || !bme->interpreter) {
+			pr_err("binfmt_misc: bad dump\n");
+			ret = -1;
+		} else if (bme->magic) {
+			ret = make_bfmtm_magic_str(buf, bme);
+		} else {
+			/* :name:E::extension::interpreter:flags */
+			ret = snprintf(buf, BINFMT_MISC_STR, ":%s:E::%s::%s:%s",
+				       bme->name, bme->extension, bme->interpreter,
+				       bme->flags ? : "\0");
+			if (ret >= BINFMT_MISC_STR)	/* truncated output */
+				ret = -1;
+		}
+		if (ret > 0)
+			ret = restore_binfmt_misc_entry(mi->mountpoint, buf, bme);
+		binfmt_misc_entry__free_unpacked(bme, NULL);
+	}
+
+	close_image(img);
+free_buf:
+	free(buf);
+	return ret;
+}
+
 static int fusectl_dump(struct mount_info *pm)
 {
 	int fd, ret = -1;
@@ -1406,6 +1624,7 @@ static struct fstype fstypes[32] = {
 		.name = "binfmt_misc",
 		.code = FSTYPE__BINFMT_MISC,
 		.dump = binfmt_misc_dump,
+		.restore = binfmt_misc_restore,
 	}, {
 		.name = "tmpfs",
 		.code = FSTYPE__TMPFS,
diff --git a/protobuf-desc.c b/protobuf-desc.c
index ed56e9c..c80ebb7 100644
--- a/protobuf-desc.c
+++ b/protobuf-desc.c
@@ -62,6 +62,7 @@
 #include "protobuf/cpuinfo.pb-c.h"
 #include "protobuf/userns.pb-c.h"
 #include "protobuf/seccomp.pb-c.h"
+#include "protobuf/binfmt-misc.pb-c.h"
 
 struct cr_pb_message_desc cr_pb_descs[PB_MAX];
 
diff --git a/protobuf/Makefile b/protobuf/Makefile
index 86cc90e..4b43eca 100644
--- a/protobuf/Makefile
+++ b/protobuf/Makefile
@@ -56,6 +56,7 @@ proto-obj-y	+= userns.o
 proto-obj-y	+= google/protobuf/descriptor.o # To make protoc-c happy and compile opts.proto
 proto-obj-y	+= opts.o
 proto-obj-y	+= seccomp.o
+proto-obj-y	+= binfmt-misc.o
 
 CFLAGS		+= -I$(obj)/
 
diff --git a/protobuf/binfmt-misc.proto b/protobuf/binfmt-misc.proto
new file mode 100644
index 0000000..a6b2f8c
--- /dev/null
+++ b/protobuf/binfmt-misc.proto
@@ -0,0 +1,10 @@
+message binfmt_misc_entry {
+	required string		name			= 1;	// entry file name under the mount
+	required bool		enabled			= 2;	// set when the entry reports "enabled"
+	required string		interpreter		= 3;	// text after "interpreter "
+	optional string		flags			= 4;	// text after "flags: "
+	optional string		extension		= 5;	// text after "extension ."
+	optional string		magic			= 6;	// text after "magic " (hex digits)
+	optional string		mask			= 7;	// text after "mask " (hex digits)
+	optional int32		offset			= 8;	// number after "offset "
+}



More information about the CRIU mailing list