[CRIU] [PATCH] fs: Add binfmt_misc support
Kirill Tkhai
ktkhai at odin.com
Thu Dec 3 03:21:03 PST 2015
This patch implements checkpoint/restore functionality
for binfmt_misc mounts. Both the magic and extension entry
types, as well as the "disabled" state, are supported.
Signed-off-by: Kirill Tkhai <ktkhai at virtuozzo.com>
---
image-desc.c | 1
include/image-desc.h | 1
include/magic.h | 1
include/protobuf-desc.h | 3 -
mount.c | 223 ++++++++++++++++++++++++++++++++++++++++++++
protobuf-desc.c | 1
protobuf/Makefile | 1
protobuf/binfmt-misc.proto | 10 ++
8 files changed, 238 insertions(+), 3 deletions(-)
create mode 100644 protobuf/binfmt-misc.proto
diff --git a/image-desc.c b/image-desc.c
index f95b218..9fb96c8 100644
--- a/image-desc.c
+++ b/image-desc.c
@@ -79,6 +79,7 @@ struct cr_fd_desc_tmpl imgset_template[CR_FD_MAX] = {
FD_ENTRY_F(IP6TABLES, "ip6tables-%d", O_NOBUF),
FD_ENTRY_F(TMPFS_IMG, "tmpfs-%d.tar.gz", O_NOBUF),
FD_ENTRY_F(TMPFS_DEV, "tmpfs-dev-%d.tar.gz", O_NOBUF),
+ FD_ENTRY(BINFMT_MISC, "binfmt-misc-%d"),
FD_ENTRY(TTY_FILES, "tty"),
FD_ENTRY(TTY_INFO, "tty-info"),
FD_ENTRY(FILE_LOCKS, "filelocks"),
diff --git a/include/image-desc.h b/include/image-desc.h
index ae8ae16..90933e9 100644
--- a/include/image-desc.h
+++ b/include/image-desc.h
@@ -82,6 +82,7 @@ enum {
CR_FD_TMPFS_IMG,
CR_FD_TMPFS_DEV,
+ CR_FD_BINFMT_MISC,
CR_FD_PAGES,
CR_FD_VMAS,
diff --git a/include/magic.h b/include/magic.h
index 25cfb47..3cb3766 100644
--- a/include/magic.h
+++ b/include/magic.h
@@ -90,6 +90,7 @@
#define CPUINFO_MAGIC 0x61404013 /* Nyandoma */
#define USERNS_MAGIC 0x55474906 /* Kazan */
#define SECCOMP_MAGIC 0x64413049 /* Kostomuksha */
+#define BINFMT_MISC_MAGIC 0x67343323 /* Apatity */
#define IFADDR_MAGIC RAW_IMAGE_MAGIC
#define ROUTE_MAGIC RAW_IMAGE_MAGIC
diff --git a/include/protobuf-desc.h b/include/protobuf-desc.h
index cc78208..bb66a86 100644
--- a/include/protobuf-desc.h
+++ b/include/protobuf-desc.h
@@ -56,12 +56,13 @@ enum {
PB_CPUINFO,
PB_USERNS,
PB_NETNS,
+ PB_BINFMT_MISC, /* 50 */
/* PB_AUTOGEN_STOP */
PB_PAGEMAP_HEAD,
PB_IDS,
- PB_SIGACT, /* 50 */
+ PB_SIGACT,
PB_NETDEV,
PB_REMAP_FPATH,
PB_SK_QUEUES,
diff --git a/mount.c b/mount.c
index ca10d43..5c6b72d 100644
--- a/mount.c
+++ b/mount.c
@@ -29,6 +29,7 @@
#include "sysfs_parse.h"
#include "protobuf/mnt.pb-c.h"
+#include "protobuf/binfmt-misc.pb-c.h"
#define AUTODETECTED_MOUNT "CRIU:AUTOGENERATED"
#define MS_PROPAGATE (MS_SHARED | MS_PRIVATE | MS_UNBINDABLE | MS_SLAVE)
@@ -1282,8 +1283,90 @@ static int devtmpfs_restore(struct mount_info *pm)
return ret;
}
+/*
+ * Parse one binfmt_misc entry file, read line by line from @f, into @bme.
+ *
+ * Recognized lines are "enabled", "disabled", "offset N", "interpreter ...",
+ * "flags: ...", "extension ....", "magic ..." and "mask ...". String values
+ * are xstrdup()-ed into @bme and are not released here, not even on failure;
+ * the caller owns them.
+ *
+ * Returns 0 once the input is exhausted, -1 on a read error, an allocation
+ * failure, an unparsable offset or an unrecognized line.
+ */
+static int parse_binfmt_misc_entry(struct bfd *f, BinfmtMiscEntry *bme)
+{
+	while (1) {
+		char *str;
+
+		str = breadline(f);
+		if (IS_ERR(str))
+			return -1;
+		/* A NULL line is treated as the end of the input. */
+		if (!str)
+			break;
+
+		if (!strncmp(str, "enabled", 7)) {
+			bme->enabled = true;
+			continue;
+		}
+
+		/* Nothing to record: bme->enabled is only ever set to true above. */
+		if (!strncmp(str, "disabled", 8))
+			continue;
+
+		if (!strncmp(str, "offset ", 7)) {
+			if (sscanf(str + 7, "%i", &bme->offset) != 1)
+				return -1;
+			bme->has_offset = true;
+			continue;
+		}
+
+/* If the line starts with @key, duplicate the remainder into bme->member. */
+#define DUP_EQUAL_AS(key, member)					\
+		if (!strncmp(str, key, strlen(key))) {			\
+			bme->member = xstrdup(str + strlen(key));	\
+			if (!bme->member)				\
+				return -1;				\
+			continue;					\
+		}
+		DUP_EQUAL_AS("interpreter ", interpreter)
+		DUP_EQUAL_AS("flags: ", flags)
+		DUP_EQUAL_AS("extension .", extension)
+		DUP_EQUAL_AS("magic ", magic)
+		DUP_EQUAL_AS("mask ", mask)
+#undef DUP_EQUAL_AS
+
+		/*
+		 * This is a format problem, not a failed system call, so
+		 * errno carries no useful information: use pr_err().
+		 */
+		pr_err("binfmt_misc: unsupported feature %s\n", str);
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * Dump a single binfmt_misc entry: open @name relative to @dfd, parse its
+ * contents and write the resulting BinfmtMiscEntry to @img.
+ *
+ * Returns 0 on success and -1 on any failure.
+ */
+static int dump_binfmt_misc_entry(int dfd, char *name, struct cr_img *img)
+{
+	BinfmtMiscEntry entry = BINFMT_MISC_ENTRY__INIT;
+	struct bfd bf;
+	int rc = -1;
+
+	bf.fd = openat(dfd, name, O_RDONLY);
+	if (bf.fd < 0) {
+		pr_perror("binfmt_misc: can't open %s", name);
+		return -1;
+	}
+
+	if (bfdopenr(&bf))
+		return -1;
+
+	if (parse_binfmt_misc_entry(&bf, &entry) == 0) {
+		/* The entry is named after the file it was read from. */
+		entry.name = name;
+
+		if (pb_write_one(img, &entry, PB_BINFMT_MISC) == 0)
+			rc = 0;
+	}
+
+	/* Release whatever strings the parser duplicated, on every path. */
+	free(entry.mask);
+	free(entry.magic);
+	free(entry.extension);
+	free(entry.flags);
+	free(entry.interpreter);
+	bclose(&bf);
+	return rc;
+}
+
static int binfmt_misc_dump(struct mount_info *pm)
{
+ struct cr_img *img;
int fd, ret = -1;
struct dirent *de;
DIR *fdir = NULL;
@@ -1298,6 +1381,10 @@ static int binfmt_misc_dump(struct mount_info *pm)
return -1;
}
+ img = open_image(CR_FD_BINFMT_MISC, O_DUMP, pm->s_dev);
+ if (!img)
+ goto out;
+
while ((de = readdir(fdir))) {
if (dir_dots(de))
continue;
@@ -1306,16 +1393,147 @@ static int binfmt_misc_dump(struct mount_info *pm)
if (!strcmp(de->d_name, "status"))
continue;
- pr_err("binfmt_misc isn't empty: %s\n", de->d_name);
- goto out;
+ if (dump_binfmt_misc_entry(fd, de->d_name, img))
+ goto out;
}
ret = 0;
out:
+ if (img)
+ close_image(img);
closedir(fdir);
return ret;
}
+/*
+ * Register one entry in the binfmt_misc mount located at @mp.
+ *
+ * @buf is the registration string; it is written to "<mp>/register". If the
+ * dumped entry was not enabled, "0" is then written to "<mp>/<name>".
+ *
+ * Returns 0 on success, -1 if any open() or write() fails.
+ */
+static int restore_binfmt_misc_entry(char *mp, char *buf, BinfmtMiscEntry *bme)
+{
+	int fd, len, ret = -1;
+	char path[PATH_MAX+1];
+
+	snprintf(path, PATH_MAX, "%s/register", mp);
+
+	fd = open(path, O_WRONLY);
+	if (fd < 0) {
+		pr_perror("binfmt_misc: can't open %s", path);
+		return -1;
+	}
+
+	len = strlen(buf);
+
+	if (write(fd, buf, len) != len) {
+		pr_perror("binfmt_misc: can't write to %s", path);
+		goto close;
+	}
+
+	if (!bme->enabled) {
+		close(fd);
+		snprintf(path, PATH_MAX, "%s/%s", mp, bme->name);
+
+		fd = open(path, O_WRONLY);
+		/*
+		 * open() reports failure with -1, not 0. The register fd
+		 * is already closed here, so skip the close below.
+		 */
+		if (fd < 0) {
+			pr_perror("binfmt_misc: can't open %s", path);
+			goto out;
+		}
+		if (write(fd, "0", 1) != 1) {
+			pr_perror("binfmt_misc: can't write to %s", path);
+			goto close;
+		}
+	}
+
+	ret = 0;
+close:
+	close(fd);
+out:
+	return ret;
+}
+
+#define BINFMT_MISC_STR (1920 + 1)
+/*
+ * Build the registration string of a magic-type entry in @buf, which must
+ * be able to hold BINFMT_MISC_STR bytes.
+ *
+ * Returns 1 on success, -1 if the entry is malformed (offset out of the
+ * 0..128 range, odd number of hex digits in magic or mask) or if the
+ * resulting string would not fit into the buffer.
+ */
+static int make_bfmtm_magic_str(char *buf, BinfmtMiscEntry *bme)
+{
+	int i, len, magic_len, mask_len;
+
+	/*
+	 * Format is ":name:type(M):offset:magic:mask:interpreter:flags".
+	 * Magic and mask are special fields. Kernel outputs them as
+	 * a sequence of hexadecimal numbers (abc -> 616263), and we
+	 * dump them without changes. But for registering a new entry
+	 * it expects every byte is prepended with \x, i.e. \x61\x62\x63.
+	 */
+	magic_len = strlen(bme->magic);
+	mask_len = bme->mask ? strlen(bme->mask) : 0;
+
+	/*
+	 * The loops below consume two hex digits per step. With an odd
+	 * length the last step would emit the string terminator as a digit
+	 * and produce more output than the estimate below accounts for.
+	 */
+	if ((magic_len & 1) || (mask_len & 1))
+		return -1;
+
+	/* Only three characters are budgeted for the offset. */
+	if (bme->offset < 0 || bme->offset > 128)
+		return -1;
+
+	/* Every pair of hex digits expands to four characters: "\xNN". */
+	len = strlen(bme->name) + 3 /* offset <= 128 */ + 2 * magic_len
+		+ 2 * mask_len + strlen(bme->interpreter)
+		+ (bme->flags ? strlen(bme->flags) : 0)
+		+ 7 /* ':' separators */ + 1 /* type 'M' */;
+
+	if (len > BINFMT_MISC_STR - 1)
+		return -1;
+
+	buf += sprintf(buf, ":%s:M:%d:", bme->name, bme->offset);
+
+	for (i = 0; i < magic_len; i += 2)
+		buf += sprintf(buf, "\\x%c%c", bme->magic[i], bme->magic[i + 1]);
+
+	buf += sprintf(buf, ":");
+
+	for (i = 0; i < mask_len; i += 2)
+		buf += sprintf(buf, "\\x%c%c", bme->mask[i], bme->mask[i + 1]);
+
+	sprintf(buf, ":%s:%s", bme->interpreter, bme->flags ? bme->flags : "");
+
+	return 1;
+}
+
+/*
+ * Restore the registered entries of the binfmt_misc mount @mi.
+ *
+ * Every BinfmtMiscEntry read from the mount's image is turned into a
+ * registration string and handed to restore_binfmt_misc_entry().
+ *
+ * Returns 0 on success (including when the image is empty), a negative
+ * value if the image can't be opened or read, or if an entry is malformed,
+ * doesn't fit into the registration buffer or can't be registered.
+ */
+static int binfmt_misc_restore(struct mount_info *mi)
+{
+	struct cr_img *img;
+	char *buf;
+	int ret = -1;
+
+	buf = xmalloc(BINFMT_MISC_STR);
+	if (!buf)
+		return -1;
+
+	img = open_image(CR_FD_BINFMT_MISC, O_RSTR, mi->s_dev);
+	if (!img)
+		goto free_buf;
+
+	if (empty_image(img)) {
+		/*
+		 * NOTE(review): assumes an empty image means no entries
+		 * were dumped for this mount -- confirm. There is nothing
+		 * to register then, and the image still has to be closed.
+		 */
+		ret = 0;
+		goto close_img;
+	}
+
+	ret = 0;
+	while (ret == 0) {
+		BinfmtMiscEntry *bme;
+
+		ret = pb_read_one_eof(img, &bme, PB_BINFMT_MISC);
+		if (ret <= 0)
+			break;
+
+		/* :name:type:offset:magic/extension:mask:interpreter:flags */
+		if ((!bme->magic && !bme->extension) || !bme->interpreter) {
+			pr_err("binfmt_misc: bad dump\n");
+			ret = -1;
+		} else if (bme->magic) {
+			ret = make_bfmtm_magic_str(buf, bme);
+			if (ret < 0)
+				pr_err("binfmt_misc: bad magic entry %s\n", bme->name);
+		} else {
+			/* :name:E::extension::interpreter:flags */
+			ret = snprintf(buf, BINFMT_MISC_STR, ":%s:E::%s::%s:%s",
+				       bme->name, bme->extension, bme->interpreter,
+				       bme->flags ? bme->flags : "");
+			if (ret >= BINFMT_MISC_STR) {
+				/* Output was truncated: don't register garbage. */
+				pr_err("binfmt_misc: entry %s is too long\n", bme->name);
+				ret = -1;
+			}
+		}
+
+		/* A positive value means buf holds a complete string. */
+		if (ret > 0)
+			ret = restore_binfmt_misc_entry(mi->mountpoint, buf, bme);
+
+		binfmt_misc_entry__free_unpacked(bme, NULL);
+	}
+
+close_img:
+	close_image(img);
+free_buf:
+	free(buf);
+	return ret;
+}
+
static int fusectl_dump(struct mount_info *pm)
{
int fd, ret = -1;
@@ -1406,6 +1624,7 @@ static struct fstype fstypes[32] = {
.name = "binfmt_misc",
.code = FSTYPE__BINFMT_MISC,
.dump = binfmt_misc_dump,
+ .restore = binfmt_misc_restore,
}, {
.name = "tmpfs",
.code = FSTYPE__TMPFS,
diff --git a/protobuf-desc.c b/protobuf-desc.c
index ed56e9c..c80ebb7 100644
--- a/protobuf-desc.c
+++ b/protobuf-desc.c
@@ -62,6 +62,7 @@
#include "protobuf/cpuinfo.pb-c.h"
#include "protobuf/userns.pb-c.h"
#include "protobuf/seccomp.pb-c.h"
+#include "protobuf/binfmt-misc.pb-c.h"
struct cr_pb_message_desc cr_pb_descs[PB_MAX];
diff --git a/protobuf/Makefile b/protobuf/Makefile
index 86cc90e..4b43eca 100644
--- a/protobuf/Makefile
+++ b/protobuf/Makefile
@@ -56,6 +56,7 @@ proto-obj-y += userns.o
proto-obj-y += google/protobuf/descriptor.o # To make protoc-c happy and compile opts.proto
proto-obj-y += opts.o
proto-obj-y += seccomp.o
+proto-obj-y += binfmt-misc.o
CFLAGS += -I$(obj)/
diff --git a/protobuf/binfmt-misc.proto b/protobuf/binfmt-misc.proto
new file mode 100644
index 0000000..a6b2f8c
--- /dev/null
+++ b/protobuf/binfmt-misc.proto
@@ -0,0 +1,10 @@
+message binfmt_misc_entry { // One registered binfmt_misc entry, as parsed from its file in the mount
+ required string name = 1; // Entry name: the name of the file the entry was read from
+ required bool enabled = 2; // True if the entry file contained an "enabled" line
+ required string interpreter = 3; // Text following "interpreter " in the entry file
+ optional string flags = 4; // Text following "flags: "
+ optional string extension = 5; // Text following "extension ." (the leading dot is not stored)
+ optional string magic = 6; // Text following "magic ", stored exactly as read
+ optional string mask = 7; // Text following "mask ", stored exactly as read
+ optional int32 offset = 8; // Number parsed from the "offset " line
+}
More information about the CRIU
mailing list