[CRIU] [PATCH 4/6] inotify: Add checkpoint/restore v2
Cyrill Gorcunov
gorcunov at openvz.org
Tue Apr 17 09:22:29 EDT 2012
To checkpoint inotify notifiers we need help from the kernel side,
thus the FS_INOTIFY_GET_MARK ioctl code is added.
Other than that
- the inotify descriptors are collected into global inotify.img file
- the inotify marks are collected into global inotify-wd.img
- at restore time they are read and restored with the help
of the inotify_init1 and inotify_add_watch calls
- file owners are not yet supported, will be addressed in another patch
Signed-off-by: Cyrill Gorcunov <gorcunov at openvz.org>
---
Makefile | 1 +
cr-dump.c | 14 +++
cr-restore.c | 8 ++
cr-show.c | 42 ++++++++
crtools.c | 14 +++
include/crtools.h | 6 +
include/image.h | 17 +++
include/inotify.h | 33 ++++++
include/mount.h | 2 +
include/types.h | 7 ++
inotify.c | 304 +++++++++++++++++++++++++++++++++++++++++++++++++++++
mount.c | 25 +++++
12 files changed, 473 insertions(+), 0 deletions(-)
create mode 100644 include/inotify.h
create mode 100644 inotify.c
diff --git a/Makefile b/Makefile
index 652c71a..39e3636 100644
--- a/Makefile
+++ b/Makefile
@@ -49,6 +49,7 @@ OBJS += namespaces.o
OBJS += uts_ns.o
OBJS += ipc_ns.o
OBJS += mount.o
+OBJS += inotify.o
OBJS-BLOB += parasite.o
SRCS-BLOB += $(patsubst %.o,%.c,$(OBJS-BLOB))
diff --git a/cr-dump.c b/cr-dump.c
index b3e6fff..8c83fe8 100644
--- a/cr-dump.c
+++ b/cr-dump.c
@@ -34,6 +34,7 @@
#include "parasite.h"
#include "parasite-syscall.h"
#include "files.h"
+#include "inotify.h"
#ifndef CONFIG_X86_64
# error No x86-32 support yet
@@ -427,6 +428,9 @@ static int do_dump_gen_file(const struct fd_parms *p, int lfd,
case FDINFO_PIPE:
ret = dump_one_pipe(lfd, e.id, p);
break;
+ case FDINFO_INOTIFY:
+ ret = dump_one_inotify(lfd, e.id, p);
+ break;
default:
ret = dump_one_reg_file(lfd, e.id, p);
break;
@@ -559,6 +563,13 @@ static int dump_chrdev(struct fd_parms *p, int lfd, const struct cr_fdset *set)
return dump_unsupp_fd(p);
}
+/*
+ * Dump an inotify descriptor: build its id with MAKE_FD_GENID() from
+ * p->stat.st_dev, p->stat.st_ino and p->pos, mark the entry as
+ * FDINFO_INOTIFY and hand it over to do_dump_gen_file().
+ */
+static int dump_inotify(struct fd_parms *p, int lfd, const struct cr_fdset *set)
+{
+ p->id = MAKE_FD_GENID(p->stat.st_dev, p->stat.st_ino, p->pos);
+ p->type = FDINFO_INOTIFY;
+ return do_dump_gen_file(p, lfd, set);
+}
+
static int dump_one_file(pid_t pid, int fd, int lfd, char fd_flags,
const struct cr_fdset *cr_fdset)
{
@@ -575,6 +586,9 @@ static int dump_one_file(pid_t pid, int fd, int lfd, char fd_flags,
if (S_ISCHR(p.stat.st_mode))
return dump_chrdev(&p, lfd, cr_fdset);
+ if (is_inotify(lfd))
+ return dump_inotify(&p, lfd, cr_fdset);
+
if (S_ISREG(p.stat.st_mode) ||
S_ISDIR(p.stat.st_mode) ||
S_ISFIFO(p.stat.st_mode))
diff --git a/cr-restore.c b/cr-restore.c
index 1cf475c..669cc07 100644
--- a/cr-restore.c
+++ b/cr-restore.c
@@ -37,6 +37,8 @@
#include "restorer-blob.h"
#include "crtools.h"
#include "namespaces.h"
+#include "mount.h"
+#include "inotify.h"
static struct task_entries *task_entries;
@@ -287,6 +289,12 @@ static int prepare_shared(void)
if (collect_unix_sockets())
return -1;
+ if (collect_mount_info())
+ return -1;
+
+ if (collect_inotify())
+ return -1;
+
list_for_each_entry(pi, &tasks, list) {
ret = prepare_shmem_pid(pi->pid);
if (ret < 0)
diff --git a/cr-show.c b/cr-show.c
index cf6471c..ea92cb6 100644
--- a/cr-show.c
+++ b/cr-show.c
@@ -62,6 +62,7 @@ static char *fdtype2s(u8 type)
[FDINFO_INETSK] = "isk",
[FDINFO_PIPE] = "pipe",
[FDINFO_UNIXSK] = "usk",
+ [FDINFO_INOTIFY] = "ify",
};
if (type > FDINFO_UND && type < FD_INFO_MAX)
@@ -171,6 +172,47 @@ void show_ghost_file(int fd, struct cr_options *o)
pr_img_tail(CR_FD_GHOST_FILE);
}
+/*
+ * Print every inotify_wd_entry record found in the image behind
+ * @fd_inotify_wd, framed by the CR_FD_INOTIFY_WD head and tail markers.
+ *
+ * The loop ends as soon as read_img_eof() returns zero or a negative
+ * value. @o is not used.
+ */
+void show_inotify_wd(int fd_inotify_wd, struct cr_options *o)
+{
+	struct inotify_wd_entry e;
+
+	pr_img_head(CR_FD_INOTIFY_WD);
+
+	while (read_img_eof(fd_inotify_wd, &e) > 0) {
+		/* Only the first two handle words are printed, hence the "..." */
+		pr_msg("inotify-wd: id 0x%08x wd 0x%08x s_dev 0x%08x i_ino 0x%016lx mask 0x%08x "
+		       "[fhandle] 0x%08x 0x%08x 0x%016lx:0x%016lx ...\n",
+		       e.id, e.wd, e.s_dev, e.i_ino, e.mask,
+		       e.f_handle.bytes, e.f_handle.type,
+		       e.f_handle.__handle[0],
+		       e.f_handle.__handle[1]);
+	}
+
+	pr_img_tail(CR_FD_INOTIFY_WD);
+}
+
+/*
+ * Walk the inotify image behind @fd_inotify and print the id and flags
+ * of each inotify_file_entry, between the CR_FD_INOTIFY head and tail
+ * markers. The walk ends as soon as read_img_eof() stops returning a
+ * positive value. @o is not used.
+ */
+void show_inotify(int fd_inotify, struct cr_options *o)
+{
+	struct inotify_file_entry entry;
+
+	pr_img_head(CR_FD_INOTIFY);
+
+	while (read_img_eof(fd_inotify, &entry) > 0)
+		pr_msg("inotify: id 0x%08x flags 0x%08x\n", entry.id, entry.flags);
+
+	pr_img_tail(CR_FD_INOTIFY);
+}
+
void show_pipes_data(int fd_pipes, struct cr_options *o)
{
struct pipe_data_entry e;
diff --git a/crtools.c b/crtools.c
index 3a2c9c6..0c4bd0e 100644
--- a/crtools.c
+++ b/crtools.c
@@ -61,6 +61,20 @@ struct cr_fd_desc_tmpl fdset_template[CR_FD_MAX] = {
.show = show_reg_files,
},
+ /* inotify descriptors */
+ [CR_FD_INOTIFY] = {
+ .fmt = FMT_FNAME_INOTIFY,
+ .magic = INOTIFY_MAGIC,
+ .show = show_inotify,
+ },
+
+ /* inotify marks (watch descriptors) */
+ [CR_FD_INOTIFY_WD] = {
+ .fmt = FMT_FNAME_INOTIFY_WD,
+ .magic = INOTIFY_WMAGIC,
+ .show = show_inotify_wd,
+ },
+
/* core data, such as regs and vmas and such */
[CR_FD_CORE] = {
.fmt = FMT_FNAME_CORE,
diff --git a/include/crtools.h b/include/crtools.h
index a5903fb..d79878f 100644
--- a/include/crtools.h
+++ b/include/crtools.h
@@ -54,6 +54,8 @@ enum {
CR_FD_PIPES,
CR_FD_PIPES_DATA,
CR_FD_REMAP_FPATH,
+ CR_FD_INOTIFY,
+ CR_FD_INOTIFY_WD,
_CR_FD_GLOB_TO,
CR_FD_MAX
@@ -86,6 +88,8 @@ struct cr_fd_desc_tmpl {
void show_files(int fd_files, struct cr_options *o);
void show_pages(int fd_pages, struct cr_options *o);
void show_reg_files(int fd_reg_files, struct cr_options *o);
+void show_inotify(int fd_inotify, struct cr_options *o);
+void show_inotify_wd(int fd_inotify_wd, struct cr_options *o);
void show_core(int fd_core, struct cr_options *o);
void show_mm(int fd_mm, struct cr_options *o);
void show_vmas(int fd_vma, struct cr_options *o);
@@ -107,6 +111,8 @@ extern struct cr_fd_desc_tmpl fdset_template[CR_FD_MAX];
#define FMT_FNAME_PAGES "pages-%d.img"
#define FMT_FNAME_SHMEM_PAGES "pages-shmem-%ld.img"
#define FMT_FNAME_REG_FILES "reg-files.img"
+#define FMT_FNAME_INOTIFY "inotify.img"
+#define FMT_FNAME_INOTIFY_WD "inotify-wd.img"
#define FMT_FNAME_CORE "core-%d.img"
#define FMT_FNAME_MM "mm-%d.img"
#define FMT_FNAME_VMAS "vmas-%d.img"
diff --git a/include/image.h b/include/image.h
index 995e9c1..9c90358 100644
--- a/include/image.h
+++ b/include/image.h
@@ -32,6 +32,8 @@
#define MM_MAGIC 0x57492820 /* Pskov */
#define REMAP_FPATH_MAGIC 0x59133954 /* Vologda */
#define GHOST_FILE_MAGIC 0x52583605 /* Oryol */
+#define INOTIFY_MAGIC 0x48424431 /* Volgograd */
+#define INOTIFY_WMAGIC 0x54562009 /* Svetlogorsk (Rauschen) */
#define PIPEFS_MAGIC 0x50495045
@@ -41,6 +43,7 @@ enum fd_types {
FDINFO_PIPE,
FDINFO_INETSK,
FDINFO_UNIXSK,
+ FDINFO_INOTIFY,
FD_INFO_MAX
};
@@ -83,6 +86,20 @@ struct ghost_file_entry {
u32 mode;
} __packed;
+/* One inotify watch (mark) as stored in the inotify-wd image */
+struct inotify_wd_entry {
+ u32 id; /* id of the inotify descriptor this watch belongs to */
+ u64 i_ino; /* inode the watch refers to */
+ u32 mask; /* event mask */
+ u32 s_dev; /* device the inode resides on */
+ u32 wd; /* watch descriptor */
+ fh_t f_handle; /* encoded file handle of the watched inode */
+} __packed;
+
+/* One inotify descriptor as stored in the inotify image */
+struct inotify_file_entry {
+ u32 id; /* id the records in the inotify-wd image refer back to */
+ u16 flags; /* flags passed to inotify_init1() at restore time */
+} __packed;
+
struct fdinfo_entry {
u32 fd;
u8 type;
diff --git a/include/inotify.h b/include/inotify.h
new file mode 100644
index 0000000..68962bc
--- /dev/null
+++ b/include/inotify.h
@@ -0,0 +1,33 @@
+#ifndef INOTIFY_H__
+#define INOTIFY_H__
+
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "compiler.h"
+#include "types.h"
+#include "files.h"
+#include "crtools.h"
+
+#define FS_INOTIFY_GET_MARK 0xc0044912
+
+/* Description of a single mark, filled in through the FS_INOTIFY_GET_MARK ioctl */
+struct inotify_user_mark {
+ u64 i_ino; /* inode the mask refers to */
+ u32 mask; /* event mask */
+ u32 s_dev; /* device the inode resides on */
+ u32 r_dev; /* device the inode resides on, if special */
+ u32 wd; /* watch descriptor */
+ fh_t f_handle;/* encoded file handle */
+};
+
+/* Argument passed to the FS_INOTIFY_GET_MARK ioctl */
+struct inotify_mark_req {
+ u32 index; /* index of the mark we're interested in */
+ u32 size; /* size of the buffer provided for @mark */
+ struct inotify_user_mark *mark; /* buffer the mark description is read from afterwards */
+};
+
+extern int is_inotify(int lfd);
+extern int dump_one_inotify(int lfd, u32 id, const struct fd_parms *p);
+extern int collect_inotify(void);
+
+#endif /* INOTIFY_H__ */
diff --git a/include/mount.h b/include/mount.h
index 608c448..68793bd 100644
--- a/include/mount.h
+++ b/include/mount.h
@@ -4,5 +4,7 @@
struct proc_mountinfo;
extern int open_mnt_root(unsigned int s_dev, struct proc_mountinfo *mntinfo, int nr_mntinfo);
+extern int open_mount(unsigned int s_dev);
+extern int collect_mount_info(void);
#endif /* MOUNT_H__ */
diff --git a/include/types.h b/include/types.h
index 36d5a8f..fe528ce 100644
--- a/include/types.h
+++ b/include/types.h
@@ -204,4 +204,11 @@ enum kcmp_type {
# define SCM_MAX_FD 253
#endif
+/*
+ * File handle.
+ *
+ * NOTE(review): presumably laid out to match the kernel's struct file_handle
+ * (byte count, type, opaque payload), since inotify.c casts a pointer to it
+ * and passes that to sys_open_by_handle_at() -- confirm against the kernel
+ * definition.
+ */
+typedef struct {
+ u32 bytes;
+ u32 type;
+ u64 __handle[16];
+} fh_t;
+
#endif /* CR_TYPES_H_ */
diff --git a/inotify.c b/inotify.c
new file mode 100644
index 0000000..1b0d100
--- /dev/null
+++ b/inotify.c
@@ -0,0 +1,304 @@
+#include <unistd.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <string.h>
+#include <utime.h>
+#include <dirent.h>
+#include <limits.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/inotify.h>
+#include <sys/vfs.h>
+#include <sys/wait.h>
+#include <sys/poll.h>
+#include <sys/mman.h>
+#include <sys/mount.h>
+#include <aio.h>
+
+#include "compiler.h"
+#include "types.h"
+#include "inotify.h"
+
+#include "syscall.h"
+#include "crtools.h"
+#include "mount.h"
+#include "image.h"
+#include "util.h"
+#include "files.h"
+#include "file-ids.h"
+#include "log.h"
+#include "list.h"
+#include "lock.h"
+
+/* In-memory copy of one watch read from the inotify-wd image */
+struct inotify_wd_info {
+ struct list_head list; /* link in inotify_file_info::marks */
+ struct inotify_wd_entry iwe; /* the record as read from the image */
+};
+
+/* In-memory state for one inotify descriptor read from the inotify image */
+struct inotify_file_info {
+ struct list_head list; /* link in the global info_head list */
+ struct inotify_file_entry ife; /* the record as read from the image */
+ struct list_head marks; /* inotify_wd_info entries whose id equals ife.id */
+ struct file_desc d; /* registered through file_desc_add() */
+};
+
+#ifndef ANON_INODE_FS_MAGIC
+#define ANON_INODE_FS_MAGIC 0x09041934
+#endif
+
+static LIST_HEAD(info_head);
+
+/*
+ * Checks if file descriptor @lfd is inotify.
+ *
+ * The descriptor must live on a filesystem whose type is
+ * ANON_INODE_FS_MAGIC and its /proc/self/fd link must read
+ * "anon_inode:inotify".
+ *
+ * Returns 1 if both hold and 0 otherwise, including the cases where
+ * fstatfs() or readlink() fail.
+ */
+int is_inotify(int lfd)
+{
+	char link[PATH_MAX], path[32];
+	struct statfs statfs;
+	ssize_t ret;
+
+	if (fstatfs(lfd, &statfs)) {
+		pr_perror("Can't obtain statfs on fd %d", lfd);
+		return 0;
+	}
+
+	if (statfs.f_type != ANON_INODE_FS_MAGIC)
+		return 0;
+
+	snprintf(path, sizeof(path), "/proc/self/fd/%d", lfd);
+
+	/*
+	 * readlink() does not terminate the string and may fill the whole
+	 * buffer it is given, so keep one byte back for the terminator.
+	 */
+	ret = readlink(path, link, sizeof(link) - 1);
+	if (ret < 0) {
+		pr_perror("Can't read link of fd %d", lfd);
+		return 0;
+	}
+	link[ret] = 0;
+
+	return !strcmp(link, "anon_inode:inotify");
+}
+
+/*
+ * Dump the inotify descriptor @lfd under @id.
+ *
+ * One inotify_file_entry (the id plus the flags taken from @p) is written
+ * to the CR_FD_INOTIFY image. The marks are then fetched one index at a
+ * time with the FS_INOTIFY_GET_MARK ioctl and each of them is written to
+ * the CR_FD_INOTIFY_WD image.
+ *
+ * Returns 0 on success, -1 if an image write or the ioctl fails.
+ */
+int dump_one_inotify(int lfd, u32 id, const struct fd_parms *p)
+{
+	struct inotify_file_entry ie;
+	struct inotify_wd_entry we;
+	struct inotify_user_mark m;
+	struct inotify_mark_req r;
+	int image_fd, image_wd;
+
+	image_fd = fdset_fd(glob_fdset, CR_FD_INOTIFY);
+	image_wd = fdset_fd(glob_fdset, CR_FD_INOTIFY_WD);
+
+	pr_info("Dumping inotify %d with id 0x%08x\n", lfd, id);
+
+	ie.id = id;
+	ie.flags = p->flags;
+
+	if (write_img(image_fd, &ie))
+		return -1;
+
+	memzero(&r, sizeof(r));
+	r.index = -1U;	/* the first increment below wraps it around to 0 */
+	r.size = sizeof(m);
+	r.mark = &m;
+
+	while (1) {
+		r.index++;
+		memzero(&m, sizeof(m));
+
+		if (ioctl(lfd, FS_INOTIFY_GET_MARK, &r)) {
+			if (errno == ENODATA)
+				continue;	/* skip this index, try the next one */
+			if (errno == ENOENT)
+				break;		/* stop enumerating */
+
+			pr_perror("Fetching inotify failed %d", -errno);
+			return -1;
+		}
+
+		we.id = id;
+		we.mask = m.mask;
+		we.i_ino = m.i_ino;
+		we.s_dev = m.s_dev;
+		we.wd = m.wd;
+		we.f_handle = m.f_handle;
+
+		pr_info("inotify: id 0x%08x flags 0x%08x wd 0x%08x s_dev 0x%08x i_ino 0x%016lx mask 0x%08x\n",
+			ie.id, ie.flags, we.wd, we.s_dev, we.i_ino, we.mask);
+		pr_info("\t[fhandle] bytes 0x%08x type 0x%08x __handle 0x%016lx:0x%016lx\n",
+			we.f_handle.bytes, we.f_handle.type,
+			we.f_handle.__handle[0], we.f_handle.__handle[1]);
+
+		/*
+		 * Fail explicitly: the ioctl above has just returned 0, so
+		 * its result must not be reused as the function's status.
+		 */
+		if (write_img(image_wd, &we))
+			return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * Re-create the watch described by @iwe on the inotify descriptor
+ * @inotify_fd.
+ *
+ * The watched inode is opened by its file handle relative to the
+ * descriptor open_mount() returns for iwe->s_dev. The watch is then
+ * added through the /proc/self/fd path of that descriptor.
+ *
+ * Returns 0 once a watch with descriptor iwe->wd has been set up,
+ * -1 otherwise.
+ */
+static int restore_one_inotify(int inotify_fd, struct inotify_wd_entry *iwe)
+{
+	char path[32];
+	int mntfd, target, wd, ret = -1;
+
+	mntfd = open_mount(iwe->s_dev);
+	if (mntfd < 0) {
+		pr_err("Mount root for 0x%08x not found\n", iwe->s_dev);
+		return -1;
+	}
+
+	target = sys_open_by_handle_at(mntfd, (void *)&iwe->f_handle, 0);
+	if (target < 0) {
+		pr_perror("Can't open file handle for 0x%08x:0x%016lx",
+			  iwe->s_dev, iwe->i_ino);
+		goto out_mnt;	/* mntfd is already open and must be closed */
+	}
+
+	snprintf(path, sizeof(path), "/proc/self/fd/%d", target);
+	pr_debug("\t\tRestore watch for 0x%08x:0x%016lx\n", iwe->s_dev, iwe->i_ino);
+
+	/*
+	 * FIXME The kernel allocates wd-s sequentially,
+	 * this is suboptimal, but the kernel doesn't
+	 * provide an API for this yet :(
+	 *
+	 * So keep adding and removing the watch until the
+	 * descriptor we get back is the one that was dumped.
+	 */
+	while (1) {
+		wd = inotify_add_watch(inotify_fd, path, iwe->mask);
+		if (wd < 0) {
+			pr_err("Can't add watch for %d with %d\n", inotify_fd, iwe->wd);
+			break;
+		}
+
+		if (wd == iwe->wd) {
+			ret = 0;
+			break;
+		}
+
+		if (wd > iwe->wd) {
+			/* Already past the wanted number, it can't be reached anymore */
+			pr_err("Unsorted watch found for %d with %d\n", inotify_fd, iwe->wd);
+			break;
+		}
+
+		pr_debug("\t\tWatch got %d but %d expected\n", wd, iwe->wd);
+		inotify_rm_watch(inotify_fd, wd);
+	}
+
+	close(target);
+out_mnt:
+	close(mntfd);
+
+	return ret;
+}
+
+/*
+ * Open callback for inotify descriptors: create a fresh inotify instance
+ * with the dumped flags and restore every mark collected for it.
+ *
+ * Returns the new inotify descriptor, or -1 if it can't be created or
+ * any of the marks fails to restore (the descriptor is closed then).
+ */
+static int open_inotify_fd(struct file_desc *d)
+{
+	struct inotify_file_info *info;
+	struct inotify_wd_info *wd_info;
+	int tmp;
+
+	info = container_of(d, struct inotify_file_info, d);
+
+	tmp = inotify_init1(info->ife.flags);
+	if (tmp < 0) {
+		pr_perror("Can't create inotify for 0x%08x", info->ife.id);
+		return -1;
+	}
+
+	list_for_each_entry(wd_info, &info->marks, list) {
+		pr_info("\tRestore inotify for 0x%08x\n", wd_info->iwe.id);
+		if (restore_one_inotify(tmp, &wd_info->iwe)) {
+			close_safe(&tmp);
+			/* Don't depend on what close_safe() leaves in tmp */
+			return -1;
+		}
+	}
+
+	return tmp;
+}
+
+/* Operations passed to file_desc_add() for FDINFO_INOTIFY descriptors */
+static struct file_desc_ops desc_ops = {
+ .open = open_inotify_fd,
+};
+
+/*
+ * Hook @mark up to the inotify descriptor it belongs to, i.e. the entry
+ * on info_head whose id equals mark->iwe.id.
+ *
+ * Returns 0 when the owner was found, -1 when no such descriptor exists.
+ */
+static int collect_mark(struct inotify_wd_info *mark)
+{
+	struct inotify_file_info *owner;
+
+	list_for_each_entry(owner, &info_head, list) {
+		if (owner->ife.id != mark->iwe.id)
+			continue;
+
+		list_add(&mark->list, &owner->marks);
+		return 0;
+	}
+
+	return -1;
+}
+
+/*
+ * Read the inotify and inotify-wd images and build the in-memory state
+ * needed at restore time.
+ *
+ * Every inotify_file_entry becomes an inotify_file_info on info_head,
+ * every inotify_wd_entry is attached to the descriptor with the same id,
+ * and finally each descriptor is registered with file_desc_add().
+ *
+ * Returns 0 on success and -1 on any failure: an image can't be opened
+ * or read, an allocation fails, or a mark refers to an unknown id.
+ */
+int collect_inotify(void)
+{
+	struct inotify_file_info *info;
+	struct inotify_wd_info *mark;
+	int image_fd = -1, image_wd = -1, ret = -1;
+
+	image_fd = open_image_ro(CR_FD_INOTIFY);
+	if (image_fd < 0)
+		return -1;
+
+	while (1) {
+		struct inotify_file_entry ife;
+
+		ret = read_img_eof(image_fd, &ife);
+		if (ret < 0)
+			goto err;
+		else if (!ret)
+			break;
+
+		info = xmalloc(sizeof(*info));
+		if (!info) {
+			/* Go through err so that image_fd gets closed */
+			ret = -1;
+			goto err;
+		}
+
+		info->ife = ife;
+		INIT_LIST_HEAD(&info->list);
+		INIT_LIST_HEAD(&info->marks);
+
+		list_add(&info->list, &info_head);
+	}
+
+	/*
+	 * The loop above leaves ret at 0; reset it so that a failure
+	 * below is not reported as success.
+	 */
+	ret = -1;
+
+	image_wd = open_image_ro(CR_FD_INOTIFY_WD);
+	if (image_wd < 0)
+		goto err;
+
+	while (1) {
+		mark = xmalloc(sizeof(*mark));
+		if (!mark) {
+			ret = -1;
+			goto err;
+		}
+
+		ret = read_img_eof(image_wd, &mark->iwe);
+		if (ret <= 0) {
+			/*
+			 * Nothing was read into this mark, drop it.
+			 * NOTE(review): assumes xmalloc() memory may be
+			 * released with free() -- confirm in util.h.
+			 */
+			free(mark);
+			if (ret < 0)
+				goto err;
+			break;
+		}
+
+		if (collect_mark(mark)) {
+			pr_err("Can't find inotify with id 0x%08x\n", mark->iwe.id);
+			free(mark);
+			ret = -1;
+			goto err;
+		}
+	}
+
+	list_for_each_entry(info, &info_head, list) {
+		pr_info("Collected inotify: id 0x%08x flags 0x%08x\n", info->ife.id, info->ife.flags);
+		file_desc_add(&info->d, FDINFO_INOTIFY, info->ife.id, &desc_ops);
+	}
+	ret = 0;
+err:
+	close_safe(&image_wd);
+	close_safe(&image_fd);
+
+	return ret;
+}
diff --git a/mount.c b/mount.c
index 3db7d34..310ad98 100644
--- a/mount.c
+++ b/mount.c
@@ -10,9 +10,13 @@
#include "types.h"
#include "util.h"
+#include "log.h"
#include "mount.h"
#include "proc_parse.h"
+static struct proc_mountinfo *mntinfo;
+static int nr_mntinfo;
+
/*
* Returns path for mount device @s_dev
*
@@ -40,3 +44,24 @@ again:
return -ENOENT;
}
+
+/*
+ * Wrapper around open_mnt_root() that supplies the file-local mntinfo
+ * table and its entry count; returns whatever open_mnt_root() returns.
+ */
+int open_mount(unsigned int s_dev)
+{
+ return open_mnt_root(s_dev, mntinfo, nr_mntinfo);
+}
+
+/*
+ * Allocate room for 64 mountinfo entries and fill it with the result of
+ * parse_mountinfo() for the current process.
+ *
+ * Returns 0 on success, -1 if the allocation fails or parse_mountinfo()
+ * returns less than 1.
+ *
+ * NOTE(review): on the parse failure path the buffer is not released and
+ * nr_mntinfo keeps parse_mountinfo()'s return value.
+ */
+int collect_mount_info(void)
+{
+ nr_mntinfo = 64;
+ mntinfo = xmalloc(sizeof(*mntinfo) * nr_mntinfo);
+ if (!mntinfo)
+ return -1;
+
+ nr_mntinfo = parse_mountinfo(getpid(), mntinfo, nr_mntinfo);
+ if (nr_mntinfo < 1) {
+ pr_err("Parsing mountinfo %d failed\n", getpid());
+ return -1;
+ }
+
+ return 0;
+}
--
1.7.7.6
More information about the CRIU
mailing list