[CRIU] [PATCH 0/5] eventfd checkpoint/restore

Cyrill Gorcunov gorcunov at openvz.org
Fri Apr 27 18:13:23 EDT 2012


Hi guys, here is an implementation of eventfd checkpoint/restore
support.

Note that it depends on a draft/ugly kernel patch I pushed
into our repo as a stub for the time being (the proper kernel
patch has to be done in a much more complex way, so I thought
having a stub might be an option; after all, the interface
provided to user-space will not change, only the internal
kernel code will. So don't blame me for the patch below -- I
know it is ugly as hell, but I needed something which just
works for testing).

The inotify and eventpoll crtools patches are coming soon.
---
From: Cyrill Gorcunov <gorcunov at openvz.org>
Subject: DRAFT: c/r: proc -- Add typed fdinfo for eventfd/eventpoll/inotify files

This patch will be heavily reworked; at the moment it's just a stub
for the kernel interface.

It adds additional data to be printed for /proc/pid/fdinfo/fd files.

Signed-off-by: Cyrill Gorcunov <gorcunov at openvz.org>
---
 fs/eventfd.c                     |   16 ++++++
 fs/eventpoll.c                   |   32 ++++++++++++
 fs/notify/inotify/inotify_user.c |   97 +++++++++++++++++++++++++++++++++++++++
 fs/proc/base.c                   |   65 ++++++++++++++++++++++----
 include/linux/eventfd.h          |   13 +++++
 include/linux/eventpoll.h        |    5 ++
 include/linux/inotify.h          |    4 +
 7 files changed, 223 insertions(+), 9 deletions(-)

Index: linux-2.6.git/fs/eventfd.c
===================================================================
--- linux-2.6.git.orig/fs/eventfd.c
+++ linux-2.6.git/fs/eventfd.c
@@ -327,6 +327,22 @@ static const struct file_operations even
 #endif
 };
 
+int is_file_eventfd(struct file *f)
+{
+	return f->f_op == &eventfd_fops;
+}
+
+int eventfd_fd_info(struct file *f, char *info, size_t avail)
+{
+	struct eventfd_ctx *ctx = f->private_data;
+
+	spin_lock_irq(&ctx->wqh.lock);
+	snprintf(info, avail, "count-raw: %16lx\n", ctx->count);
+	spin_unlock_irq(&ctx->wqh.lock);
+
+	return 0;
+}
+
 /**
  * eventfd_fget - Acquire a reference of an eventfd file descriptor.
  * @fd: [in] Eventfd file descriptor.
Index: linux-2.6.git/fs/eventpoll.c
===================================================================
--- linux-2.6.git.orig/fs/eventpoll.c
+++ linux-2.6.git/fs/eventpoll.c
@@ -292,7 +292,7 @@ ctl_table epoll_table[] = {
 
 static const struct file_operations eventpoll_fops;
 
-static inline int is_file_epoll(struct file *f)
+int is_file_epoll(struct file *f)
 {
 	return f->f_op == &eventpoll_fops;
 }
@@ -1815,6 +1815,36 @@ SYSCALL_DEFINE6(epoll_pwait, int, epfd,
 
 #endif /* HAVE_SET_RESTORE_SIGMASK */
 
+int epoll_fd_info(struct file *f, char *info, size_t avail)
+{
+	struct eventpoll *ep = f->private_data;
+	struct rb_node *rbp;
+	int err = 0;
+
+	mutex_lock(&epmutex);
+	mutex_lock_nested(&ep->mtx, 0);
+
+	for (rbp = rb_first(&ep->rbr); rbp; rbp = rb_next(rbp)) {
+		struct epitem *epi = rb_entry(rbp, struct epitem, rbn);
+
+		if (avail <= 64) {
+			err = -ENOSPC;
+			break;
+		}
+
+		snprintf(info, avail, "tfd: %8d events: %8x data: %16lx\n",
+			epi->ffd.fd, epi->event.events, (long)epi->event.data);
+
+		avail -= strlen(info);
+		info += strlen(info);
+	}
+
+	mutex_unlock(&ep->mtx);
+	mutex_unlock(&epmutex);
+
+	return err;
+}
+
 static int __init eventpoll_init(void)
 {
 	struct sysinfo si;
Index: linux-2.6.git/fs/notify/inotify/inotify_user.c
===================================================================
--- linux-2.6.git.orig/fs/notify/inotify/inotify_user.c
+++ linux-2.6.git/fs/notify/inotify/inotify_user.c
@@ -829,6 +829,103 @@ static struct fsnotify_group *inotify_ne
 	return group;
 }
 
+#ifdef CONFIG_EXPORTFS
+int is_file_inotify(struct file *f)
+{
+	return f->f_op == &inotify_fops;
+}
+
+int inotify_fd_info(struct file *f, char *info, size_t avail)
+{
+	struct fsnotify_group *group;
+	struct fsnotify_mark *mark;
+	struct file_handle *fhandle;
+	struct inode *inode;
+	int size, ret = 0;
+	int num = 100;
+
+	fhandle = kmalloc(sizeof(*fhandle) + MAX_HANDLE_SZ, GFP_ATOMIC);
+	if (!fhandle)
+		return -ENOMEM;
+
+	group = f->private_data;
+
+	spin_lock(&group->mark_lock);
+	list_for_each_entry(mark, &group->marks_list, g_list) {
+		struct inotify_inode_mark *i_mark;
+		u32 user_wd, user_mask;
+		int i;
+
+		if (num-- < 1) {
+			ret = -ENOSPC;
+			break;
+		}
+
+		if (!mark->target)
+			continue;
+		if (!(mark->flags & FSNOTIFY_MARK_FLAG_ALIVE))
+			continue;
+
+		fhandle->handle_bytes = MAX_HANDLE_SZ;
+		size = MAX_HANDLE_SZ >> 2;
+
+		user_mask = inotify_mask_to_arg(mark->mask);
+		if (mark->flags & FSNOTIFY_MARK_FLAG_INODE) {
+			i_mark = container_of(mark, struct inotify_inode_mark, fsn_mark);
+			user_wd = i_mark->wd;
+		} else
+			user_wd = -1;
+
+		ret = exportfs_encode_fh(mark->target, (struct fid *)fhandle->f_handle, &size,  0);
+		if ((ret == 255) || (ret == -ENOSPC)) {
+			ret = -EOVERFLOW;
+			goto err_unlock;
+		}
+
+		fhandle->handle_type = ret;
+		fhandle->handle_bytes = size * sizeof(u32);
+
+		ret = 0;
+
+		inode = mark->target->d_inode;
+		snprintf(info, avail,
+			"wd: %8d ino: %16lx, sdev: %8x mask %8x "
+			"fhandle-bytes: %8x fhandle-type: %8x f_handle: ",
+			user_wd, inode->i_ino, inode->i_sb->s_dev,
+			user_mask, fhandle->handle_type, fhandle->handle_bytes);
+
+		if (avail <= strlen(info)) {
+			ret = -ENOSPC;
+			break;
+		}
+
+		avail -= strlen(info);
+		info  += strlen(info);
+
+		size = fhandle->handle_bytes;
+
+		if ((size * 2 + 2) >= avail) {
+			ret = -ENOSPC;
+			break;
+		}
+
+		for (i = 0; i < size; i++, info += 2)
+			sprintf(info, "%02x", (int)(unsigned char)fhandle->f_handle[i]);
+		sprintf(info, "\n");
+		info++;
+		avail -= (size * 2 + 2);
+	}
+
+err_unlock:
+	spin_unlock(&group->mark_lock);
+	kfree(fhandle);
+
+	return ret;
+}
+#else
+int is_file_inotify(struct file *f) { return 0; }
+int inotify_fd_info(struct file *f, char *info, size_t avail) { return 0; }
+#endif
 
 /* inotify syscalls */
 SYSCALL_DEFINE1(inotify_init1, int, flags)
Index: linux-2.6.git/fs/proc/base.c
===================================================================
--- linux-2.6.git.orig/fs/proc/base.c
+++ linux-2.6.git/fs/proc/base.c
@@ -84,6 +84,9 @@
 #include <linux/fs_struct.h>
 #include <linux/slab.h>
 #include <linux/flex_array.h>
+#include <linux/eventpoll.h>
+#include <linux/eventfd.h>
+#include <linux/inotify.h>
 #ifdef CONFIG_HARDWALL
 #include <asm/hardwall.h>
 #endif
@@ -1727,9 +1730,7 @@ out:
 	return ~0U;
 }
 
-#define PROC_FDINFO_MAX 64
-
-static int proc_fd_info(struct inode *inode, struct path *path, char *info)
+static int proc_fd_info(struct inode *inode, struct path *path, char *info, size_t avail)
 {
 	struct task_struct *task = get_proc_task(inode);
 	struct files_struct *files = NULL;
@@ -1761,7 +1762,7 @@ static int proc_fd_info(struct inode *in
 				path_get(&file->f_path);
 			}
 			if (info)
-				snprintf(info, PROC_FDINFO_MAX,
+				snprintf(info, avail,
 					 "pos:\t%lli\n"
 					 "flags:\t0%o\n",
 					 (long long) file->f_pos,
@@ -1778,7 +1779,7 @@ static int proc_fd_info(struct inode *in
 
 static int proc_fd_link(struct dentry *dentry, struct path *path)
 {
-	return proc_fd_info(dentry->d_inode, path, NULL);
+	return proc_fd_info(dentry->d_inode, path, NULL, 0);
 }
 
 static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd)
@@ -1980,10 +1981,58 @@ static int proc_readfd(struct file *filp
 static ssize_t proc_fdinfo_read(struct file *file, char __user *buf,
 				      size_t len, loff_t *ppos)
 {
-	char tmp[PROC_FDINFO_MAX];
-	int err = proc_fd_info(file->f_path.dentry->d_inode, NULL, tmp);
+	struct task_struct *task;
+	struct files_struct *files;
+	char *info;
+	size_t off;
+	int err;
+
+	info = (char *)kmalloc(PAGE_SIZE, GFP_ATOMIC);
+	if (!info)
+		return -ENOMEM;
+
+	err = proc_fd_info(file->f_path.dentry->d_inode, NULL, info, PAGE_SIZE - 1);
+	if (err)
+		goto out;
+
+	off = strlen(info);
+	if (off >= PAGE_SIZE - 1) {
+		err = -ENOSPC;
+		goto end;
+	}
+
+	task = get_proc_task(file->f_path.dentry->d_inode);
+	if (task) {
+		files = get_files_struct(task);
+		put_task_struct(task);
+	} else
+		files = NULL;
+
+	if (files) {
+		struct file *tfile;
+
+		spin_lock(&files->file_lock);
+		tfile = fcheck_files(files, proc_fd(file->f_path.dentry->d_inode));
+
+		if (tfile) {
+			if (is_file_epoll(tfile))
+				err = epoll_fd_info(tfile, &info[off], PAGE_SIZE - off - 1);
+			else if (is_file_eventfd(tfile))
+				err = eventfd_fd_info(tfile, &info[off], PAGE_SIZE - off - 1);
+			else if (is_file_inotify(tfile))
+				err = inotify_fd_info(tfile, &info[off], PAGE_SIZE - off - 1);
+		}
+
+		spin_unlock(&files->file_lock);
+		put_files_struct(files);
+	}
+
+end:
 	if (!err)
-		err = simple_read_from_buffer(buf, len, ppos, tmp, strlen(tmp));
+		err = simple_read_from_buffer(buf, len, ppos, info, strlen(info));
+
+out:
+	kfree((unsigned long)info);
 	return err;
 }
 
Index: linux-2.6.git/include/linux/eventfd.h
===================================================================
--- linux-2.6.git.orig/include/linux/eventfd.h
+++ linux-2.6.git/include/linux/eventfd.h
@@ -44,6 +44,9 @@ ssize_t eventfd_ctx_read(struct eventfd_
 int eventfd_ctx_remove_wait_queue(struct eventfd_ctx *ctx, wait_queue_t *wait,
 				  __u64 *cnt);
 
+int is_file_eventfd(struct file *f);
+int eventfd_fd_info(struct file *f, char *info, size_t avail);
+
 #else /* CONFIG_EVENTFD */
 
 /*
@@ -82,6 +85,16 @@ static inline int eventfd_ctx_remove_wai
 	return -ENOSYS;
 }
 
+static inline int is_file_eventfd(struct file *f)
+{
+	return 0;
+}
+
+static inline int eventfd_fd_info(struct file *f, char *info, size_t avail)
+{
+	return 0;
+}
+
 #endif
 
 #endif /* _LINUX_EVENTFD_H */
Index: linux-2.6.git/include/linux/eventpoll.h
===================================================================
--- linux-2.6.git.orig/include/linux/eventpoll.h
+++ linux-2.6.git/include/linux/eventpoll.h
@@ -96,10 +96,15 @@ static inline void eventpoll_release(str
 	eventpoll_release_file(file);
 }
 
+int is_file_epoll(struct file *f);
+int epoll_fd_info(struct file *f, char *info, size_t avail);
+
 #else
 
 static inline void eventpoll_init_file(struct file *file) {}
 static inline void eventpoll_release(struct file *file) {}
+static inline int is_file_epoll(struct file *f) { return 0; }
+static inline int epoll_fd_info(struct file *f, char *info, size_t avail) { return 0; }
 
 #endif
 
Index: linux-2.6.git/include/linux/inotify.h
===================================================================
--- linux-2.6.git.orig/include/linux/inotify.h
+++ linux-2.6.git/include/linux/inotify.h
@@ -101,6 +101,10 @@ extern struct ctl_table inotify_table[];
 			  IN_DONT_FOLLOW | IN_EXCL_UNLINK | IN_MASK_ADD | \
 			  IN_ISDIR | IN_ONESHOT)
 
+
+int is_file_inotify(struct file *f);
+int inotify_fd_info(struct file *f, char *info, size_t avail);
+
 #endif
 
 #endif	/* _LINUX_INOTIFY_H */


More information about the CRIU mailing list