[CRIU] [PATCH 0/4] Early inotify review series v1

Cyrill Gorcunov gorcunov at openvz.org
Tue Apr 10 08:42:59 EDT 2012


Hi, this series is not for merging but rather for evaluating
the overall idea of inotify c/r. I've tested it with a hand-made
test case, not a zdtm one (so don't pick up this series until
I cook up a zdtm version).

The series won't work without the kernel patch (which I attach
below), but the kernel patch is still only in my testing repo.
---
fs, notify: Add FS_INOTIFY_GET_MARK ioctl

In checkpoint/restore we need to dump the marks
a process has, so that we are able to recreate
them at restore time.

For this purpose the FS_INOTIFY_GET_MARK ioctl code
is introduced. The caller provides the index of the
mark to retrieve and a buffer to store the path
being watched.

On return, the ioctl handler fills in the path and
a mask suitable for an inotify_add_watch call.

Signed-off-by: Cyrill Gorcunov <gorcunov at openvz.org>
---
 fs/notify/inotify/inotify_user.c |  120 +++++++++++++++++++++++++++++++++++++--
 fs/notify/mark.c                 |    6 +
 include/linux/fsnotify_backend.h |    3 
 include/linux/inotify.h          |   19 ++++++
 4 files changed, 142 insertions(+), 6 deletions(-)

Index: linux-2.6.git/fs/notify/inotify/inotify_user.c
===================================================================
--- linux-2.6.git.orig/fs/notify/inotify/inotify_user.c
+++ linux-2.6.git/fs/notify/inotify/inotify_user.c
@@ -36,8 +36,10 @@
 #include <linux/types.h>
 #include <linux/anon_inodes.h>
 #include <linux/uaccess.h>
+#include <linux/mount.h>
 #include <linux/poll.h>
 #include <linux/wait.h>
+#include <linux/exportfs.h>
 
 #include "inotify.h"
 
@@ -329,6 +331,108 @@ static long inotify_ioctl(struct file *f
 		mutex_unlock(&group->notification_mutex);
 		ret = put_user(send_len, (int __user *) p);
 		break;
+
+#if defined(CONFIG_CHECKPOINT_RESTORE) && defined(CONFIG_EXPORTFS)
+
+	case FS_INOTIFY_GET_MARK: {
+		struct inotify_user_mark __user *user_mark;
+		struct inotify_inode_mark *i_mark;
+		/* struct dnotify_mark *dn_mark; */
+		struct inotify_mark_req request;
+		struct fsnotify_mark *mark;
+		struct file_handle *fhandle;
+		struct inode *inode;
+		__u32 user_mask, user_wd;
+		int size;
+
+		ret = copy_from_user(&request, p, sizeof(request));
+		if (ret)
+			break;
+
+		user_mark = request.mark;
+
+		/* Linear search >:( */
+		ret = -ENOENT;
+		spin_lock(&group->mark_lock);
+		list_for_each_entry(mark, &group->marks_list, g_list) {
+			if (!request.index--) {
+				fsnotify_get_mark(mark);
+				if (!mark->target) {
+					fsnotify_put_mark(mark);
+					mark = NULL;
+					ret = -ENODATA;
+				}
+				goto found;
+			}
+		}
+		mark = NULL;
+found:
+		spin_unlock(&group->mark_lock);
+		if (!mark)
+			break;
+
+		fhandle = kmalloc(sizeof(*fhandle) + MAX_HANDLE_SZ, GFP_TEMPORARY);
+		if (!fhandle) {
+			ret = -ENOMEM;
+			break;
+		}
+
+		fhandle->handle_bytes = MAX_HANDLE_SZ;
+		size = MAX_HANDLE_SZ >> 2;
+
+		spin_lock(&mark->lock);
+
+		if (!(mark->flags & FSNOTIFY_MARK_FLAG_ALIVE)) {
+			ret = -ENODATA;
+			goto err_unlock;
+		}
+
+		user_mask = inotify_mask_to_arg(mark->mask);
+		if (mark->flags & FSNOTIFY_MARK_FLAG_INODE) {
+			i_mark = container_of(mark, struct inotify_inode_mark, fsn_mark);
+			user_wd = i_mark->wd;
+		} else
+			user_wd = -1;
+		ret = exportfs_encode_fh(mark->target, (struct fid *)fhandle->f_handle, &size,  0);
+		if ((ret == 255) || (ret == -ENOSPC)) {
+			ret = -EOVERFLOW;
+			goto err_unlock;
+		}
+
+		fhandle->handle_type = ret;
+		fhandle->handle_bytes = size * sizeof(u32);
+
+		size = sizeof(*fhandle) + fhandle->handle_bytes;
+
+		if (size > (request.size - sizeof(*request.mark))) {
+			ret = -ENOSPC;
+			goto err_unlock;
+		}
+
+		inode = igrab(mark->target->d_inode);
+		spin_unlock(&mark->lock);
+
+		if (inode) {
+			ret  = put_user((__u64)inode->i_ino, (__u64 *)&user_mark->i_ino);
+			ret |= put_user((__u32)inode->i_sb->s_dev, (__u32 *)&user_mark->s_dev);
+			ret |= put_user((__u32)inode->i_rdev, (__u32 *)&user_mark->i_rdev);
+			ret |= put_user(user_mask, (__u32 *)&user_mark->mask);
+			ret |= put_user(user_wd, (__u32 *)&user_mark->wd);
+			ret |= copy_to_user(user_mark->data, fhandle, size);
+		} else
+			ret = -ENODATA;
+		iput(inode);
+err:
+		kfree(fhandle);
+		fsnotify_put_mark(mark);
+		break;
+err_unlock:
+		spin_unlock(&mark->lock);
+		goto err;
+	}
+
+#endif /* CONFIG_CHECKPOINT_RESTORE && CONFIG_EXPORTFS */
+
 	}
 
 	return ret;
@@ -621,7 +725,7 @@ static int inotify_update_existing_watch
 }
 
 static int inotify_new_watch(struct fsnotify_group *group,
-			     struct inode *inode,
+			     struct dentry *dentry,
 			     u32 arg)
 {
 	struct inotify_inode_mark *tmp_i_mark;
@@ -629,6 +733,7 @@ static int inotify_new_watch(struct fsno
 	int ret;
 	struct idr *idr = &group->inotify_data.idr;
 	spinlock_t *idr_lock = &group->inotify_data.idr_lock;
+	struct inode *inode = dentry->d_inode;
 
 	/* don't allow invalid bits: we don't want flags set */
 	mask = inotify_arg_to_mask(arg);
@@ -660,6 +765,11 @@ static int inotify_new_watch(struct fsno
 		goto out_err;
 	}
 
+#if defined(CONFIG_CHECKPOINT_RESTORE) && defined(CONFIG_EXPORTFS)
+	dget(dentry);
+	tmp_i_mark->fsn_mark.target = dentry;
+#endif
+
 	/* increment the number of watches the user has */
 	atomic_inc(&group->inotify_data.user->inotify_watches);
 
@@ -673,16 +783,16 @@ out_err:
 	return ret;
 }
 
-static int inotify_update_watch(struct fsnotify_group *group, struct inode *inode, u32 arg)
+static int inotify_update_watch(struct fsnotify_group *group, struct dentry *dentry, u32 arg)
 {
 	int ret = 0;
 
 retry:
 	/* try to update and existing watch with the new arg */
-	ret = inotify_update_existing_watch(group, inode, arg);
+	ret = inotify_update_existing_watch(group, dentry->d_inode, arg);
 	/* no mark present, try to add a new one */
 	if (ret == -ENOENT)
-		ret = inotify_new_watch(group, inode, arg);
+		ret = inotify_new_watch(group, dentry, arg);
 	/*
 	 * inotify_new_watch could race with another thread which did an
 	 * inotify_new_watch between the update_existing and the add watch
@@ -785,7 +895,7 @@ SYSCALL_DEFINE3(inotify_add_watch, int,
 	group = filp->private_data;
 
 	/* create/update an inode mark */
-	ret = inotify_update_watch(group, inode, mask);
+	ret = inotify_update_watch(group, path.dentry, mask);
 	path_put(&path);
 fput_and_out:
 	fput_light(filp, fput_needed);
Index: linux-2.6.git/fs/notify/mark.c
===================================================================
--- linux-2.6.git.orig/fs/notify/mark.c
+++ linux-2.6.git/fs/notify/mark.c
@@ -109,8 +109,12 @@ void fsnotify_get_mark(struct fsnotify_m
 
 void fsnotify_put_mark(struct fsnotify_mark *mark)
 {
-	if (atomic_dec_and_test(&mark->refcnt))
+	if (atomic_dec_and_test(&mark->refcnt)) {
+#if defined(CONFIG_CHECKPOINT_RESTORE) && defined(CONFIG_EXPORTFS)
+		dput(mark->target);
+#endif
 		mark->free_mark(mark);
+	}
 }
 
 /*
Index: linux-2.6.git/include/linux/fsnotify_backend.h
===================================================================
--- linux-2.6.git.orig/include/linux/fsnotify_backend.h
+++ linux-2.6.git/include/linux/fsnotify_backend.h
@@ -276,6 +276,9 @@ struct fsnotify_vfsmount_mark {
  * inode eviction or modification.
  */
 struct fsnotify_mark {
+#if defined(CONFIG_CHECKPOINT_RESTORE) && defined(CONFIG_EXPORTFS)
+	struct dentry *target;		/* target which mark is watching on */
+#endif
 	__u32 mask;			/* mask this mark is for */
 	/* we hold ref for each i_list and g_list.  also one ref for each 'thing'
 	 * in kernel that found and may be using this mark. */
Index: linux-2.6.git/include/linux/inotify.h
===================================================================
--- linux-2.6.git.orig/include/linux/inotify.h
+++ linux-2.6.git/include/linux/inotify.h
@@ -10,6 +10,7 @@
 /* For O_CLOEXEC and O_NONBLOCK */
 #include <linux/fcntl.h>
 #include <linux/types.h>
+#include <linux/ioctl.h>
 
 /*
  * struct inotify_event - structure read from the inotify device for each event
@@ -70,6 +71,24 @@ struct inotify_event {
 #define IN_CLOEXEC O_CLOEXEC
 #define IN_NONBLOCK O_NONBLOCK
 
+/* To retrieve notify watchers via ioctl */
+struct inotify_user_mark {
+	__u64				i_ino;	/* inode mask refers to */
+	__u32				mask;
+	__u32				s_dev;	/* device inode lays on */
+	__u32				i_rdev;	/* device inode lays on, if special */
+	__u32				wd;	/* watch descriptor */
+	__u8				data[0];/* file handle encoded */
+};
+
+struct inotify_mark_req {
+	__u32				index;	/* mark index we're interested in */
+	__u32				size;	/* size provided for @mark */
+	struct inotify_user_mark __user	*mark;
+};
+
+#define FS_INOTIFY_GET_MARK		_IOWR('I', 0x12, unsigned int)
+
 #ifdef __KERNEL__
 #include <linux/sysctl.h>
 extern struct ctl_table inotify_table[]; /* for sysctl */


More information about the CRIU mailing list