[CRIU] [PATCH 0/4] Early inotify review series v1
Cyrill Gorcunov
gorcunov at openvz.org
Tue Apr 10 08:42:59 EDT 2012
Hi, this series is not for merging but rather for evaluating
the overall idea of inotify c/r. I've tested it with a hand-made
test case, not a zdtm one (so until I cook up a zdtm version,
don't pick up this series).
The series won't work without the kernel patch (which I attach
below), and the kernel patch is still only in my testing repo.
---
fs, notify: Add FS_INOTIFY_GET_MARK ioctl
In checkpoint/restore we need to dump the marks
a process has, so that we are able to recreate
them at restore time.
For this purpose the FS_INOTIFY_GET_MARK ioctl code
is introduced. The caller provides the index of the
mark to retrieve and a buffer to store the path
being watched.
On return, the ioctl handler fills in the path and
a mask suitable for an inotify_add_watch call.
Signed-off-by: Cyrill Gorcunov <gorcunov at openvz.org>
---
fs/notify/inotify/inotify_user.c | 120 +++++++++++++++++++++++++++++++++++++--
fs/notify/mark.c | 6 +
include/linux/fsnotify_backend.h | 3
include/linux/inotify.h | 19 ++++++
4 files changed, 142 insertions(+), 6 deletions(-)
Index: linux-2.6.git/fs/notify/inotify/inotify_user.c
===================================================================
--- linux-2.6.git.orig/fs/notify/inotify/inotify_user.c
+++ linux-2.6.git/fs/notify/inotify/inotify_user.c
@@ -36,8 +36,10 @@
#include <linux/types.h>
#include <linux/anon_inodes.h>
#include <linux/uaccess.h>
+#include <linux/mount.h>
#include <linux/poll.h>
#include <linux/wait.h>
+#include <linux/exportfs.h>
#include "inotify.h"
@@ -329,6 +331,108 @@ static long inotify_ioctl(struct file *f
mutex_unlock(&group->notification_mutex);
ret = put_user(send_len, (int __user *) p);
break;
+
+#if defined(CONFIG_CHECKPOINT_RESTORE) && defined(CONFIG_EXPORTFS)
+
+ case FS_INOTIFY_GET_MARK: {
+ struct inotify_user_mark __user *user_mark;
+ struct inotify_inode_mark *i_mark;
+ /* struct dnotify_mark *dn_mark; */
+ struct inotify_mark_req request;
+ struct fsnotify_mark *mark;
+ struct file_handle *fhandle;
+ struct inode *inode;
+ __u32 user_mask, user_wd;
+ int size;
+
+ ret = copy_from_user(&request, p, sizeof(request));
+ if (ret)
+ break;
+
+ user_mark = request.mark;
+
+ /* Linear search >:( */
+ ret = -ENOENT;
+ spin_lock(&group->mark_lock);
+ list_for_each_entry(mark, &group->marks_list, g_list) {
+ if (!request.index--) {
+ fsnotify_get_mark(mark);
+ if (!mark->target) {
+ fsnotify_put_mark(mark);
+ mark = NULL;
+ ret = -ENODATA;
+ }
+ goto found;
+ }
+ }
+ mark = NULL;
+found:
+ spin_unlock(&group->mark_lock);
+ if (!mark)
+ break;
+
+ fhandle = kmalloc(sizeof(*fhandle) + MAX_HANDLE_SZ, GFP_TEMPORARY);
+ if (!fhandle) {
+ ret = -ENOMEM;
+ break;
+ }
+
+ fhandle->handle_bytes = MAX_HANDLE_SZ;
+ size = MAX_HANDLE_SZ >> 2;
+
+ spin_lock(&mark->lock);
+
+ if (!(mark->flags & FSNOTIFY_MARK_FLAG_ALIVE)) {
+ ret = -ENODATA;
+ goto err_unlock;
+ }
+
+ user_mask = inotify_mask_to_arg(mark->mask);
+ if (mark->flags & FSNOTIFY_MARK_FLAG_INODE) {
+ i_mark = container_of(mark, struct inotify_inode_mark, fsn_mark);
+ user_wd = i_mark->wd;
+ } else
+ user_wd = -1;
+ ret = exportfs_encode_fh(mark->target, (struct fid *)fhandle->f_handle, &size, 0);
+ if ((ret == 255) || (ret == -ENOSPC)) {
+ ret = -EOVERFLOW;
+ goto err_unlock;
+ }
+
+ fhandle->handle_type = ret;
+ fhandle->handle_bytes = size * sizeof(u32);
+
+ size = sizeof(*fhandle) + fhandle->handle_bytes;
+
+ if (size > (request.size - sizeof(*request.mark))) {
+ ret = -ENOSPC;
+ goto err_unlock;
+ }
+
+ inode = igrab(mark->target->d_inode);
+ spin_unlock(&mark->lock);
+
+ if (inode) {
+ ret = put_user((__u64)inode->i_ino, (__u64 *)&user_mark->i_ino);
+ ret |= put_user((__u32)inode->i_sb->s_dev, (__u32 *)&user_mark->s_dev);
+ ret |= put_user((__u32)inode->i_rdev, (__u32 *)&user_mark->i_rdev);
+ ret |= put_user(user_mask, (__u32 *)&user_mark->mask);
+ ret |= put_user(user_wd, (__u32 *)&user_mark->wd);
+ ret |= copy_to_user(user_mark->data, fhandle, size);
+ } else
+ ret = -ENODATA;
+ iput(inode);
+err:
+ kfree(fhandle);
+ fsnotify_put_mark(mark);
+ break;
+err_unlock:
+ spin_unlock(&mark->lock);
+ goto err;
+ }
+
+#endif /* CONFIG_CHECKPOINT_RESTORE && CONFIG_EXPORTFS */
+
}
return ret;
@@ -621,7 +725,7 @@ static int inotify_update_existing_watch
}
static int inotify_new_watch(struct fsnotify_group *group,
- struct inode *inode,
+ struct dentry *dentry,
u32 arg)
{
struct inotify_inode_mark *tmp_i_mark;
@@ -629,6 +733,7 @@ static int inotify_new_watch(struct fsno
int ret;
struct idr *idr = &group->inotify_data.idr;
spinlock_t *idr_lock = &group->inotify_data.idr_lock;
+ struct inode *inode = dentry->d_inode;
/* don't allow invalid bits: we don't want flags set */
mask = inotify_arg_to_mask(arg);
@@ -660,6 +765,11 @@ static int inotify_new_watch(struct fsno
goto out_err;
}
+#if defined(CONFIG_CHECKPOINT_RESTORE) && defined(CONFIG_EXPORTFS)
+ dget(dentry);
+ tmp_i_mark->fsn_mark.target = dentry;
+#endif
+
/* increment the number of watches the user has */
atomic_inc(&group->inotify_data.user->inotify_watches);
@@ -673,16 +783,16 @@ out_err:
return ret;
}
-static int inotify_update_watch(struct fsnotify_group *group, struct inode *inode, u32 arg)
+static int inotify_update_watch(struct fsnotify_group *group, struct dentry *dentry, u32 arg)
{
int ret = 0;
retry:
/* try to update and existing watch with the new arg */
- ret = inotify_update_existing_watch(group, inode, arg);
+ ret = inotify_update_existing_watch(group, dentry->d_inode, arg);
/* no mark present, try to add a new one */
if (ret == -ENOENT)
- ret = inotify_new_watch(group, inode, arg);
+ ret = inotify_new_watch(group, dentry, arg);
/*
* inotify_new_watch could race with another thread which did an
* inotify_new_watch between the update_existing and the add watch
@@ -785,7 +895,7 @@ SYSCALL_DEFINE3(inotify_add_watch, int,
group = filp->private_data;
/* create/update an inode mark */
- ret = inotify_update_watch(group, inode, mask);
+ ret = inotify_update_watch(group, path.dentry, mask);
path_put(&path);
fput_and_out:
fput_light(filp, fput_needed);
Index: linux-2.6.git/fs/notify/mark.c
===================================================================
--- linux-2.6.git.orig/fs/notify/mark.c
+++ linux-2.6.git/fs/notify/mark.c
@@ -109,8 +109,12 @@ void fsnotify_get_mark(struct fsnotify_m
void fsnotify_put_mark(struct fsnotify_mark *mark)
{
- if (atomic_dec_and_test(&mark->refcnt))
+ if (atomic_dec_and_test(&mark->refcnt)) {
+#if defined(CONFIG_CHECKPOINT_RESTORE) && defined(CONFIG_EXPORTFS)
+ dput(mark->target);
+#endif
mark->free_mark(mark);
+ }
}
/*
Index: linux-2.6.git/include/linux/fsnotify_backend.h
===================================================================
--- linux-2.6.git.orig/include/linux/fsnotify_backend.h
+++ linux-2.6.git/include/linux/fsnotify_backend.h
@@ -276,6 +276,9 @@ struct fsnotify_vfsmount_mark {
* inode eviction or modification.
*/
struct fsnotify_mark {
+#if defined(CONFIG_CHECKPOINT_RESTORE) && defined(CONFIG_EXPORTFS)
+ struct dentry *target; /* target which mark is watching on */
+#endif
__u32 mask; /* mask this mark is for */
/* we hold ref for each i_list and g_list. also one ref for each 'thing'
* in kernel that found and may be using this mark. */
Index: linux-2.6.git/include/linux/inotify.h
===================================================================
--- linux-2.6.git.orig/include/linux/inotify.h
+++ linux-2.6.git/include/linux/inotify.h
@@ -10,6 +10,7 @@
/* For O_CLOEXEC and O_NONBLOCK */
#include <linux/fcntl.h>
#include <linux/types.h>
+#include <linux/ioctl.h>
/*
* struct inotify_event - structure read from the inotify device for each event
@@ -70,6 +71,24 @@ struct inotify_event {
#define IN_CLOEXEC O_CLOEXEC
#define IN_NONBLOCK O_NONBLOCK
+/* To retrieve notify watchers via ioctl */
+struct inotify_user_mark {
+ __u64 i_ino; /* inode mask refers to */
+ __u32 mask;
+ __u32 s_dev; /* device inode lays on */
+ __u32 i_rdev; /* device inode lays on, if special */
+ __u32 wd; /* watch descriptor */
+ __u8 data[0];/* file handle encoded */
+};
+
+struct inotify_mark_req {
+ __u32 index; /* mark index we're interested in */
+ __u32 size; /* size provided for @mark */
+ struct inotify_user_mark __user *mark;
+};
+
+#define FS_INOTIFY_GET_MARK _IOWR('I', 0x12, unsigned int)
+
#ifdef __KERNEL__
#include <linux/sysctl.h>
extern struct ctl_table inotify_table[]; /* for sysctl */
More information about the CRIU
mailing list