[Devel] [PATCH vz9 v2 2/2] fs: use is_sb_ve_accessible() for VE-filtered sync
Kirill Tkhai
ktkhai at virtuozzo.com
Tue Nov 2 19:00:25 MSK 2021
On 02.11.2021 17:45, Nikita Yushchenko wrote:
> Remove dedicated implementation of VE-filtered sync.
>
> Instead, implement VE-filtered sync within common sync code, by passing
> VE to sync_inodes_one_sb() and sync_fs_one_sb(), and filtering SBs to
> sync there via is_sb_ve_accessible().
>
> This makes VE-filtered sync to include mounts from VE's non-root mount
> namespaces.
>
> https://jira.sw.ru/browse/PSBM-44684
> Signed-off-by: Nikita Yushchenko <nikita.yushchenko at virtuozzo.com>
This is still difficult to review. We should use a single patch for a single change.
Refactorings should go in separate patches to not distract our attention from really important places.
Could you please move sync_arg introduction in a separate patch like in attached file I did?
> ---
> fs/sync.c | 169 ++++++++++++++++--------------------------------------
> 1 file changed, 48 insertions(+), 121 deletions(-)
>
> diff --git a/fs/sync.c b/fs/sync.c
> index e9711a9424d9..b417c2152d0a 100644
> --- a/fs/sync.c
> +++ b/fs/sync.c
> @@ -71,17 +71,39 @@ int sync_filesystem(struct super_block *sb)
> }
> EXPORT_SYMBOL(sync_filesystem);
>
> +struct sync_arg {
> + struct ve_struct *ve;
> + int wait;
> +};
> +
> static void sync_inodes_one_sb(struct super_block *sb, void *arg)
> {
> + struct sync_arg *sarg = arg;
> +
> + if (sarg->ve && !is_sb_ve_accessible(sarg->ve, sb))
> + return;
> +
> if (!sb_rdonly(sb))
> sync_inodes_sb(sb);
> }
>
> static void sync_fs_one_sb(struct super_block *sb, void *arg)
> {
> - if (!sb_rdonly(sb) && !(sb->s_iflags & SB_I_SKIP_SYNC) &&
> - sb->s_op->sync_fs)
> - sb->s_op->sync_fs(sb, *(int *)arg);
> + struct sync_arg *sarg = arg;
> +
> + if (sarg->ve && !is_sb_ve_accessible(sarg->ve, sb))
> + return;
> +
> + if (!sb_rdonly(sb) && !(sb->s_iflags & SB_I_SKIP_SYNC)) {
> + if (sb->s_op->sync_fs)
> + sb->s_op->sync_fs(sb, sarg->wait);
> +
> + /* For ve-local sync, process bdev here, since there is no easy
> + * equivalent of is_sb_ve_accessible() for bdevs
> + */
> + if (sarg->ve)
> + __sync_blockdev(sb->s_bdev, sarg->wait);
Just curious, but why not fdatawrite_one_bdev()+fdatawait_one_bdev() like in generic case?
This will underline, we do the same as in generic case, but just in another place,
and there will be no questions for a further reader whether there is something different or not.
Or there is something different between __sync_blockdev() and two above calls I mentioned?
>
> + }
> }
>
> static void fdatawrite_one_bdev(struct block_device *bdev, void *arg)
> @@ -99,105 +121,6 @@ static void fdatawait_one_bdev(struct block_device *bdev, void *arg)
> filemap_fdatawait_keep_errors(bdev->bd_inode->i_mapping);
> }
>
> -struct sync_sb {
> - struct list_head list;
> - struct super_block *sb;
> -};
> -
> -static void sync_release_filesystems(struct list_head *sync_list)
> -{
> - struct sync_sb *ss, *tmp;
> -
> - list_for_each_entry_safe(ss, tmp, sync_list, list) {
> - list_del(&ss->list);
> - put_super(ss->sb);
> - kfree(ss);
> - }
> -}
> -
> -static int sync_filesystem_collected(struct list_head *sync_list, struct super_block *sb)
> -{
> - struct sync_sb *ss;
> -
> - list_for_each_entry(ss, sync_list, list)
> - if (ss->sb == sb)
> - return 1;
> - return 0;
> -}
> -
> -static int sync_collect_filesystems(struct ve_struct *ve, struct list_head *sync_list)
> -{
> - struct mount *mnt;
> - struct mnt_namespace *mnt_ns;
> - struct nsproxy *ve_ns;
> - struct sync_sb *ss;
> - int ret = 0;
> -
> - BUG_ON(!list_empty(sync_list));
> -
> - down_read(&namespace_sem);
> -
> - rcu_read_lock();
> - ve_ns = rcu_dereference(ve->ve_ns);
> - if (!ve_ns) {
> - rcu_read_unlock();
> - up_read(&namespace_sem);
> - return 0;
> - }
> - mnt_ns = ve_ns->mnt_ns;
> - rcu_read_unlock();
> -
> - mnt = mnt_list_next(mnt_ns, &mnt_ns->list);
> - while (mnt) {
> - if (sync_filesystem_collected(sync_list, mnt->mnt.mnt_sb))
> - goto next;
> -
> - ss = kmalloc(sizeof(*ss), GFP_KERNEL);
> - if (ss == NULL) {
> - ret = -ENOMEM;
> - break;
> - }
> - ss->sb = mnt->mnt.mnt_sb;
> - /*
> - * We hold mount point and thus can be sure, that superblock is
> - * alive. And it means, that we can safely increase it's usage
> - * counter.
> - */
> - spin_lock(&sb_lock);
> - ss->sb->s_count++;
> - spin_unlock(&sb_lock);
> - list_add_tail(&ss->list, sync_list);
> -next:
> - mnt = mnt_list_next(mnt_ns, &mnt->mnt_list);
> - }
> - up_read(&namespace_sem);
> - return ret;
> -}
> -
> -static void sync_filesystems_ve(struct ve_struct *ve, int wait)
> -{
> - struct super_block *sb;
> - LIST_HEAD(sync_list);
> - struct sync_sb *ss;
> -
> - /*
> - * We don't need to care about allocating failure here. At least we
> - * don't need to skip sync on such error.
> - * Let's sync what we collected already instead.
> - */
> - sync_collect_filesystems(ve, &sync_list);
> -
> - list_for_each_entry(ss, &sync_list, list) {
> - sb = ss->sb;
> - down_read(&sb->s_umount);
> - if (!sb_rdonly(sb) && sb->s_root && (sb->s_flags & SB_BORN))
> - __sync_filesystem(sb, wait);
> - up_read(&sb->s_umount);
> - }
> -
> - sync_release_filesystems(&sync_list);
> -}
> -
> static int __ve_fsync_behavior(struct ve_struct *ve)
> {
> /*
> @@ -237,8 +160,9 @@ int ve_fsync_behavior(void)
> void ksys_sync(void)
> {
> struct ve_struct *ve = get_exec_env();
> - int nowait = 0, wait = 1;
> + struct sync_arg sarg;
>
> + sarg.ve = NULL;
> if (!ve_is_super(ve)) {
> int fsb;
> /*
> @@ -254,22 +178,22 @@ void ksys_sync(void)
> fsb = __ve_fsync_behavior(ve);
> if (fsb == FSYNC_NEVER)
> return;
> -
> - if (fsb == FSYNC_FILTERED) {
> - sync_filesystems_ve(ve, nowait);
> - sync_filesystems_ve(ve, wait);
> - return;
> - }
> + if (fsb == FSYNC_FILTERED)
> + sarg.ve = ve;
> }
>
> wakeup_flusher_threads(WB_REASON_SYNC);
> - iterate_supers(sync_inodes_one_sb, NULL);
> - iterate_supers(sync_fs_one_sb, &nowait);
> - iterate_supers(sync_fs_one_sb, &wait);
> - iterate_bdevs(fdatawrite_one_bdev, NULL);
> - iterate_bdevs(fdatawait_one_bdev, NULL);
> - if (unlikely(laptop_mode))
> - laptop_sync_completion();
> + iterate_supers(sync_inodes_one_sb, &sarg);
> + sarg.wait = 0;
> + iterate_supers(sync_fs_one_sb, &sarg);
> + sarg.wait = 1;
> + iterate_supers(sync_fs_one_sb, &sarg);
> + if (!sarg.ve) {
> + iterate_bdevs(fdatawrite_one_bdev, NULL);
> + iterate_bdevs(fdatawait_one_bdev, NULL);
> + if (unlikely(laptop_mode))
> + laptop_sync_completion();
> + }
> }
>
> SYSCALL_DEFINE0(sync)
> @@ -280,17 +204,20 @@ SYSCALL_DEFINE0(sync)
>
> static void do_sync_work(struct work_struct *work)
> {
> - int nowait = 0;
> + struct sync_arg sarg;
> +
> + sarg.ve = NULL;
> + sarg.wait = 0;
>
> /*
> * Sync twice to reduce the possibility we skipped some inodes / pages
> * because they were temporarily locked
> */
> - iterate_supers(sync_inodes_one_sb, &nowait);
> - iterate_supers(sync_fs_one_sb, &nowait);
> + iterate_supers(sync_inodes_one_sb, &sarg);
> + iterate_supers(sync_fs_one_sb, &sarg);
> iterate_bdevs(fdatawrite_one_bdev, NULL);
> - iterate_supers(sync_inodes_one_sb, &nowait);
> - iterate_supers(sync_fs_one_sb, &nowait);
> + iterate_supers(sync_inodes_one_sb, &sarg);
> + iterate_supers(sync_fs_one_sb, &sarg);
> iterate_bdevs(fdatawrite_one_bdev, NULL);
> printk("Emergency Sync complete\n");
> kfree(work);
>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: p.diff
Type: text/x-patch
Size: 2337 bytes
Desc: not available
URL: <http://lists.openvz.org/pipermail/devel/attachments/20211102/60008ff4/attachment.bin>
More information about the Devel
mailing list