[Devel] [PATCH vz9 v2 2/2] fs: use is_sb_ve_accessible() for VE-filtered sync

Kirill Tkhai ktkhai at virtuozzo.com
Tue Nov 2 19:00:25 MSK 2021


On 02.11.2021 17:45, Nikita Yushchenko wrote:
> Remove dedicated implementation of VE-filtered sync.
> 
> Instead, implement VE-filtered sync within common sync code, by passing
> VE to sync_inodes_one_sb() and sync_fs_one_sb(), and filtering SBs to
> sync there via is_sb_ve_accessible().
> 
> This makes VE-filtered sync to include mounts from VE's non-root mount
> namespaces.
> 
> https://jira.sw.ru/browse/PSBM-44684
> Signed-off-by: Nikita Yushchenko <nikita.yushchenko at virtuozzo.com>

This is still difficult to review. We should use a single patch for a single change.
Refactorings should go in separate patches to not distract our attention from really important places.
Could you please move sync_arg introduction in a separate patch like in attached file I did?

> ---
>  fs/sync.c | 169 ++++++++++++++++--------------------------------------
>  1 file changed, 48 insertions(+), 121 deletions(-)
> 
> diff --git a/fs/sync.c b/fs/sync.c
> index e9711a9424d9..b417c2152d0a 100644
> --- a/fs/sync.c
> +++ b/fs/sync.c
> @@ -71,17 +71,39 @@ int sync_filesystem(struct super_block *sb)
>  }
>  EXPORT_SYMBOL(sync_filesystem);
>  
> +struct sync_arg {
> +	struct ve_struct *ve;
> +	int wait;
> +};
> +
>  static void sync_inodes_one_sb(struct super_block *sb, void *arg)
>  {
> +	struct sync_arg *sarg = arg;
> +
> +	if (sarg->ve && !is_sb_ve_accessible(sarg->ve, sb))
> +		return;
> +
>  	if (!sb_rdonly(sb))
>  		sync_inodes_sb(sb);
>  }
>  
>  static void sync_fs_one_sb(struct super_block *sb, void *arg)
>  {
> -	if (!sb_rdonly(sb) && !(sb->s_iflags & SB_I_SKIP_SYNC) &&
> -	    sb->s_op->sync_fs)
> -		sb->s_op->sync_fs(sb, *(int *)arg);
> +	struct sync_arg *sarg = arg;
> +
> +	if (sarg->ve && !is_sb_ve_accessible(sarg->ve, sb))
> +		return;
> +
> +	if (!sb_rdonly(sb) && !(sb->s_iflags & SB_I_SKIP_SYNC)) {
> +		if (sb->s_op->sync_fs)
> +			sb->s_op->sync_fs(sb, sarg->wait);
> +
> +		/* For ve-local sync, process bdev here, since there is no easy
> +		 * equivalent of is_sb_ve_accessible() for bdevs
> +		 */
> +		if (sarg->ve)
> +			__sync_blockdev(sb->s_bdev, sarg->wait);

Just curious, but why not fdatawrite_one_bdev()+fdatawait_one_bdev() like in generic case?
This will underline, we do the same as in generic case, but just in another place,
and there will be no questions for a further reader whether there is something different or not. 
 
Or there is something different between __sync_blockdev() and two above calls I mentioned? 

>
> +	}
>  }
>  
>  static void fdatawrite_one_bdev(struct block_device *bdev, void *arg)
> @@ -99,105 +121,6 @@ static void fdatawait_one_bdev(struct block_device *bdev, void *arg)
>  	filemap_fdatawait_keep_errors(bdev->bd_inode->i_mapping);
>  }
>  
> -struct sync_sb {
> -	struct list_head list;
> -	struct super_block *sb;
> -};
> -
> -static void sync_release_filesystems(struct list_head *sync_list)
> -{
> -	struct sync_sb *ss, *tmp;
> -
> -	list_for_each_entry_safe(ss, tmp, sync_list, list) {
> -		list_del(&ss->list);
> -		put_super(ss->sb);
> -		kfree(ss);
> -	}
> -}
> -
> -static int sync_filesystem_collected(struct list_head *sync_list, struct super_block *sb)
> -{
> -	struct sync_sb *ss;
> -
> -	list_for_each_entry(ss, sync_list, list)
> -		if (ss->sb == sb)
> -			return 1;
> -	return 0;
> -}
> -
> -static int sync_collect_filesystems(struct ve_struct *ve, struct list_head *sync_list)
> -{
> -	struct mount *mnt;
> -	struct mnt_namespace *mnt_ns;
> -	struct nsproxy *ve_ns;
> -	struct sync_sb *ss;
> -	int ret = 0;
> -
> -	BUG_ON(!list_empty(sync_list));
> -
> -	down_read(&namespace_sem);
> -
> -	rcu_read_lock();
> -	ve_ns = rcu_dereference(ve->ve_ns);
> -	if (!ve_ns) {
> -		rcu_read_unlock();
> -		up_read(&namespace_sem);
> -		return 0;
> -	}
> -	mnt_ns = ve_ns->mnt_ns;
> -	rcu_read_unlock();
> -
> -	mnt = mnt_list_next(mnt_ns, &mnt_ns->list);
> -	while (mnt) {
> -		if (sync_filesystem_collected(sync_list, mnt->mnt.mnt_sb))
> -			goto next;
> -
> -		ss = kmalloc(sizeof(*ss), GFP_KERNEL);
> -		if (ss == NULL) {
> -			ret = -ENOMEM;
> -			break;
> -		}
> -		ss->sb = mnt->mnt.mnt_sb;
> -		/*
> -		 * We hold mount point and thus can be sure, that superblock is
> -		 * alive. And it means, that we can safely increase it's usage
> -		 * counter.
> -		 */
> -		spin_lock(&sb_lock);
> -		ss->sb->s_count++;
> -		spin_unlock(&sb_lock);
> -		list_add_tail(&ss->list, sync_list);
> -next:
> -		mnt = mnt_list_next(mnt_ns, &mnt->mnt_list);
> -	}
> -	up_read(&namespace_sem);
> -	return ret;
> -}
> -
> -static void sync_filesystems_ve(struct ve_struct *ve, int wait)
> -{
> -	struct super_block *sb;
> -	LIST_HEAD(sync_list);
> -	struct sync_sb *ss;
> -
> -	/*
> -	 * We don't need to care about allocating failure here. At least we
> -	 * don't need to skip sync on such error.
> -	 * Let's sync what we collected already instead.
> -	 */
> -	sync_collect_filesystems(ve, &sync_list);
> -
> -	list_for_each_entry(ss, &sync_list, list) {
> -		sb = ss->sb;
> -		down_read(&sb->s_umount);
> -		if (!sb_rdonly(sb) && sb->s_root && (sb->s_flags & SB_BORN))
> -			__sync_filesystem(sb, wait);
> -		up_read(&sb->s_umount);
> -	}
> -
> -	sync_release_filesystems(&sync_list);
> -}
> -
>  static int __ve_fsync_behavior(struct ve_struct *ve)
>  {
>  	/*
> @@ -237,8 +160,9 @@ int ve_fsync_behavior(void)
>  void ksys_sync(void)
>  {
>  	struct ve_struct *ve = get_exec_env();
> -	int nowait = 0, wait = 1;
> +	struct sync_arg sarg;
>  
> +	sarg.ve = NULL;
>  	if (!ve_is_super(ve)) {
>  		int fsb;
>  		/*
> @@ -254,22 +178,22 @@ void ksys_sync(void)
>  		fsb = __ve_fsync_behavior(ve);
>  		if (fsb == FSYNC_NEVER)
>  			return;
> -
> -		if (fsb == FSYNC_FILTERED) {
> -			sync_filesystems_ve(ve, nowait);
> -			sync_filesystems_ve(ve, wait);
> -			return;
> -		}
> +		if (fsb == FSYNC_FILTERED)
> +			sarg.ve = ve;
>  	}
>  
>  	wakeup_flusher_threads(WB_REASON_SYNC);
> -	iterate_supers(sync_inodes_one_sb, NULL);
> -	iterate_supers(sync_fs_one_sb, &nowait);
> -	iterate_supers(sync_fs_one_sb, &wait);
> -	iterate_bdevs(fdatawrite_one_bdev, NULL);
> -	iterate_bdevs(fdatawait_one_bdev, NULL);
> -	if (unlikely(laptop_mode))
> -		laptop_sync_completion();
> +	iterate_supers(sync_inodes_one_sb, &sarg);
> +	sarg.wait = 0;
> +	iterate_supers(sync_fs_one_sb, &sarg);
> +	sarg.wait = 1;
> +	iterate_supers(sync_fs_one_sb, &sarg);
> +	if (!sarg.ve) {
> +		iterate_bdevs(fdatawrite_one_bdev, NULL);
> +		iterate_bdevs(fdatawait_one_bdev, NULL);
> +		if (unlikely(laptop_mode))
> +			laptop_sync_completion();
> +	}
>  }
>  
>  SYSCALL_DEFINE0(sync)
> @@ -280,17 +204,20 @@ SYSCALL_DEFINE0(sync)
>  
>  static void do_sync_work(struct work_struct *work)
>  {
> -	int nowait = 0;
> +	struct sync_arg sarg;
> +
> +	sarg.ve = NULL;
> +	sarg.wait = 0;
>  
>  	/*
>  	 * Sync twice to reduce the possibility we skipped some inodes / pages
>  	 * because they were temporarily locked
>  	 */
> -	iterate_supers(sync_inodes_one_sb, &nowait);
> -	iterate_supers(sync_fs_one_sb, &nowait);
> +	iterate_supers(sync_inodes_one_sb, &sarg);
> +	iterate_supers(sync_fs_one_sb, &sarg);
>  	iterate_bdevs(fdatawrite_one_bdev, NULL);
> -	iterate_supers(sync_inodes_one_sb, &nowait);
> -	iterate_supers(sync_fs_one_sb, &nowait);
> +	iterate_supers(sync_inodes_one_sb, &sarg);
> +	iterate_supers(sync_fs_one_sb, &sarg);
>  	iterate_bdevs(fdatawrite_one_bdev, NULL);
>  	printk("Emergency Sync complete\n");
>  	kfree(work);
> 
-------------- next part --------------
A non-text attachment was scrubbed...
Name: p.diff
Type: text/x-patch
Size: 2337 bytes
Desc: not available
URL: <http://lists.openvz.org/pipermail/devel/attachments/20211102/60008ff4/attachment.bin>


More information about the Devel mailing list