[Devel] [PATCH vz9 v2 2/2] fs: use is_sb_ve_accessible() for VE-filtered sync

Nikita Yushchenko nikita.yushchenko at virtuozzo.com
Tue Nov 2 17:45:42 MSK 2021


Remove the dedicated implementation of VE-filtered sync.

Instead, implement VE-filtered sync within the common sync code: pass the
VE to sync_inodes_one_sb() and sync_fs_one_sb(), and have them skip any
superblock for which is_sb_ve_accessible() returns false.

This makes VE-filtered sync include mounts from the VE's non-root mount
namespaces.

https://jira.sw.ru/browse/PSBM-44684
Signed-off-by: Nikita Yushchenko <nikita.yushchenko at virtuozzo.com>
---
 fs/sync.c | 169 ++++++++++++++++--------------------------------------
 1 file changed, 48 insertions(+), 121 deletions(-)

diff --git a/fs/sync.c b/fs/sync.c
index e9711a9424d9..b417c2152d0a 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -71,17 +71,39 @@ int sync_filesystem(struct super_block *sb)
 }
 EXPORT_SYMBOL(sync_filesystem);
 
+struct sync_arg {
+	struct ve_struct *ve;
+	int wait;
+};
+
 static void sync_inodes_one_sb(struct super_block *sb, void *arg)
 {
+	struct sync_arg *sarg = arg;
+
+	if (sarg->ve && !is_sb_ve_accessible(sarg->ve, sb))
+		return;
+
 	if (!sb_rdonly(sb))
 		sync_inodes_sb(sb);
 }
 
 static void sync_fs_one_sb(struct super_block *sb, void *arg)
 {
-	if (!sb_rdonly(sb) && !(sb->s_iflags & SB_I_SKIP_SYNC) &&
-	    sb->s_op->sync_fs)
-		sb->s_op->sync_fs(sb, *(int *)arg);
+	struct sync_arg *sarg = arg;
+
+	if (sarg->ve && !is_sb_ve_accessible(sarg->ve, sb))
+		return;
+
+	if (!sb_rdonly(sb) && !(sb->s_iflags & SB_I_SKIP_SYNC)) {
+		if (sb->s_op->sync_fs)
+			sb->s_op->sync_fs(sb, sarg->wait);
+
+		/* For ve-local sync, process bdev here, since there is no easy
+		 * equivalent of is_sb_ve_accessible() for bdevs
+		 */
+		if (sarg->ve)
+			__sync_blockdev(sb->s_bdev, sarg->wait);
+	}
 }
 
 static void fdatawrite_one_bdev(struct block_device *bdev, void *arg)
@@ -99,105 +121,6 @@ static void fdatawait_one_bdev(struct block_device *bdev, void *arg)
 	filemap_fdatawait_keep_errors(bdev->bd_inode->i_mapping);
 }
 
-struct sync_sb {
-	struct list_head list;
-	struct super_block *sb;
-};
-
-static void sync_release_filesystems(struct list_head *sync_list)
-{
-	struct sync_sb *ss, *tmp;
-
-	list_for_each_entry_safe(ss, tmp, sync_list, list) {
-		list_del(&ss->list);
-		put_super(ss->sb);
-		kfree(ss);
-	}
-}
-
-static int sync_filesystem_collected(struct list_head *sync_list, struct super_block *sb)
-{
-	struct sync_sb *ss;
-
-	list_for_each_entry(ss, sync_list, list)
-		if (ss->sb == sb)
-			return 1;
-	return 0;
-}
-
-static int sync_collect_filesystems(struct ve_struct *ve, struct list_head *sync_list)
-{
-	struct mount *mnt;
-	struct mnt_namespace *mnt_ns;
-	struct nsproxy *ve_ns;
-	struct sync_sb *ss;
-	int ret = 0;
-
-	BUG_ON(!list_empty(sync_list));
-
-	down_read(&namespace_sem);
-
-	rcu_read_lock();
-	ve_ns = rcu_dereference(ve->ve_ns);
-	if (!ve_ns) {
-		rcu_read_unlock();
-		up_read(&namespace_sem);
-		return 0;
-	}
-	mnt_ns = ve_ns->mnt_ns;
-	rcu_read_unlock();
-
-	mnt = mnt_list_next(mnt_ns, &mnt_ns->list);
-	while (mnt) {
-		if (sync_filesystem_collected(sync_list, mnt->mnt.mnt_sb))
-			goto next;
-
-		ss = kmalloc(sizeof(*ss), GFP_KERNEL);
-		if (ss == NULL) {
-			ret = -ENOMEM;
-			break;
-		}
-		ss->sb = mnt->mnt.mnt_sb;
-		/*
-		 * We hold mount point and thus can be sure, that superblock is
-		 * alive. And it means, that we can safely increase it's usage
-		 * counter.
-		 */
-		spin_lock(&sb_lock);
-		ss->sb->s_count++;
-		spin_unlock(&sb_lock);
-		list_add_tail(&ss->list, sync_list);
-next:
-		mnt = mnt_list_next(mnt_ns, &mnt->mnt_list);
-	}
-	up_read(&namespace_sem);
-	return ret;
-}
-
-static void sync_filesystems_ve(struct ve_struct *ve, int wait)
-{
-	struct super_block *sb;
-	LIST_HEAD(sync_list);
-	struct sync_sb *ss;
-
-	/*
-	 * We don't need to care about allocating failure here. At least we
-	 * don't need to skip sync on such error.
-	 * Let's sync what we collected already instead.
-	 */
-	sync_collect_filesystems(ve, &sync_list);
-
-	list_for_each_entry(ss, &sync_list, list) {
-		sb = ss->sb;
-		down_read(&sb->s_umount);
-		if (!sb_rdonly(sb) && sb->s_root && (sb->s_flags & SB_BORN))
-			__sync_filesystem(sb, wait);
-		up_read(&sb->s_umount);
-	}
-
-	sync_release_filesystems(&sync_list);
-}
-
 static int __ve_fsync_behavior(struct ve_struct *ve)
 {
 	/*
@@ -237,8 +160,9 @@ int ve_fsync_behavior(void)
 void ksys_sync(void)
 {
 	struct ve_struct *ve = get_exec_env();
-	int nowait = 0, wait = 1;
+	struct sync_arg sarg;
 
+	sarg.ve = NULL;
 	if (!ve_is_super(ve)) {
 		int fsb;
 		/*
@@ -254,22 +178,22 @@ void ksys_sync(void)
 		fsb = __ve_fsync_behavior(ve);
 		if (fsb == FSYNC_NEVER)
 			return;
-
-		if (fsb == FSYNC_FILTERED) {
-			sync_filesystems_ve(ve, nowait);
-			sync_filesystems_ve(ve, wait);
-			return;
-		}
+		if (fsb == FSYNC_FILTERED)
+			sarg.ve = ve;
 	}
 
 	wakeup_flusher_threads(WB_REASON_SYNC);
-	iterate_supers(sync_inodes_one_sb, NULL);
-	iterate_supers(sync_fs_one_sb, &nowait);
-	iterate_supers(sync_fs_one_sb, &wait);
-	iterate_bdevs(fdatawrite_one_bdev, NULL);
-	iterate_bdevs(fdatawait_one_bdev, NULL);
-	if (unlikely(laptop_mode))
-		laptop_sync_completion();
+	iterate_supers(sync_inodes_one_sb, &sarg);
+	sarg.wait = 0;
+	iterate_supers(sync_fs_one_sb, &sarg);
+	sarg.wait = 1;
+	iterate_supers(sync_fs_one_sb, &sarg);
+	if (!sarg.ve) {
+		iterate_bdevs(fdatawrite_one_bdev, NULL);
+		iterate_bdevs(fdatawait_one_bdev, NULL);
+		if (unlikely(laptop_mode))
+			laptop_sync_completion();
+	}
 }
 
 SYSCALL_DEFINE0(sync)
@@ -280,17 +204,20 @@ SYSCALL_DEFINE0(sync)
 
 static void do_sync_work(struct work_struct *work)
 {
-	int nowait = 0;
+	struct sync_arg sarg;
+
+	sarg.ve = NULL;
+	sarg.wait = 0;
 
 	/*
 	 * Sync twice to reduce the possibility we skipped some inodes / pages
 	 * because they were temporarily locked
 	 */
-	iterate_supers(sync_inodes_one_sb, &nowait);
-	iterate_supers(sync_fs_one_sb, &nowait);
+	iterate_supers(sync_inodes_one_sb, &sarg);
+	iterate_supers(sync_fs_one_sb, &sarg);
 	iterate_bdevs(fdatawrite_one_bdev, NULL);
-	iterate_supers(sync_inodes_one_sb, &nowait);
-	iterate_supers(sync_fs_one_sb, &nowait);
+	iterate_supers(sync_inodes_one_sb, &sarg);
+	iterate_supers(sync_fs_one_sb, &sarg);
 	iterate_bdevs(fdatawrite_one_bdev, NULL);
 	printk("Emergency Sync complete\n");
 	kfree(work);
-- 
2.30.2



More information about the Devel mailing list