[Devel] [PATCH vz9 v2 2/2] fs: use is_sb_ve_accessible() for VE-filtered sync
Nikita Yushchenko
nikita.yushchenko at virtuozzo.com
Tue Nov 2 17:45:42 MSK 2021
Remove dedicated implementation of VE-filtered sync.
Instead, implement VE-filtered sync within common sync code, by passing
VE to sync_inodes_one_sb() and sync_fs_one_sb(), and filtering SBs to
sync there via is_sb_ve_accessible().
As a result, VE-filtered sync now also covers mounts from the VE's non-root
mount namespaces.
https://jira.sw.ru/browse/PSBM-44684
Signed-off-by: Nikita Yushchenko <nikita.yushchenko at virtuozzo.com>
---
fs/sync.c | 169 ++++++++++++++++--------------------------------------
1 file changed, 48 insertions(+), 121 deletions(-)
diff --git a/fs/sync.c b/fs/sync.c
index e9711a9424d9..b417c2152d0a 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -71,17 +71,39 @@ int sync_filesystem(struct super_block *sb)
}
EXPORT_SYMBOL(sync_filesystem);
+struct sync_arg {
+ struct ve_struct *ve;
+ int wait;
+};
+
static void sync_inodes_one_sb(struct super_block *sb, void *arg)
{
+ struct sync_arg *sarg = arg;
+
+ if (sarg->ve && !is_sb_ve_accessible(sarg->ve, sb))
+ return;
+
if (!sb_rdonly(sb))
sync_inodes_sb(sb);
}
static void sync_fs_one_sb(struct super_block *sb, void *arg)
{
- if (!sb_rdonly(sb) && !(sb->s_iflags & SB_I_SKIP_SYNC) &&
- sb->s_op->sync_fs)
- sb->s_op->sync_fs(sb, *(int *)arg);
+ struct sync_arg *sarg = arg;
+
+ if (sarg->ve && !is_sb_ve_accessible(sarg->ve, sb))
+ return;
+
+ if (!sb_rdonly(sb) && !(sb->s_iflags & SB_I_SKIP_SYNC)) {
+ if (sb->s_op->sync_fs)
+ sb->s_op->sync_fs(sb, sarg->wait);
+
+ /* For ve-local sync, process bdev here, since there is no easy
+ * equivalent of is_sb_ve_accessible() for bdevs
+ */
+ if (sarg->ve)
+ __sync_blockdev(sb->s_bdev, sarg->wait);
+ }
}
static void fdatawrite_one_bdev(struct block_device *bdev, void *arg)
@@ -99,105 +121,6 @@ static void fdatawait_one_bdev(struct block_device *bdev, void *arg)
filemap_fdatawait_keep_errors(bdev->bd_inode->i_mapping);
}
-struct sync_sb {
- struct list_head list;
- struct super_block *sb;
-};
-
-static void sync_release_filesystems(struct list_head *sync_list)
-{
- struct sync_sb *ss, *tmp;
-
- list_for_each_entry_safe(ss, tmp, sync_list, list) {
- list_del(&ss->list);
- put_super(ss->sb);
- kfree(ss);
- }
-}
-
-static int sync_filesystem_collected(struct list_head *sync_list, struct super_block *sb)
-{
- struct sync_sb *ss;
-
- list_for_each_entry(ss, sync_list, list)
- if (ss->sb == sb)
- return 1;
- return 0;
-}
-
-static int sync_collect_filesystems(struct ve_struct *ve, struct list_head *sync_list)
-{
- struct mount *mnt;
- struct mnt_namespace *mnt_ns;
- struct nsproxy *ve_ns;
- struct sync_sb *ss;
- int ret = 0;
-
- BUG_ON(!list_empty(sync_list));
-
- down_read(&namespace_sem);
-
- rcu_read_lock();
- ve_ns = rcu_dereference(ve->ve_ns);
- if (!ve_ns) {
- rcu_read_unlock();
- up_read(&namespace_sem);
- return 0;
- }
- mnt_ns = ve_ns->mnt_ns;
- rcu_read_unlock();
-
- mnt = mnt_list_next(mnt_ns, &mnt_ns->list);
- while (mnt) {
- if (sync_filesystem_collected(sync_list, mnt->mnt.mnt_sb))
- goto next;
-
- ss = kmalloc(sizeof(*ss), GFP_KERNEL);
- if (ss == NULL) {
- ret = -ENOMEM;
- break;
- }
- ss->sb = mnt->mnt.mnt_sb;
- /*
- * We hold mount point and thus can be sure, that superblock is
- * alive. And it means, that we can safely increase it's usage
- * counter.
- */
- spin_lock(&sb_lock);
- ss->sb->s_count++;
- spin_unlock(&sb_lock);
- list_add_tail(&ss->list, sync_list);
-next:
- mnt = mnt_list_next(mnt_ns, &mnt->mnt_list);
- }
- up_read(&namespace_sem);
- return ret;
-}
-
-static void sync_filesystems_ve(struct ve_struct *ve, int wait)
-{
- struct super_block *sb;
- LIST_HEAD(sync_list);
- struct sync_sb *ss;
-
- /*
- * We don't need to care about allocating failure here. At least we
- * don't need to skip sync on such error.
- * Let's sync what we collected already instead.
- */
- sync_collect_filesystems(ve, &sync_list);
-
- list_for_each_entry(ss, &sync_list, list) {
- sb = ss->sb;
- down_read(&sb->s_umount);
- if (!sb_rdonly(sb) && sb->s_root && (sb->s_flags & SB_BORN))
- __sync_filesystem(sb, wait);
- up_read(&sb->s_umount);
- }
-
- sync_release_filesystems(&sync_list);
-}
-
static int __ve_fsync_behavior(struct ve_struct *ve)
{
/*
@@ -237,8 +160,9 @@ int ve_fsync_behavior(void)
void ksys_sync(void)
{
struct ve_struct *ve = get_exec_env();
- int nowait = 0, wait = 1;
+ struct sync_arg sarg;
+ sarg.ve = NULL;
if (!ve_is_super(ve)) {
int fsb;
/*
@@ -254,22 +178,22 @@ void ksys_sync(void)
fsb = __ve_fsync_behavior(ve);
if (fsb == FSYNC_NEVER)
return;
-
- if (fsb == FSYNC_FILTERED) {
- sync_filesystems_ve(ve, nowait);
- sync_filesystems_ve(ve, wait);
- return;
- }
+ if (fsb == FSYNC_FILTERED)
+ sarg.ve = ve;
}
wakeup_flusher_threads(WB_REASON_SYNC);
- iterate_supers(sync_inodes_one_sb, NULL);
- iterate_supers(sync_fs_one_sb, &nowait);
- iterate_supers(sync_fs_one_sb, &wait);
- iterate_bdevs(fdatawrite_one_bdev, NULL);
- iterate_bdevs(fdatawait_one_bdev, NULL);
- if (unlikely(laptop_mode))
- laptop_sync_completion();
+ iterate_supers(sync_inodes_one_sb, &sarg);
+ sarg.wait = 0;
+ iterate_supers(sync_fs_one_sb, &sarg);
+ sarg.wait = 1;
+ iterate_supers(sync_fs_one_sb, &sarg);
+ if (!sarg.ve) {
+ iterate_bdevs(fdatawrite_one_bdev, NULL);
+ iterate_bdevs(fdatawait_one_bdev, NULL);
+ if (unlikely(laptop_mode))
+ laptop_sync_completion();
+ }
}
SYSCALL_DEFINE0(sync)
@@ -280,17 +204,20 @@ SYSCALL_DEFINE0(sync)
static void do_sync_work(struct work_struct *work)
{
- int nowait = 0;
+ struct sync_arg sarg;
+
+ sarg.ve = NULL;
+ sarg.wait = 0;
/*
* Sync twice to reduce the possibility we skipped some inodes / pages
* because they were temporarily locked
*/
- iterate_supers(sync_inodes_one_sb, &nowait);
- iterate_supers(sync_fs_one_sb, &nowait);
+ iterate_supers(sync_inodes_one_sb, &sarg);
+ iterate_supers(sync_fs_one_sb, &sarg);
iterate_bdevs(fdatawrite_one_bdev, NULL);
- iterate_supers(sync_inodes_one_sb, &nowait);
- iterate_supers(sync_fs_one_sb, &nowait);
+ iterate_supers(sync_inodes_one_sb, &sarg);
+ iterate_supers(sync_fs_one_sb, &sarg);
iterate_bdevs(fdatawrite_one_bdev, NULL);
printk("Emergency Sync complete\n");
kfree(work);
--
2.30.2
More information about the Devel
mailing list