[Devel] [PATCH vz9 1/5] Revert "ve/fs/sync: Per containter sync and syncfs and fs.fsync-enable sysctl"

Nikita Yushchenko nikita.yushchenko at virtuozzo.com
Mon Nov 22 09:20:28 MSK 2021


This reverts commit e6acaa7d3aff56d652adc4121ca44d71e816f53a.

Different implementation will follow.

https://jira.sw.ru/browse/PSBM-44684
Signed-off-by: Nikita Yushchenko <nikita.yushchenko at virtuozzo.com>
---
 fs/fcntl.c          |   2 -
 fs/mount.h          |   2 -
 fs/namespace.c      |   8 +-
 fs/open.c           |   3 -
 fs/sync.c           | 213 +-------------------------------------------
 include/linux/fs.h  |  12 ---
 include/linux/ve.h  |   2 -
 kernel/ve/ve.c      |   3 -
 kernel/ve/veowner.c |   8 --
 mm/msync.c          |   2 -
 10 files changed, 6 insertions(+), 249 deletions(-)

diff --git a/fs/fcntl.c b/fs/fcntl.c
index 8af146ea9231..2e0c8515bd1a 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -68,8 +68,6 @@ static int setfl(int fd, struct file * filp, unsigned long arg)
 	if (!may_use_odirect())
 		arg &= ~O_DIRECT;
 
-	if (ve_fsync_behavior() == FSYNC_NEVER)
-		arg &= ~O_SYNC;
 	/*
 	 * O_APPEND cannot be cleared if the file is marked as append-only
 	 * and the file is open for write.
diff --git a/fs/mount.h b/fs/mount.h
index 9043614df477..ce646e66efc4 100644
--- a/fs/mount.h
+++ b/fs/mount.h
@@ -101,8 +101,6 @@ static inline int is_mounted(struct vfsmount *mnt)
 	return !IS_ERR_OR_NULL(real_mount(mnt)->mnt_ns);
 }
 
-extern struct rw_semaphore namespace_sem;
-
 extern struct mount *__lookup_mnt(struct vfsmount *, struct dentry *);
 
 extern int __legitimize_mnt(struct vfsmount *, unsigned);
diff --git a/fs/namespace.c b/fs/namespace.c
index dc73e945b746..558fa26bde50 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -72,7 +72,7 @@ static DEFINE_IDA(mnt_group_ida);
 static struct hlist_head *mount_hashtable __read_mostly;
 static struct hlist_head *mountpoint_hashtable __read_mostly;
 static struct kmem_cache *mnt_cache __read_mostly;
-DECLARE_RWSEM(namespace_sem);
+static DECLARE_RWSEM(namespace_sem);
 static HLIST_HEAD(unmounted);	/* protected by namespace_sem */
 static LIST_HEAD(ex_mountpoints); /* protected by namespace_sem */
 
@@ -1325,8 +1325,9 @@ struct vfsmount *mnt_clone_internal(const struct path *path)
 	return &p->mnt;
 }
 
-struct mount *mnt_list_next(struct mnt_namespace *ns,
-			    struct list_head *p)
+#ifdef CONFIG_PROC_FS
+static struct mount *mnt_list_next(struct mnt_namespace *ns,
+				   struct list_head *p)
 {
 	struct mount *mnt, *ret = NULL;
 
@@ -1343,7 +1344,6 @@ struct mount *mnt_list_next(struct mnt_namespace *ns,
 	return ret;
 }
 
-#ifdef CONFIG_PROC_FS
 /* iterator; we want it to have access to namespace_sem, thus here... */
 static void *m_start(struct seq_file *m, loff_t *pos)
 {
diff --git a/fs/open.c b/fs/open.c
index 65e60aa661a8..040df8bc6e76 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -785,9 +785,6 @@ static int do_dentry_open(struct file *f,
 	if (!may_use_odirect())
 		f->f_flags &= ~O_DIRECT;
 
-	if (ve_fsync_behavior() == FSYNC_NEVER)
-		f->f_flags &= ~O_SYNC;
-
 	if (unlikely(f->f_flags & O_PATH)) {
 		f->f_mode = FMODE_PATH | FMODE_OPENED;
 		f->f_op = &empty_fops;
diff --git a/fs/sync.c b/fs/sync.c
index 1c78756d4749..1373a610dc78 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -8,7 +8,6 @@
 #include <linux/fs.h>
 #include <linux/slab.h>
 #include <linux/export.h>
-#include <linux/mount.h>
 #include <linux/namei.h>
 #include <linux/sched.h>
 #include <linux/writeback.h>
@@ -17,9 +16,7 @@
 #include <linux/pagemap.h>
 #include <linux/quotaops.h>
 #include <linux/backing-dev.h>
-#include <linux/ve.h>
 #include "internal.h"
-#include "mount.h"
 
 #define VALID_FLAGS (SYNC_FILE_RANGE_WAIT_BEFORE|SYNC_FILE_RANGE_WRITE| \
 			SYNC_FILE_RANGE_WAIT_AFTER)
@@ -99,160 +96,6 @@ static void fdatawait_one_bdev(struct block_device *bdev, void *arg)
 	filemap_fdatawait_keep_errors(bdev->bd_inode->i_mapping);
 }
 
-struct sync_sb {
-	struct list_head list;
-	struct super_block *sb;
-};
-
-static void sync_release_filesystems(struct list_head *sync_list)
-{
-	struct sync_sb *ss, *tmp;
-
-	list_for_each_entry_safe(ss, tmp, sync_list, list) {
-		list_del(&ss->list);
-		put_super(ss->sb);
-		kfree(ss);
-	}
-}
-
-static int sync_filesystem_collected(struct list_head *sync_list, struct super_block *sb)
-{
-	struct sync_sb *ss;
-
-	list_for_each_entry(ss, sync_list, list)
-		if (ss->sb == sb)
-			return 1;
-	return 0;
-}
-
-static int sync_collect_filesystems(struct ve_struct *ve, struct list_head *sync_list)
-{
-	struct mount *mnt;
-	struct mnt_namespace *mnt_ns;
-	struct nsproxy *ve_ns;
-	struct sync_sb *ss;
-	int ret = 0;
-
-	BUG_ON(!list_empty(sync_list));
-
-	down_read(&namespace_sem);
-
-	rcu_read_lock();
-	ve_ns = rcu_dereference(ve->ve_ns);
-	if (!ve_ns) {
-		rcu_read_unlock();
-		up_read(&namespace_sem);
-		return 0;
-	}
-	mnt_ns = ve_ns->mnt_ns;
-	rcu_read_unlock();
-
-	mnt = mnt_list_next(mnt_ns, &mnt_ns->list);
-	while (mnt) {
-		if (sync_filesystem_collected(sync_list, mnt->mnt.mnt_sb))
-			goto next;
-
-		ss = kmalloc(sizeof(*ss), GFP_KERNEL);
-		if (ss == NULL) {
-			ret = -ENOMEM;
-			break;
-		}
-		ss->sb = mnt->mnt.mnt_sb;
-		/*
-		 * We hold mount point and thus can be sure, that superblock is
-		 * alive. And it means, that we can safely increase it's usage
-		 * counter.
-		 */
-		spin_lock(&sb_lock);
-		ss->sb->s_count++;
-		spin_unlock(&sb_lock);
-		list_add_tail(&ss->list, sync_list);
-next:
-		mnt = mnt_list_next(mnt_ns, &mnt->mnt_list);
-	}
-	up_read(&namespace_sem);
-	return ret;
-}
-
-static void sync_filesystems_ve(struct ve_struct *ve, int wait)
-{
-	struct super_block *sb;
-	LIST_HEAD(sync_list);
-	struct sync_sb *ss;
-
-	/*
-	 * We don't need to care about allocating failure here. At least we
-	 * don't need to skip sync on such error.
-	 * Let's sync what we collected already instead.
-	 */
-	sync_collect_filesystems(ve, &sync_list);
-
-	list_for_each_entry(ss, &sync_list, list) {
-		sb = ss->sb;
-		down_read(&sb->s_umount);
-		if (!sb_rdonly(sb) && sb->s_root && (sb->s_flags & SB_BORN))
-			__sync_filesystem(sb, wait);
-		up_read(&sb->s_umount);
-	}
-
-	sync_release_filesystems(&sync_list);
-}
-
-static int is_sb_ve_accessible(struct ve_struct *ve, struct super_block *sb)
-{
-	struct mount *mnt;
-	struct mnt_namespace *mnt_ns;
-	struct nsproxy *ve_ns;
-	int ret = 0;
-
-	down_read(&namespace_sem);
-
-	rcu_read_lock();
-	ve_ns = rcu_dereference(ve->ve_ns);
-	if (!ve_ns) {
-		rcu_read_unlock();
-		up_read(&namespace_sem);
-		return 0;
-	}
-	mnt_ns = ve_ns->mnt_ns;
-	rcu_read_unlock();
-
-	list_for_each_entry(mnt, &mnt_ns->list, mnt_list) {
-		if (mnt->mnt.mnt_sb == sb) {
-			ret = 1;
-			break;
-		}
-	}
-	up_read(&namespace_sem);
-	return ret;
-}
-
-static int __ve_fsync_behavior(struct ve_struct *ve)
-{
-	/*
-	 * - __ve_fsync_behavior() is not called for ve0
-	 * - FSYNC_FILTERED for veX does NOT mean "filtered" behavior
-	 * - FSYNC_FILTERED for veX means "get value from ve0"
-	 */
-	if (ve->fsync_enable == FSYNC_FILTERED)
-		return get_ve0()->fsync_enable;
-	else if (ve->fsync_enable)
-		return FSYNC_FILTERED; /* sync forced by ve is always filtered */
-	else
-		return 0;
-}
-
-int ve_fsync_behavior(void)
-{
-	struct ve_struct *ve;
-
-	ve = get_exec_env();
-	if (ve_is_super(ve))
-		return FSYNC_ALWAYS;
-	else
-		return __ve_fsync_behavior(ve);
-}
-
 /*
  * Sync everything. We start by waking flusher threads so that most of
  * writeback runs on all devices in parallel. Then we sync all inodes reliably
@@ -265,32 +108,8 @@ int ve_fsync_behavior(void)
  */
 void ksys_sync(void)
 {
-	struct ve_struct *ve = get_exec_env();
 	int nowait = 0, wait = 1;
 
-	if (!ve_is_super(ve)) {
-		int fsb;
-		/*
-		 * init can't sync during VE stop. Rationale:
-		 *  - NFS with -o hard will block forever as network is down
-		 *  - no useful job is performed as VE0 will call umount/sync
-		 *    by his own later
-		 *  Den
-		 */
-		if (is_child_reaper(task_pid(current)))
-			return;
-
-		fsb = __ve_fsync_behavior(ve);
-		if (fsb == FSYNC_NEVER)
-			return;
-
-		if (fsb == FSYNC_FILTERED) {
-			sync_filesystems_ve(ve, nowait);
-			sync_filesystems_ve(ve, wait);
-			return;
-		}
-	}
-
 	wakeup_flusher_threads(WB_REASON_SYNC);
 	iterate_supers(sync_inodes_one_sb, NULL);
 	iterate_supers(sync_fs_one_sb, &nowait);
@@ -343,42 +162,18 @@ SYSCALL_DEFINE1(syncfs, int, fd)
 {
 	struct fd f = fdget(fd);
 	struct super_block *sb;
-	int ret = 0, ret2 = 0;
-	struct ve_struct *ve;
+	int ret, ret2;
 
 	if (!f.file)
 		return -EBADF;
 	sb = f.file->f_path.dentry->d_sb;
 
-	ve = get_exec_env();
-
-	if (!ve_is_super(ve)) {
-		int fsb;
-		/*
-		 * init can't sync during VE stop. Rationale:
-		 *  - NFS with -o hard will block forever as network is down
-		 *  - no useful job is performed as VE0 will call umount/sync
-		 *    by his own later
-		 *  Den
-		 */
-		if (is_child_reaper(task_pid(current)))
-			goto fdput;
-
-		fsb = __ve_fsync_behavior(ve);
-		if (fsb == FSYNC_NEVER)
-			goto fdput;
-
-		if ((fsb == FSYNC_FILTERED) && !is_sb_ve_accessible(ve, sb))
-			goto fdput;
-	}
-
 	down_read(&sb->s_umount);
 	ret = sync_filesystem(sb);
 	up_read(&sb->s_umount);
 
 	ret2 = errseq_check_and_advance(&sb->s_wb_err, &f.file->f_sb_err);
 
-fdput:
 	fdput(f);
 	return ret ? ret : ret2;
 }
@@ -422,13 +217,9 @@ EXPORT_SYMBOL(vfs_fsync);
 
 static int do_fsync(unsigned int fd, int datasync)
 {
-	struct fd f;
+	struct fd f = fdget(fd);
 	int ret = -EBADF;
 
-	if (ve_fsync_behavior() == FSYNC_NEVER)
-		return 0;
-
-	f = fdget(fd);
 	if (f.file) {
 		ret = vfs_fsync(f.file, datasync);
 		fdput(f);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 42021f0151f5..01419dbd864b 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -71,7 +71,6 @@ struct fsverity_operations;
 struct fs_context;
 struct fs_parameter_spec;
 struct fileattr;
-struct mnt_namespace;
 
 extern void __init inode_init(void);
 extern void __init inode_init_early(void);
@@ -2522,7 +2521,6 @@ void kill_anon_super(struct super_block *sb);
 void kill_litter_super(struct super_block *sb);
 void deactivate_super(struct super_block *sb);
 void deactivate_locked_super(struct super_block *sb);
-void put_super(struct super_block *sb);
 int set_anon_super(struct super_block *s, void *data);
 int set_anon_super_fc(struct super_block *s, struct fs_context *fc);
 int get_anon_bdev(dev_t *);
@@ -3148,13 +3146,6 @@ extern bool path_is_under(const struct path *, const struct path *);
 
 extern char *file_path(struct file *, char *, int);
 
-int ve_fsync_behavior(void);
-
-#define FSYNC_NEVER	0	/* ve syncs are ignored    */
-#define FSYNC_ALWAYS	1	/* ve syncs work as ususal */
-#define FSYNC_FILTERED	2	/* ve syncs only its files */
-/* For non-ve0 FSYNC_FILTERED value means "get value from ve0". */
-
 #include <linux/err.h>
 
 /* needed for stackable file system support */
@@ -3504,9 +3495,6 @@ void setattr_copy(struct user_namespace *, struct inode *inode,
 
 extern int file_update_time(struct file *file);
 
-extern struct mount *mnt_list_next(struct mnt_namespace *ns,
-				   struct list_head *p);
-
 static inline bool vma_is_dax(const struct vm_area_struct *vma)
 {
 	return vma->vm_file && IS_DAX(vma->vm_file->f_mapping->host);
diff --git a/include/linux/ve.h b/include/linux/ve.h
index 4c8f7d308829..1a66063d9ba8 100644
--- a/include/linux/ve.h
+++ b/include/linux/ve.h
@@ -61,8 +61,6 @@ struct ve_struct {
 	struct kstat_lat_pcpu_struct    sched_lat_ve;
 	int			odirect_enable;
 
-	int			fsync_enable;
-
 #if IS_ENABLED(CONFIG_BINFMT_MISC)
 	struct binfmt_misc	*binfmt_misc;
 #endif
diff --git a/kernel/ve/ve.c b/kernel/ve/ve.c
index 557a14f216c4..48be49337a87 100644
--- a/kernel/ve/ve.c
+++ b/kernel/ve/ve.c
@@ -70,7 +70,6 @@ struct ve_struct ve0 = {
 	.sched_lat_ve.cur	= &ve0_lat_stats,
 	.netns_avail_nr		= ATOMIC_INIT(INT_MAX),
 	.netns_max_nr		= INT_MAX,
-	.fsync_enable		= FSYNC_FILTERED,
 	._randomize_va_space	=
 #ifdef CONFIG_COMPAT_BRK
 					1,
@@ -931,8 +930,6 @@ static struct cgroup_subsys_state *ve_create(struct cgroup_subsys_state *parent_
 	ve->meminfo_val = VE_MEMINFO_DEFAULT;
 
 	ve->odirect_enable = 2;
-	/* for veX FSYNC_FILTERED means "get value from ve0 */
-	ve->fsync_enable = FSYNC_FILTERED;
 
 	atomic_set(&ve->netns_avail_nr, NETNS_MAX_NR_DEFAULT);
 	ve->netns_max_nr = NETNS_MAX_NR_DEFAULT;
diff --git a/kernel/ve/veowner.c b/kernel/ve/veowner.c
index e255fe57d447..b0aba35b6be9 100644
--- a/kernel/ve/veowner.c
+++ b/kernel/ve/veowner.c
@@ -7,7 +7,6 @@
  *
  */
 
-#include <linux/ve.h>
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/proc_fs.h>
@@ -67,13 +66,6 @@ static struct ctl_table vz_fs_table[] = {
 		.extra1		= &ve_mount_nr_min,
 		.extra2		= &ve_mount_nr_max,
 	},
-	{
-		.procname	= "fsync-enable",
-		.data		= &ve0.fsync_enable,
-		.maxlen		= sizeof(int),
-		.mode		= 0644 | S_ISVTX,
-		.proc_handler	= &proc_dointvec_virtual,
-	},
 	{ }
 };
 
diff --git a/mm/msync.c b/mm/msync.c
index 20737eb4b76b..137d1c104f3e 100644
--- a/mm/msync.c
+++ b/mm/msync.c
@@ -51,8 +51,6 @@ SYSCALL_DEFINE3(msync, unsigned long, start, size_t, len, int, flags)
 	if (end < start)
 		goto out;
 	error = 0;
-	if (ve_fsync_behavior() == FSYNC_NEVER)
-		goto out;
 	if (end == start)
 		goto out;
 	/*
-- 
2.30.2



More information about the Devel mailing list