[Devel] [PATCH vz9 1/5] Revert "ve/fs/sync: Per containter sync and syncfs and fs.fsync-enable sysctl"
Kirill Tkhai
ktkhai at virtuozzo.com
Mon Nov 22 10:58:02 MSK 2021
On 22.11.2021 09:20, Nikita Yushchenko wrote:
> This reverts commit e6acaa7d3aff56d652adc4121ca44d71e816f53a.
>
> Different implementation will follow.
>
> https://jira.sw.ru/browse/PSBM-44684
> Signed-off-by: Nikita Yushchenko <nikita.yushchenko at virtuozzo.com>
Reviewed-by: Kirill Tkhai <ktkhai at virtuozzo.com>
> ---
> fs/fcntl.c | 2 -
> fs/mount.h | 2 -
> fs/namespace.c | 8 +-
> fs/open.c | 3 -
> fs/sync.c | 213 +-------------------------------------------
> include/linux/fs.h | 12 ---
> include/linux/ve.h | 2 -
> kernel/ve/ve.c | 3 -
> kernel/ve/veowner.c | 8 --
> mm/msync.c | 2 -
> 10 files changed, 6 insertions(+), 249 deletions(-)
>
> diff --git a/fs/fcntl.c b/fs/fcntl.c
> index 8af146ea9231..2e0c8515bd1a 100644
> --- a/fs/fcntl.c
> +++ b/fs/fcntl.c
> @@ -68,8 +68,6 @@ static int setfl(int fd, struct file * filp, unsigned long arg)
> if (!may_use_odirect())
> arg &= ~O_DIRECT;
>
> - if (ve_fsync_behavior() == FSYNC_NEVER)
> - arg &= ~O_SYNC;
> /*
> * O_APPEND cannot be cleared if the file is marked as append-only
> * and the file is open for write.
> diff --git a/fs/mount.h b/fs/mount.h
> index 9043614df477..ce646e66efc4 100644
> --- a/fs/mount.h
> +++ b/fs/mount.h
> @@ -101,8 +101,6 @@ static inline int is_mounted(struct vfsmount *mnt)
> return !IS_ERR_OR_NULL(real_mount(mnt)->mnt_ns);
> }
>
> -extern struct rw_semaphore namespace_sem;
> -
> extern struct mount *__lookup_mnt(struct vfsmount *, struct dentry *);
>
> extern int __legitimize_mnt(struct vfsmount *, unsigned);
> diff --git a/fs/namespace.c b/fs/namespace.c
> index dc73e945b746..558fa26bde50 100644
> --- a/fs/namespace.c
> +++ b/fs/namespace.c
> @@ -72,7 +72,7 @@ static DEFINE_IDA(mnt_group_ida);
> static struct hlist_head *mount_hashtable __read_mostly;
> static struct hlist_head *mountpoint_hashtable __read_mostly;
> static struct kmem_cache *mnt_cache __read_mostly;
> -DECLARE_RWSEM(namespace_sem);
> +static DECLARE_RWSEM(namespace_sem);
> static HLIST_HEAD(unmounted); /* protected by namespace_sem */
> static LIST_HEAD(ex_mountpoints); /* protected by namespace_sem */
>
> @@ -1325,8 +1325,9 @@ struct vfsmount *mnt_clone_internal(const struct path *path)
> return &p->mnt;
> }
>
> -struct mount *mnt_list_next(struct mnt_namespace *ns,
> - struct list_head *p)
> +#ifdef CONFIG_PROC_FS
> +static struct mount *mnt_list_next(struct mnt_namespace *ns,
> + struct list_head *p)
> {
> struct mount *mnt, *ret = NULL;
>
> @@ -1343,7 +1344,6 @@ struct mount *mnt_list_next(struct mnt_namespace *ns,
> return ret;
> }
>
> -#ifdef CONFIG_PROC_FS
> /* iterator; we want it to have access to namespace_sem, thus here... */
> static void *m_start(struct seq_file *m, loff_t *pos)
> {
> diff --git a/fs/open.c b/fs/open.c
> index 65e60aa661a8..040df8bc6e76 100644
> --- a/fs/open.c
> +++ b/fs/open.c
> @@ -785,9 +785,6 @@ static int do_dentry_open(struct file *f,
> if (!may_use_odirect())
> f->f_flags &= ~O_DIRECT;
>
> - if (ve_fsync_behavior() == FSYNC_NEVER)
> - f->f_flags &= ~O_SYNC;
> -
> if (unlikely(f->f_flags & O_PATH)) {
> f->f_mode = FMODE_PATH | FMODE_OPENED;
> f->f_op = &empty_fops;
> diff --git a/fs/sync.c b/fs/sync.c
> index 1c78756d4749..1373a610dc78 100644
> --- a/fs/sync.c
> +++ b/fs/sync.c
> @@ -8,7 +8,6 @@
> #include <linux/fs.h>
> #include <linux/slab.h>
> #include <linux/export.h>
> -#include <linux/mount.h>
> #include <linux/namei.h>
> #include <linux/sched.h>
> #include <linux/writeback.h>
> @@ -17,9 +16,7 @@
> #include <linux/pagemap.h>
> #include <linux/quotaops.h>
> #include <linux/backing-dev.h>
> -#include <linux/ve.h>
> #include "internal.h"
> -#include "mount.h"
>
> #define VALID_FLAGS (SYNC_FILE_RANGE_WAIT_BEFORE|SYNC_FILE_RANGE_WRITE| \
> SYNC_FILE_RANGE_WAIT_AFTER)
> @@ -99,160 +96,6 @@ static void fdatawait_one_bdev(struct block_device *bdev, void *arg)
> filemap_fdatawait_keep_errors(bdev->bd_inode->i_mapping);
> }
>
> -struct sync_sb {
> - struct list_head list;
> - struct super_block *sb;
> -};
> -
> -static void sync_release_filesystems(struct list_head *sync_list)
> -{
> - struct sync_sb *ss, *tmp;
> -
> - list_for_each_entry_safe(ss, tmp, sync_list, list) {
> - list_del(&ss->list);
> - put_super(ss->sb);
> - kfree(ss);
> - }
> -}
> -
> -static int sync_filesystem_collected(struct list_head *sync_list, struct super_block *sb)
> -{
> - struct sync_sb *ss;
> -
> - list_for_each_entry(ss, sync_list, list)
> - if (ss->sb == sb)
> - return 1;
> - return 0;
> -}
> -
> -static int sync_collect_filesystems(struct ve_struct *ve, struct list_head *sync_list)
> -{
> - struct mount *mnt;
> - struct mnt_namespace *mnt_ns;
> - struct nsproxy *ve_ns;
> - struct sync_sb *ss;
> - int ret = 0;
> -
> - BUG_ON(!list_empty(sync_list));
> -
> - down_read(&namespace_sem);
> -
> - rcu_read_lock();
> - ve_ns = rcu_dereference(ve->ve_ns);
> - if (!ve_ns) {
> - rcu_read_unlock();
> - up_read(&namespace_sem);
> - return 0;
> - }
> - mnt_ns = ve_ns->mnt_ns;
> - rcu_read_unlock();
> -
> - mnt = mnt_list_next(mnt_ns, &mnt_ns->list);
> - while (mnt) {
> - if (sync_filesystem_collected(sync_list, mnt->mnt.mnt_sb))
> - goto next;
> -
> - ss = kmalloc(sizeof(*ss), GFP_KERNEL);
> - if (ss == NULL) {
> - ret = -ENOMEM;
> - break;
> - }
> - ss->sb = mnt->mnt.mnt_sb;
> - /*
> - * We hold mount point and thus can be sure, that superblock is
> - * alive. And it means, that we can safely increase it's usage
> - * counter.
> - */
> - spin_lock(&sb_lock);
> - ss->sb->s_count++;
> - spin_unlock(&sb_lock);
> - list_add_tail(&ss->list, sync_list);
> -next:
> - mnt = mnt_list_next(mnt_ns, &mnt->mnt_list);
> - }
> - up_read(&namespace_sem);
> - return ret;
> -}
> -
> -static void sync_filesystems_ve(struct ve_struct *ve, int wait)
> -{
> - struct super_block *sb;
> - LIST_HEAD(sync_list);
> - struct sync_sb *ss;
> -
> - /*
> - * We don't need to care about allocating failure here. At least we
> - * don't need to skip sync on such error.
> - * Let's sync what we collected already instead.
> - */
> - sync_collect_filesystems(ve, &sync_list);
> -
> - list_for_each_entry(ss, &sync_list, list) {
> - sb = ss->sb;
> - down_read(&sb->s_umount);
> - if (!sb_rdonly(sb) && sb->s_root && (sb->s_flags & SB_BORN))
> - __sync_filesystem(sb, wait);
> - up_read(&sb->s_umount);
> - }
> -
> - sync_release_filesystems(&sync_list);
> -}
> -
> -static int is_sb_ve_accessible(struct ve_struct *ve, struct super_block *sb)
> -{
> - struct mount *mnt;
> - struct mnt_namespace *mnt_ns;
> - struct nsproxy *ve_ns;
> - int ret = 0;
> -
> - down_read(&namespace_sem);
> -
> - rcu_read_lock();
> - ve_ns = rcu_dereference(ve->ve_ns);
> - if (!ve_ns) {
> - rcu_read_unlock();
> - up_read(&namespace_sem);
> - return 0;
> - }
> - mnt_ns = ve_ns->mnt_ns;
> - rcu_read_unlock();
> -
> - list_for_each_entry(mnt, &mnt_ns->list, mnt_list) {
> - if (mnt->mnt.mnt_sb == sb) {
> - ret = 1;
> - break;
> - }
> - }
> - up_read(&namespace_sem);
> - return ret;
> -}
> -
> -static int __ve_fsync_behavior(struct ve_struct *ve)
> -{
> - /*
> - * - __ve_fsync_behavior() is not called for ve0
> - * - FSYNC_FILTERED for veX does NOT mean "filtered" behavior
> - * - FSYNC_FILTERED for veX means "get value from ve0"
> - */
> - if (ve->fsync_enable == FSYNC_FILTERED)
> - return get_ve0()->fsync_enable;
> - else if (ve->fsync_enable)
> - return FSYNC_FILTERED; /* sync forced by ve is always filtered */
> - else
> - return 0;
> -}
> -
> -int ve_fsync_behavior(void)
> -{
> - struct ve_struct *ve;
> -
> - ve = get_exec_env();
> - if (ve_is_super(ve))
> - return FSYNC_ALWAYS;
> - else
> - return __ve_fsync_behavior(ve);
> -}
> -
> /*
> * Sync everything. We start by waking flusher threads so that most of
> * writeback runs on all devices in parallel. Then we sync all inodes reliably
> @@ -265,32 +108,8 @@ int ve_fsync_behavior(void)
> */
> void ksys_sync(void)
> {
> - struct ve_struct *ve = get_exec_env();
> int nowait = 0, wait = 1;
>
> - if (!ve_is_super(ve)) {
> - int fsb;
> - /*
> - * init can't sync during VE stop. Rationale:
> - * - NFS with -o hard will block forever as network is down
> - * - no useful job is performed as VE0 will call umount/sync
> - * by his own later
> - * Den
> - */
> - if (is_child_reaper(task_pid(current)))
> - return;
> -
> - fsb = __ve_fsync_behavior(ve);
> - if (fsb == FSYNC_NEVER)
> - return;
> -
> - if (fsb == FSYNC_FILTERED) {
> - sync_filesystems_ve(ve, nowait);
> - sync_filesystems_ve(ve, wait);
> - return;
> - }
> - }
> -
> wakeup_flusher_threads(WB_REASON_SYNC);
> iterate_supers(sync_inodes_one_sb, NULL);
> iterate_supers(sync_fs_one_sb, &nowait);
> @@ -343,42 +162,18 @@ SYSCALL_DEFINE1(syncfs, int, fd)
> {
> struct fd f = fdget(fd);
> struct super_block *sb;
> - int ret = 0, ret2 = 0;
> - struct ve_struct *ve;
> + int ret, ret2;
>
> if (!f.file)
> return -EBADF;
> sb = f.file->f_path.dentry->d_sb;
>
> - ve = get_exec_env();
> -
> - if (!ve_is_super(ve)) {
> - int fsb;
> - /*
> - * init can't sync during VE stop. Rationale:
> - * - NFS with -o hard will block forever as network is down
> - * - no useful job is performed as VE0 will call umount/sync
> - * by his own later
> - * Den
> - */
> - if (is_child_reaper(task_pid(current)))
> - goto fdput;
> -
> - fsb = __ve_fsync_behavior(ve);
> - if (fsb == FSYNC_NEVER)
> - goto fdput;
> -
> - if ((fsb == FSYNC_FILTERED) && !is_sb_ve_accessible(ve, sb))
> - goto fdput;
> - }
> -
> down_read(&sb->s_umount);
> ret = sync_filesystem(sb);
> up_read(&sb->s_umount);
>
> ret2 = errseq_check_and_advance(&sb->s_wb_err, &f.file->f_sb_err);
>
> -fdput:
> fdput(f);
> return ret ? ret : ret2;
> }
> @@ -422,13 +217,9 @@ EXPORT_SYMBOL(vfs_fsync);
>
> static int do_fsync(unsigned int fd, int datasync)
> {
> - struct fd f;
> + struct fd f = fdget(fd);
> int ret = -EBADF;
>
> - if (ve_fsync_behavior() == FSYNC_NEVER)
> - return 0;
> -
> - f = fdget(fd);
> if (f.file) {
> ret = vfs_fsync(f.file, datasync);
> fdput(f);
> diff --git a/include/linux/fs.h b/include/linux/fs.h
> index 42021f0151f5..01419dbd864b 100644
> --- a/include/linux/fs.h
> +++ b/include/linux/fs.h
> @@ -71,7 +71,6 @@ struct fsverity_operations;
> struct fs_context;
> struct fs_parameter_spec;
> struct fileattr;
> -struct mnt_namespace;
>
> extern void __init inode_init(void);
> extern void __init inode_init_early(void);
> @@ -2522,7 +2521,6 @@ void kill_anon_super(struct super_block *sb);
> void kill_litter_super(struct super_block *sb);
> void deactivate_super(struct super_block *sb);
> void deactivate_locked_super(struct super_block *sb);
> -void put_super(struct super_block *sb);
> int set_anon_super(struct super_block *s, void *data);
> int set_anon_super_fc(struct super_block *s, struct fs_context *fc);
> int get_anon_bdev(dev_t *);
> @@ -3148,13 +3146,6 @@ extern bool path_is_under(const struct path *, const struct path *);
>
> extern char *file_path(struct file *, char *, int);
>
> -int ve_fsync_behavior(void);
> -
> -#define FSYNC_NEVER 0 /* ve syncs are ignored */
> -#define FSYNC_ALWAYS 1 /* ve syncs work as ususal */
> -#define FSYNC_FILTERED 2 /* ve syncs only its files */
> -/* For non-ve0 FSYNC_FILTERED value means "get value from ve0". */
> -
> #include <linux/err.h>
>
> /* needed for stackable file system support */
> @@ -3504,9 +3495,6 @@ void setattr_copy(struct user_namespace *, struct inode *inode,
>
> extern int file_update_time(struct file *file);
>
> -extern struct mount *mnt_list_next(struct mnt_namespace *ns,
> - struct list_head *p);
> -
> static inline bool vma_is_dax(const struct vm_area_struct *vma)
> {
> return vma->vm_file && IS_DAX(vma->vm_file->f_mapping->host);
> diff --git a/include/linux/ve.h b/include/linux/ve.h
> index 4c8f7d308829..1a66063d9ba8 100644
> --- a/include/linux/ve.h
> +++ b/include/linux/ve.h
> @@ -61,8 +61,6 @@ struct ve_struct {
> struct kstat_lat_pcpu_struct sched_lat_ve;
> int odirect_enable;
>
> - int fsync_enable;
> -
> #if IS_ENABLED(CONFIG_BINFMT_MISC)
> struct binfmt_misc *binfmt_misc;
> #endif
> diff --git a/kernel/ve/ve.c b/kernel/ve/ve.c
> index 557a14f216c4..48be49337a87 100644
> --- a/kernel/ve/ve.c
> +++ b/kernel/ve/ve.c
> @@ -70,7 +70,6 @@ struct ve_struct ve0 = {
> .sched_lat_ve.cur = &ve0_lat_stats,
> .netns_avail_nr = ATOMIC_INIT(INT_MAX),
> .netns_max_nr = INT_MAX,
> - .fsync_enable = FSYNC_FILTERED,
> ._randomize_va_space =
> #ifdef CONFIG_COMPAT_BRK
> 1,
> @@ -931,8 +930,6 @@ static struct cgroup_subsys_state *ve_create(struct cgroup_subsys_state *parent_
> ve->meminfo_val = VE_MEMINFO_DEFAULT;
>
> ve->odirect_enable = 2;
> - /* for veX FSYNC_FILTERED means "get value from ve0 */
> - ve->fsync_enable = FSYNC_FILTERED;
>
> atomic_set(&ve->netns_avail_nr, NETNS_MAX_NR_DEFAULT);
> ve->netns_max_nr = NETNS_MAX_NR_DEFAULT;
> diff --git a/kernel/ve/veowner.c b/kernel/ve/veowner.c
> index e255fe57d447..b0aba35b6be9 100644
> --- a/kernel/ve/veowner.c
> +++ b/kernel/ve/veowner.c
> @@ -7,7 +7,6 @@
> *
> */
>
> -#include <linux/ve.h>
> #include <linux/init.h>
> #include <linux/module.h>
> #include <linux/proc_fs.h>
> @@ -67,13 +66,6 @@ static struct ctl_table vz_fs_table[] = {
> .extra1 = &ve_mount_nr_min,
> .extra2 = &ve_mount_nr_max,
> },
> - {
> - .procname = "fsync-enable",
> - .data = &ve0.fsync_enable,
> - .maxlen = sizeof(int),
> - .mode = 0644 | S_ISVTX,
> - .proc_handler = &proc_dointvec_virtual,
> - },
> { }
> };
>
> diff --git a/mm/msync.c b/mm/msync.c
> index 20737eb4b76b..137d1c104f3e 100644
> --- a/mm/msync.c
> +++ b/mm/msync.c
> @@ -51,8 +51,6 @@ SYSCALL_DEFINE3(msync, unsigned long, start, size_t, len, int, flags)
> if (end < start)
> goto out;
> error = 0;
> - if (ve_fsync_behavior() == FSYNC_NEVER)
> - goto out;
> if (end == start)
> goto out;
> /*
>
More information about the Devel
mailing list