[Devel] [PATCH vz9 2/5] ve: support per-VE disable of filesystem sync operations
Kirill Tkhai
ktkhai at virtuozzo.com
Mon Nov 22 10:58:33 MSK 2021
On 22.11.2021 09:20, Nikita Yushchenko wrote:
> Split out of vz7/vz8 support of per-ve syncs.
>
> https://jira.sw.ru/browse/PSBM-44684
> Signed-off-by: Nikita Yushchenko <nikita.yushchenko at virtuozzo.com>
Reviewed-by: Kirill Tkhai <ktkhai at virtuozzo.com>
> ---
> fs/fcntl.c | 2 ++
> fs/open.c | 3 +++
> fs/sync.c | 23 +++++++++++++++++++++--
> include/linux/fs.h | 12 ++++++++++++
> include/linux/ve.h | 2 ++
> kernel/ve/ve.c | 2 ++
> kernel/ve/veowner.c | 8 ++++++++
> mm/msync.c | 2 ++
> 8 files changed, 52 insertions(+), 2 deletions(-)
>
> diff --git a/fs/fcntl.c b/fs/fcntl.c
> index 2e0c8515bd1a..8af146ea9231 100644
> --- a/fs/fcntl.c
> +++ b/fs/fcntl.c
> @@ -68,6 +68,8 @@ static int setfl(int fd, struct file * filp, unsigned long arg)
> if (!may_use_odirect())
> arg &= ~O_DIRECT;
>
> + if (ve_fsync_behavior() == FSYNC_NEVER)
> + arg &= ~O_SYNC;
> /*
> * O_APPEND cannot be cleared if the file is marked as append-only
> * and the file is open for write.
> diff --git a/fs/open.c b/fs/open.c
> index 040df8bc6e76..65e60aa661a8 100644
> --- a/fs/open.c
> +++ b/fs/open.c
> @@ -785,6 +785,9 @@ static int do_dentry_open(struct file *f,
> if (!may_use_odirect())
> f->f_flags &= ~O_DIRECT;
>
> + if (ve_fsync_behavior() == FSYNC_NEVER)
> + f->f_flags &= ~O_SYNC;
> +
> if (unlikely(f->f_flags & O_PATH)) {
> f->f_mode = FMODE_PATH | FMODE_OPENED;
> f->f_op = &empty_fops;
> diff --git a/fs/sync.c b/fs/sync.c
> index 1373a610dc78..f57e22fb118f 100644
> --- a/fs/sync.c
> +++ b/fs/sync.c
> @@ -16,6 +16,7 @@
> #include <linux/pagemap.h>
> #include <linux/quotaops.h>
> #include <linux/backing-dev.h>
> +#include <linux/ve.h>
> #include "internal.h"
>
> #define VALID_FLAGS (SYNC_FILE_RANGE_WAIT_BEFORE|SYNC_FILE_RANGE_WRITE| \
> @@ -96,6 +97,17 @@ static void fdatawait_one_bdev(struct block_device *bdev, void *arg)
> filemap_fdatawait_keep_errors(bdev->bd_inode->i_mapping);
> }
>
> +int ve_fsync_behavior(void)
> +{
> + struct ve_struct *ve;
> +
> + ve = get_exec_env();
> + if (ve_is_super(ve))
> + return FSYNC_ALWAYS;
> + else
> + return ve->fsync_enable;
> +}
> +
> /*
> * Sync everything. We start by waking flusher threads so that most of
> * writeback runs on all devices in parallel. Then we sync all inodes reliably
> @@ -110,6 +122,9 @@ void ksys_sync(void)
> {
> int nowait = 0, wait = 1;
>
> + if (ve_fsync_behavior() == FSYNC_NEVER)
> + return;
> +
> wakeup_flusher_threads(WB_REASON_SYNC);
> iterate_supers(sync_inodes_one_sb, NULL);
> iterate_supers(sync_fs_one_sb, &nowait);
> @@ -162,18 +177,22 @@ SYSCALL_DEFINE1(syncfs, int, fd)
> {
> struct fd f = fdget(fd);
> struct super_block *sb;
> - int ret, ret2;
> + int ret = 0, ret2 = 0;
>
> if (!f.file)
> return -EBADF;
> sb = f.file->f_path.dentry->d_sb;
>
> + if (ve_fsync_behavior() == FSYNC_NEVER)
> + goto fdput;
> +
> down_read(&sb->s_umount);
> ret = sync_filesystem(sb);
> up_read(&sb->s_umount);
>
> ret2 = errseq_check_and_advance(&sb->s_wb_err, &f.file->f_sb_err);
>
> +fdput:
> fdput(f);
> return ret ? ret : ret2;
> }
> @@ -220,7 +239,7 @@ static int do_fsync(unsigned int fd, int datasync)
> struct fd f = fdget(fd);
> int ret = -EBADF;
>
> - if (f.file) {
> + if (f.file != NULL && ve_fsync_behavior() != FSYNC_NEVER) {
> ret = vfs_fsync(f.file, datasync);
> fdput(f);
> }
> diff --git a/include/linux/fs.h b/include/linux/fs.h
> index 01419dbd864b..fb21d1a32cdb 100644
> --- a/include/linux/fs.h
> +++ b/include/linux/fs.h
> @@ -3146,6 +3146,18 @@ extern bool path_is_under(const struct path *, const struct path *);
>
> extern char *file_path(struct file *, char *, int);
>
> +#define FSYNC_NEVER 0 /* ve syncs are ignored */
> +#define FSYNC_ALWAYS 1 /* ve syncs work as usual */
> +
> +#ifdef CONFIG_VE
> +int ve_fsync_behavior(void);
> +#else
> +static inline int ve_fsync_behavior(void)
> +{
> + return FSYNC_ALWAYS;
> +}
> +#endif
> +
> #include <linux/err.h>
>
> /* needed for stackable file system support */
> diff --git a/include/linux/ve.h b/include/linux/ve.h
> index 1a66063d9ba8..4c8f7d308829 100644
> --- a/include/linux/ve.h
> +++ b/include/linux/ve.h
> @@ -61,6 +61,8 @@ struct ve_struct {
> struct kstat_lat_pcpu_struct sched_lat_ve;
> int odirect_enable;
>
> + int fsync_enable;
> +
> #if IS_ENABLED(CONFIG_BINFMT_MISC)
> struct binfmt_misc *binfmt_misc;
> #endif
> diff --git a/kernel/ve/ve.c b/kernel/ve/ve.c
> index 48be49337a87..e94aa90aff25 100644
> --- a/kernel/ve/ve.c
> +++ b/kernel/ve/ve.c
> @@ -70,6 +70,7 @@ struct ve_struct ve0 = {
> .sched_lat_ve.cur = &ve0_lat_stats,
> .netns_avail_nr = ATOMIC_INIT(INT_MAX),
> .netns_max_nr = INT_MAX,
> + .fsync_enable = FSYNC_ALWAYS,
> ._randomize_va_space =
> #ifdef CONFIG_COMPAT_BRK
> 1,
> @@ -930,6 +931,7 @@ static struct cgroup_subsys_state *ve_create(struct cgroup_subsys_state *parent_
> ve->meminfo_val = VE_MEMINFO_DEFAULT;
>
> ve->odirect_enable = 2;
> + ve->fsync_enable = FSYNC_ALWAYS;
>
> atomic_set(&ve->netns_avail_nr, NETNS_MAX_NR_DEFAULT);
> ve->netns_max_nr = NETNS_MAX_NR_DEFAULT;
> diff --git a/kernel/ve/veowner.c b/kernel/ve/veowner.c
> index b0aba35b6be9..e255fe57d447 100644
> --- a/kernel/ve/veowner.c
> +++ b/kernel/ve/veowner.c
> @@ -7,6 +7,7 @@
> *
> */
>
> +#include <linux/ve.h>
> #include <linux/init.h>
> #include <linux/module.h>
> #include <linux/proc_fs.h>
> @@ -66,6 +67,13 @@ static struct ctl_table vz_fs_table[] = {
> .extra1 = &ve_mount_nr_min,
> .extra2 = &ve_mount_nr_max,
> },
> + {
> + .procname = "fsync-enable",
> + .data = &ve0.fsync_enable,
> + .maxlen = sizeof(int),
> + .mode = 0644 | S_ISVTX,
> + .proc_handler = &proc_dointvec_virtual,
> + },
> { }
> };
>
> diff --git a/mm/msync.c b/mm/msync.c
> index 137d1c104f3e..20737eb4b76b 100644
> --- a/mm/msync.c
> +++ b/mm/msync.c
> @@ -51,6 +51,8 @@ SYSCALL_DEFINE3(msync, unsigned long, start, size_t, len, int, flags)
> if (end < start)
> goto out;
> error = 0;
> + if (ve_fsync_behavior() == FSYNC_NEVER)
> + goto out;
> if (end == start)
> goto out;
> /*
>
More information about the Devel
mailing list