[Devel] [PATCH RHEL9 COMMIT] Revert "ve/fs/sync: Per containter sync and syncfs and fs.fsync-enable sysctl"
Konstantin Khorenko
khorenko at virtuozzo.com
Wed Dec 8 19:06:44 MSK 2021
The commit is pushed to "branch-rh9-5.14.vz9.1.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh9-5.14.0-4.vz9.10.34
------>
commit 746f7db6ef45e322759c8e16c4bddca5dd2d4076
Author: Nikita Yushchenko <nikita.yushchenko at virtuozzo.com>
Date: Wed Dec 8 19:06:44 2021 +0300
Revert "ve/fs/sync: Per containter sync and syncfs and fs.fsync-enable sysctl"
This reverts commit e6acaa7d3aff56d652adc4121ca44d71e816f53a.
Different implementation will follow.
https://jira.sw.ru/browse/PSBM-44684
Signed-off-by: Nikita Yushchenko <nikita.yushchenko at virtuozzo.com>
Reviewed-by: Kirill Tkhai <ktkhai at virtuozzo.com>
---
fs/fcntl.c | 2 -
fs/mount.h | 2 -
fs/namespace.c | 8 +-
fs/open.c | 3 -
fs/sync.c | 213 +---------------------------------------------------
include/linux/fs.h | 12 ---
include/linux/ve.h | 2 -
kernel/ve/ve.c | 3 -
kernel/ve/veowner.c | 8 --
mm/msync.c | 2 -
10 files changed, 6 insertions(+), 249 deletions(-)
diff --git a/fs/fcntl.c b/fs/fcntl.c
index 8af146ea9231..2e0c8515bd1a 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -68,8 +68,6 @@ static int setfl(int fd, struct file * filp, unsigned long arg)
if (!may_use_odirect())
arg &= ~O_DIRECT;
- if (ve_fsync_behavior() == FSYNC_NEVER)
- arg &= ~O_SYNC;
/*
* O_APPEND cannot be cleared if the file is marked as append-only
* and the file is open for write.
diff --git a/fs/mount.h b/fs/mount.h
index 9043614df477..ce646e66efc4 100644
--- a/fs/mount.h
+++ b/fs/mount.h
@@ -101,8 +101,6 @@ static inline int is_mounted(struct vfsmount *mnt)
return !IS_ERR_OR_NULL(real_mount(mnt)->mnt_ns);
}
-extern struct rw_semaphore namespace_sem;
-
extern struct mount *__lookup_mnt(struct vfsmount *, struct dentry *);
extern int __legitimize_mnt(struct vfsmount *, unsigned);
diff --git a/fs/namespace.c b/fs/namespace.c
index dc73e945b746..558fa26bde50 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -72,7 +72,7 @@ static DEFINE_IDA(mnt_group_ida);
static struct hlist_head *mount_hashtable __read_mostly;
static struct hlist_head *mountpoint_hashtable __read_mostly;
static struct kmem_cache *mnt_cache __read_mostly;
-DECLARE_RWSEM(namespace_sem);
+static DECLARE_RWSEM(namespace_sem);
static HLIST_HEAD(unmounted); /* protected by namespace_sem */
static LIST_HEAD(ex_mountpoints); /* protected by namespace_sem */
@@ -1325,8 +1325,9 @@ struct vfsmount *mnt_clone_internal(const struct path *path)
return &p->mnt;
}
-struct mount *mnt_list_next(struct mnt_namespace *ns,
- struct list_head *p)
+#ifdef CONFIG_PROC_FS
+static struct mount *mnt_list_next(struct mnt_namespace *ns,
+ struct list_head *p)
{
struct mount *mnt, *ret = NULL;
@@ -1343,7 +1344,6 @@ struct mount *mnt_list_next(struct mnt_namespace *ns,
return ret;
}
-#ifdef CONFIG_PROC_FS
/* iterator; we want it to have access to namespace_sem, thus here... */
static void *m_start(struct seq_file *m, loff_t *pos)
{
diff --git a/fs/open.c b/fs/open.c
index 65e60aa661a8..040df8bc6e76 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -785,9 +785,6 @@ static int do_dentry_open(struct file *f,
if (!may_use_odirect())
f->f_flags &= ~O_DIRECT;
- if (ve_fsync_behavior() == FSYNC_NEVER)
- f->f_flags &= ~O_SYNC;
-
if (unlikely(f->f_flags & O_PATH)) {
f->f_mode = FMODE_PATH | FMODE_OPENED;
f->f_op = &empty_fops;
diff --git a/fs/sync.c b/fs/sync.c
index 1c78756d4749..1373a610dc78 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -8,7 +8,6 @@
#include <linux/fs.h>
#include <linux/slab.h>
#include <linux/export.h>
-#include <linux/mount.h>
#include <linux/namei.h>
#include <linux/sched.h>
#include <linux/writeback.h>
@@ -17,9 +16,7 @@
#include <linux/pagemap.h>
#include <linux/quotaops.h>
#include <linux/backing-dev.h>
-#include <linux/ve.h>
#include "internal.h"
-#include "mount.h"
#define VALID_FLAGS (SYNC_FILE_RANGE_WAIT_BEFORE|SYNC_FILE_RANGE_WRITE| \
SYNC_FILE_RANGE_WAIT_AFTER)
@@ -99,160 +96,6 @@ static void fdatawait_one_bdev(struct block_device *bdev, void *arg)
filemap_fdatawait_keep_errors(bdev->bd_inode->i_mapping);
}
-struct sync_sb {
- struct list_head list;
- struct super_block *sb;
-};
-
-static void sync_release_filesystems(struct list_head *sync_list)
-{
- struct sync_sb *ss, *tmp;
-
- list_for_each_entry_safe(ss, tmp, sync_list, list) {
- list_del(&ss->list);
- put_super(ss->sb);
- kfree(ss);
- }
-}
-
-static int sync_filesystem_collected(struct list_head *sync_list, struct super_block *sb)
-{
- struct sync_sb *ss;
-
- list_for_each_entry(ss, sync_list, list)
- if (ss->sb == sb)
- return 1;
- return 0;
-}
-
-static int sync_collect_filesystems(struct ve_struct *ve, struct list_head *sync_list)
-{
- struct mount *mnt;
- struct mnt_namespace *mnt_ns;
- struct nsproxy *ve_ns;
- struct sync_sb *ss;
- int ret = 0;
-
- BUG_ON(!list_empty(sync_list));
-
- down_read(&namespace_sem);
-
- rcu_read_lock();
- ve_ns = rcu_dereference(ve->ve_ns);
- if (!ve_ns) {
- rcu_read_unlock();
- up_read(&namespace_sem);
- return 0;
- }
- mnt_ns = ve_ns->mnt_ns;
- rcu_read_unlock();
-
- mnt = mnt_list_next(mnt_ns, &mnt_ns->list);
- while (mnt) {
- if (sync_filesystem_collected(sync_list, mnt->mnt.mnt_sb))
- goto next;
-
- ss = kmalloc(sizeof(*ss), GFP_KERNEL);
- if (ss == NULL) {
- ret = -ENOMEM;
- break;
- }
- ss->sb = mnt->mnt.mnt_sb;
- /*
- * We hold mount point and thus can be sure, that superblock is
- * alive. And it means, that we can safely increase it's usage
- * counter.
- */
- spin_lock(&sb_lock);
- ss->sb->s_count++;
- spin_unlock(&sb_lock);
- list_add_tail(&ss->list, sync_list);
-next:
- mnt = mnt_list_next(mnt_ns, &mnt->mnt_list);
- }
- up_read(&namespace_sem);
- return ret;
-}
-
-static void sync_filesystems_ve(struct ve_struct *ve, int wait)
-{
- struct super_block *sb;
- LIST_HEAD(sync_list);
- struct sync_sb *ss;
-
- /*
- * We don't need to care about allocating failure here. At least we
- * don't need to skip sync on such error.
- * Let's sync what we collected already instead.
- */
- sync_collect_filesystems(ve, &sync_list);
-
- list_for_each_entry(ss, &sync_list, list) {
- sb = ss->sb;
- down_read(&sb->s_umount);
- if (!sb_rdonly(sb) && sb->s_root && (sb->s_flags & SB_BORN))
- __sync_filesystem(sb, wait);
- up_read(&sb->s_umount);
- }
-
- sync_release_filesystems(&sync_list);
-}
-
-static int is_sb_ve_accessible(struct ve_struct *ve, struct super_block *sb)
-{
- struct mount *mnt;
- struct mnt_namespace *mnt_ns;
- struct nsproxy *ve_ns;
- int ret = 0;
-
- down_read(&namespace_sem);
-
- rcu_read_lock();
- ve_ns = rcu_dereference(ve->ve_ns);
- if (!ve_ns) {
- rcu_read_unlock();
- up_read(&namespace_sem);
- return 0;
- }
- mnt_ns = ve_ns->mnt_ns;
- rcu_read_unlock();
-
- list_for_each_entry(mnt, &mnt_ns->list, mnt_list) {
- if (mnt->mnt.mnt_sb == sb) {
- ret = 1;
- break;
- }
- }
- up_read(&namespace_sem);
- return ret;
-}
-
-static int __ve_fsync_behavior(struct ve_struct *ve)
-{
- /*
- * - __ve_fsync_behavior() is not called for ve0
- * - FSYNC_FILTERED for veX does NOT mean "filtered" behavior
- * - FSYNC_FILTERED for veX means "get value from ve0"
- */
- if (ve->fsync_enable == FSYNC_FILTERED)
- return get_ve0()->fsync_enable;
- else if (ve->fsync_enable)
- return FSYNC_FILTERED; /* sync forced by ve is always filtered */
- else
- return 0;
-}
-
-int ve_fsync_behavior(void)
-{
- struct ve_struct *ve;
-
- ve = get_exec_env();
- if (ve_is_super(ve))
- return FSYNC_ALWAYS;
- else
- return __ve_fsync_behavior(ve);
-}
-
/*
* Sync everything. We start by waking flusher threads so that most of
* writeback runs on all devices in parallel. Then we sync all inodes reliably
@@ -265,32 +108,8 @@ int ve_fsync_behavior(void)
*/
void ksys_sync(void)
{
- struct ve_struct *ve = get_exec_env();
int nowait = 0, wait = 1;
- if (!ve_is_super(ve)) {
- int fsb;
- /*
- * init can't sync during VE stop. Rationale:
- * - NFS with -o hard will block forever as network is down
- * - no useful job is performed as VE0 will call umount/sync
- * by his own later
- * Den
- */
- if (is_child_reaper(task_pid(current)))
- return;
-
- fsb = __ve_fsync_behavior(ve);
- if (fsb == FSYNC_NEVER)
- return;
-
- if (fsb == FSYNC_FILTERED) {
- sync_filesystems_ve(ve, nowait);
- sync_filesystems_ve(ve, wait);
- return;
- }
- }
-
wakeup_flusher_threads(WB_REASON_SYNC);
iterate_supers(sync_inodes_one_sb, NULL);
iterate_supers(sync_fs_one_sb, &nowait);
@@ -343,42 +162,18 @@ SYSCALL_DEFINE1(syncfs, int, fd)
{
struct fd f = fdget(fd);
struct super_block *sb;
- int ret = 0, ret2 = 0;
- struct ve_struct *ve;
+ int ret, ret2;
if (!f.file)
return -EBADF;
sb = f.file->f_path.dentry->d_sb;
- ve = get_exec_env();
-
- if (!ve_is_super(ve)) {
- int fsb;
- /*
- * init can't sync during VE stop. Rationale:
- * - NFS with -o hard will block forever as network is down
- * - no useful job is performed as VE0 will call umount/sync
- * by his own later
- * Den
- */
- if (is_child_reaper(task_pid(current)))
- goto fdput;
-
- fsb = __ve_fsync_behavior(ve);
- if (fsb == FSYNC_NEVER)
- goto fdput;
-
- if ((fsb == FSYNC_FILTERED) && !is_sb_ve_accessible(ve, sb))
- goto fdput;
- }
-
down_read(&sb->s_umount);
ret = sync_filesystem(sb);
up_read(&sb->s_umount);
ret2 = errseq_check_and_advance(&sb->s_wb_err, &f.file->f_sb_err);
-fdput:
fdput(f);
return ret ? ret : ret2;
}
@@ -422,13 +217,9 @@ EXPORT_SYMBOL(vfs_fsync);
static int do_fsync(unsigned int fd, int datasync)
{
- struct fd f;
+ struct fd f = fdget(fd);
int ret = -EBADF;
- if (ve_fsync_behavior() == FSYNC_NEVER)
- return 0;
-
- f = fdget(fd);
if (f.file) {
ret = vfs_fsync(f.file, datasync);
fdput(f);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 42021f0151f5..01419dbd864b 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -71,7 +71,6 @@ struct fsverity_operations;
struct fs_context;
struct fs_parameter_spec;
struct fileattr;
-struct mnt_namespace;
extern void __init inode_init(void);
extern void __init inode_init_early(void);
@@ -2522,7 +2521,6 @@ void kill_anon_super(struct super_block *sb);
void kill_litter_super(struct super_block *sb);
void deactivate_super(struct super_block *sb);
void deactivate_locked_super(struct super_block *sb);
-void put_super(struct super_block *sb);
int set_anon_super(struct super_block *s, void *data);
int set_anon_super_fc(struct super_block *s, struct fs_context *fc);
int get_anon_bdev(dev_t *);
@@ -3148,13 +3146,6 @@ extern bool path_is_under(const struct path *, const struct path *);
extern char *file_path(struct file *, char *, int);
-int ve_fsync_behavior(void);
-
-#define FSYNC_NEVER 0 /* ve syncs are ignored */
-#define FSYNC_ALWAYS 1 /* ve syncs work as ususal */
-#define FSYNC_FILTERED 2 /* ve syncs only its files */
-/* For non-ve0 FSYNC_FILTERED value means "get value from ve0". */
-
#include <linux/err.h>
/* needed for stackable file system support */
@@ -3504,9 +3495,6 @@ void setattr_copy(struct user_namespace *, struct inode *inode,
extern int file_update_time(struct file *file);
-extern struct mount *mnt_list_next(struct mnt_namespace *ns,
- struct list_head *p);
-
static inline bool vma_is_dax(const struct vm_area_struct *vma)
{
return vma->vm_file && IS_DAX(vma->vm_file->f_mapping->host);
diff --git a/include/linux/ve.h b/include/linux/ve.h
index 4c8f7d308829..1a66063d9ba8 100644
--- a/include/linux/ve.h
+++ b/include/linux/ve.h
@@ -61,8 +61,6 @@ struct ve_struct {
struct kstat_lat_pcpu_struct sched_lat_ve;
int odirect_enable;
- int fsync_enable;
-
#if IS_ENABLED(CONFIG_BINFMT_MISC)
struct binfmt_misc *binfmt_misc;
#endif
diff --git a/kernel/ve/ve.c b/kernel/ve/ve.c
index 557a14f216c4..48be49337a87 100644
--- a/kernel/ve/ve.c
+++ b/kernel/ve/ve.c
@@ -70,7 +70,6 @@ struct ve_struct ve0 = {
.sched_lat_ve.cur = &ve0_lat_stats,
.netns_avail_nr = ATOMIC_INIT(INT_MAX),
.netns_max_nr = INT_MAX,
- .fsync_enable = FSYNC_FILTERED,
._randomize_va_space =
#ifdef CONFIG_COMPAT_BRK
1,
@@ -931,8 +930,6 @@ static struct cgroup_subsys_state *ve_create(struct cgroup_subsys_state *parent_
ve->meminfo_val = VE_MEMINFO_DEFAULT;
ve->odirect_enable = 2;
- /* for veX FSYNC_FILTERED means "get value from ve0 */
- ve->fsync_enable = FSYNC_FILTERED;
atomic_set(&ve->netns_avail_nr, NETNS_MAX_NR_DEFAULT);
ve->netns_max_nr = NETNS_MAX_NR_DEFAULT;
diff --git a/kernel/ve/veowner.c b/kernel/ve/veowner.c
index e255fe57d447..b0aba35b6be9 100644
--- a/kernel/ve/veowner.c
+++ b/kernel/ve/veowner.c
@@ -7,7 +7,6 @@
*
*/
-#include <linux/ve.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/proc_fs.h>
@@ -67,13 +66,6 @@ static struct ctl_table vz_fs_table[] = {
.extra1 = &ve_mount_nr_min,
.extra2 = &ve_mount_nr_max,
},
- {
- .procname = "fsync-enable",
- .data = &ve0.fsync_enable,
- .maxlen = sizeof(int),
- .mode = 0644 | S_ISVTX,
- .proc_handler = &proc_dointvec_virtual,
- },
{ }
};
diff --git a/mm/msync.c b/mm/msync.c
index 20737eb4b76b..137d1c104f3e 100644
--- a/mm/msync.c
+++ b/mm/msync.c
@@ -51,8 +51,6 @@ SYSCALL_DEFINE3(msync, unsigned long, start, size_t, len, int, flags)
if (end < start)
goto out;
error = 0;
- if (ve_fsync_behavior() == FSYNC_NEVER)
- goto out;
if (end == start)
goto out;
/*
More information about the Devel
mailing list