[Devel] [PATCH vz9 5/5] fs: per-VE sync
Nikita Yushchenko
nikita.yushchenko at virtuozzo.com
Mon Nov 22 09:20:32 MSK 2021
This contains part of vz7/vz8 per-VE sync code, updated to support
non-root mount namespaces within VE.
https://jira.sw.ru/browse/PSBM-44684
Signed-off-by: Nikita Yushchenko <nikita.yushchenko at virtuozzo.com>
---
fs/sync.c | 98 ++++++++++++++++++++++++++++++++++++++++------
include/linux/fs.h | 2 +
kernel/ve/ve.c | 5 ++-
3 files changed, 90 insertions(+), 15 deletions(-)
diff --git a/fs/sync.c b/fs/sync.c
index 31e6f0c6402d..9ec0a8073300 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -70,11 +70,17 @@ int sync_filesystem(struct super_block *sb)
EXPORT_SYMBOL(sync_filesystem);
struct sync_arg {
+ struct ve_struct *ve;
int wait;
};
static void sync_inodes_one_sb(struct super_block *sb, void *arg)
{
+ struct sync_arg *sarg = arg;
+
+ if (sarg->ve && !is_sb_ve_accessible(sarg->ve, sb))
+ return;
+
if (!sb_rdonly(sb))
sync_inodes_sb(sb);
}
@@ -83,9 +89,19 @@ static void sync_fs_one_sb(struct super_block *sb, void *arg)
{
struct sync_arg *sarg = arg;
- if (!sb_rdonly(sb) && !(sb->s_iflags & SB_I_SKIP_SYNC) &&
- sb->s_op->sync_fs)
- sb->s_op->sync_fs(sb, sarg->wait);
+ if (sarg->ve && !is_sb_ve_accessible(sarg->ve, sb))
+ return;
+
+ if (!sb_rdonly(sb) && !(sb->s_iflags & SB_I_SKIP_SYNC)) {
+ if (sb->s_op->sync_fs)
+ sb->s_op->sync_fs(sb, sarg->wait);
+
+ /* For ve-local sync, process bdev here, since there is no easy
+ * equivalent of is_sb_ve_accessible() for bdevs
+ */
+ if (sarg->ve)
+ __sync_blockdev(sb->s_bdev, sarg->wait);
+ }
}
static void fdatawrite_one_bdev(struct block_device *bdev, void *arg)
@@ -103,6 +119,21 @@ static void fdatawait_one_bdev(struct block_device *bdev, void *arg)
filemap_fdatawait_keep_errors(bdev->bd_inode->i_mapping);
}
+static int __ve_fsync_behavior(struct ve_struct *ve)
+{
+ /*
+ * - __ve_fsync_behavior() is not called for ve0
+ * - FSYNC_FILTERED for veX does NOT mean "filtered" behavior
+ * - FSYNC_FILTERED for veX means "get value from ve0"
+ */
+ if (ve->fsync_enable == FSYNC_FILTERED)
+ return get_ve0()->fsync_enable;
+ else if (ve->fsync_enable)
+ return FSYNC_FILTERED; /* sync forced by ve is always filtered */
+ else
+ return 0;
+}
+
int ve_fsync_behavior(void)
{
struct ve_struct *ve;
@@ -111,7 +142,7 @@ int ve_fsync_behavior(void)
if (ve_is_super(ve))
return FSYNC_ALWAYS;
else
- return ve->fsync_enable;
+ return __ve_fsync_behavior(ve);
}
/*
@@ -126,21 +157,41 @@ int ve_fsync_behavior(void)
*/
void ksys_sync(void)
{
+ struct ve_struct *ve = get_exec_env();
struct sync_arg sarg;
- if (ve_fsync_behavior() == FSYNC_NEVER)
- return;
+ sarg.ve = NULL;
+ if (!ve_is_super(ve)) {
+ int fsb;
+ /*
+ * init can't sync during VE stop. Rationale:
+ * - NFS with -o hard will block forever as network is down
+ * - no useful job is performed as VE0 will call umount/sync
+ * on its own later
+ * Den
+ */
+ if (is_child_reaper(task_pid(current)))
+ return;
+
+ fsb = __ve_fsync_behavior(ve);
+ if (fsb == FSYNC_NEVER)
+ return;
+ if (fsb == FSYNC_FILTERED)
+ sarg.ve = ve;
+ }
wakeup_flusher_threads(WB_REASON_SYNC);
- iterate_supers(sync_inodes_one_sb, NULL);
+ iterate_supers(sync_inodes_one_sb, &sarg);
sarg.wait = 0;
iterate_supers(sync_fs_one_sb, &sarg);
sarg.wait = 1;
iterate_supers(sync_fs_one_sb, &sarg);
- iterate_bdevs(fdatawrite_one_bdev, NULL);
- iterate_bdevs(fdatawait_one_bdev, NULL);
- if (unlikely(laptop_mode))
- laptop_sync_completion();
+ if (!sarg.ve) {
+ iterate_bdevs(fdatawrite_one_bdev, NULL);
+ iterate_bdevs(fdatawait_one_bdev, NULL);
+ if (unlikely(laptop_mode))
+ laptop_sync_completion();
+ }
}
SYSCALL_DEFINE0(sync)
@@ -153,6 +204,7 @@ static void do_sync_work(struct work_struct *work)
{
struct sync_arg sarg;
+ sarg.ve = NULL;
sarg.wait = 0;
/*
@@ -188,13 +240,33 @@ SYSCALL_DEFINE1(syncfs, int, fd)
struct fd f = fdget(fd);
struct super_block *sb;
int ret = 0, ret2 = 0;
+ struct ve_struct *ve;
if (!f.file)
return -EBADF;
sb = f.file->f_path.dentry->d_sb;
- if (ve_fsync_behavior() == FSYNC_NEVER)
- goto fdput;
+ ve = get_exec_env();
+
+ if (!ve_is_super(ve)) {
+ int fsb;
+ /*
+ * init can't sync during VE stop. Rationale:
+ * - NFS with -o hard will block forever as network is down
+ * - no useful job is performed as VE0 will call umount/sync
+ * on its own later
+ * Den
+ */
+ if (is_child_reaper(task_pid(current)))
+ goto fdput;
+
+ fsb = __ve_fsync_behavior(ve);
+ if (fsb == FSYNC_NEVER)
+ goto fdput;
+
+ if ((fsb == FSYNC_FILTERED) && !is_sb_ve_accessible(ve, sb))
+ goto fdput;
+ }
down_read(&sb->s_umount);
ret = sync_filesystem(sb);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index fb21d1a32cdb..9f34e9384f88 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -3148,6 +3148,8 @@ extern char *file_path(struct file *, char *, int);
#define FSYNC_NEVER 0 /* ve syncs are ignored */
#define FSYNC_ALWAYS 1 /* ve syncs work as ususal */
+#define FSYNC_FILTERED 2 /* ve syncs only its files */
+/* For non-ve0 FSYNC_FILTERED value means "get value from ve0". */
#ifdef CONFIG_VE
int ve_fsync_behavior(void);
diff --git a/kernel/ve/ve.c b/kernel/ve/ve.c
index e94aa90aff25..557a14f216c4 100644
--- a/kernel/ve/ve.c
+++ b/kernel/ve/ve.c
@@ -70,7 +70,7 @@ struct ve_struct ve0 = {
.sched_lat_ve.cur = &ve0_lat_stats,
.netns_avail_nr = ATOMIC_INIT(INT_MAX),
.netns_max_nr = INT_MAX,
- .fsync_enable = FSYNC_ALWAYS,
+ .fsync_enable = FSYNC_FILTERED,
._randomize_va_space =
#ifdef CONFIG_COMPAT_BRK
1,
@@ -931,7 +931,8 @@ static struct cgroup_subsys_state *ve_create(struct cgroup_subsys_state *parent_
ve->meminfo_val = VE_MEMINFO_DEFAULT;
ve->odirect_enable = 2;
- ve->fsync_enable = FSYNC_ALWAYS;
+ /* for veX FSYNC_FILTERED means "get value from ve0" */
+ ve->fsync_enable = FSYNC_FILTERED;
atomic_set(&ve->netns_avail_nr, NETNS_MAX_NR_DEFAULT);
ve->netns_max_nr = NETNS_MAX_NR_DEFAULT;
--
2.30.2
More information about the Devel
mailing list