[Devel] [PATCH rh7 v2 2/3] ve/fs/sync: per container sync and syncfs
Andrey Ryabinin
aryabinin at virtuozzo.com
Tue Feb 2 07:51:55 PST 2016
The implementation is straightforward, since we already have per-container
writeback. Sync just gets the user beancounter and launches writeback work for it.
https://jira.sw.ru/browse/PSBM-39583
Signed-off-by: Andrey Ryabinin <aryabinin at virtuozzo.com>
---
fs/fs-writeback.c | 85 +++++++++++++++++++++++++++++++++--------------
fs/mount.h | 3 ++
fs/namespace.c | 4 +--
fs/super.c | 2 +-
fs/sync.c | 43 ++++++++++++++----------
include/linux/fs.h | 1 +
include/linux/writeback.h | 5 +++
7 files changed, 98 insertions(+), 45 deletions(-)
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index d48530f9..5817404 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -132,7 +132,8 @@ out_unlock:
static void
__bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages,
- bool range_cyclic, enum wb_reason reason)
+ struct user_beancounter *ub, bool range_cyclic,
+ enum wb_reason reason)
{
struct wb_writeback_work *work;
@@ -151,6 +152,7 @@ __bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages,
work->nr_pages = nr_pages;
work->range_cyclic = range_cyclic;
work->reason = reason;
+ work->ub = ub;
bdi_queue_work(bdi, work);
}
@@ -170,7 +172,7 @@ __bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages,
void bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages,
enum wb_reason reason)
{
- __bdi_start_writeback(bdi, nr_pages, true, reason);
+ __bdi_start_writeback(bdi, nr_pages, NULL, true, reason);
}
/**
@@ -1148,7 +1150,8 @@ void bdi_writeback_workfn(struct work_struct *work)
* Start writeback of `nr_pages' pages. If `nr_pages' is zero, write back
* the whole world.
*/
-void wakeup_flusher_threads(long nr_pages, enum wb_reason reason)
+void wakeup_flusher_threads_ub(long nr_pages, struct user_beancounter *ub,
+ enum wb_reason reason)
{
struct backing_dev_info *bdi;
@@ -1159,11 +1162,16 @@ void wakeup_flusher_threads(long nr_pages, enum wb_reason reason)
list_for_each_entry_rcu(bdi, &bdi_list, bdi_list) {
if (!bdi_has_dirty_io(bdi))
continue;
- __bdi_start_writeback(bdi, nr_pages, false, reason);
+ __bdi_start_writeback(bdi, nr_pages, ub, false, reason);
}
rcu_read_unlock();
}
+void wakeup_flusher_threads(long nr_pages, enum wb_reason reason)
+{
+ wakeup_flusher_threads_ub(nr_pages, NULL, reason);
+}
+
/*
* Wake up bdi's periodically to make sure dirtytime inodes gets
* written back periodically. We deliberately do *not* check the
@@ -1374,7 +1382,7 @@ out_unlock_inode:
}
EXPORT_SYMBOL(__mark_inode_dirty);
-static void wait_sb_inodes(struct super_block *sb)
+static void wait_sb_inodes(struct super_block *sb, struct user_beancounter *ub)
{
struct inode *inode, *old_inode = NULL;
@@ -1402,6 +1410,11 @@ static void wait_sb_inodes(struct super_block *sb)
spin_unlock(&inode->i_lock);
continue;
}
+ if (ub && (mapping->dirtied_ub != ub)) {
+ spin_unlock(&inode->i_lock);
+ continue;
+ }
+
__iget(inode);
spin_unlock(&inode->i_lock);
spin_unlock(&inode_sb_list_lock);
@@ -1427,17 +1440,8 @@ static void wait_sb_inodes(struct super_block *sb)
iput(old_inode);
}
-/**
- * writeback_inodes_sb_nr - writeback dirty inodes from given super_block
- * @sb: the superblock
- * @nr: the number of pages to write
- * @reason: reason why some writeback work initiated
- *
- * Start writeback on some inodes on this super_block. No guarantees are made
- * on how many (if any) will be written, and this function does not wait
- * for IO completion of submitted IO.
- */
-void writeback_inodes_sb_nr(struct super_block *sb,
+static void writeback_inodes_sb_ub_nr(struct super_block *sb,
+ struct user_beancounter *ub,
unsigned long nr,
enum wb_reason reason)
{
@@ -1449,6 +1453,7 @@ void writeback_inodes_sb_nr(struct super_block *sb,
.done = &done,
.nr_pages = nr,
.reason = reason,
+ .ub = ub,
};
if (sb->s_bdi == &noop_backing_dev_info)
@@ -1457,8 +1462,32 @@ void writeback_inodes_sb_nr(struct super_block *sb,
bdi_queue_work(sb->s_bdi, &work);
wait_for_completion(&done);
}
+
+/**
+ * writeback_inodes_sb_nr - writeback dirty inodes from given super_block
+ * @sb: the superblock
+ * @nr: the number of pages to write
+ * @reason: reason why some writeback work initiated
+ *
+ * Start writeback on some inodes on this super_block. No guarantees are made
+ * on how many (if any) will be written, and this function does not wait
+ * for IO completion of submitted IO.
+ */
+void writeback_inodes_sb_nr(struct super_block *sb,
+ unsigned long nr,
+ enum wb_reason reason)
+{
+
+ writeback_inodes_sb_ub_nr(sb, NULL, nr, reason);
+}
EXPORT_SYMBOL(writeback_inodes_sb_nr);
+/**
+ * writeback_inodes_sb_ub - writeback dirty inodes from given super_block
+ * on behalf of a user beancounter
+ * @sb: the superblock
+ * @ub: user beancounter handed to the writeback work; the non-_ub wrapper
+ * writeback_inodes_sb_nr() passes NULL here
+ * @reason: reason why some writeback work initiated
+ *
+ * Thin wrapper around writeback_inodes_sb_ub_nr() that takes the number of
+ * pages to write from get_nr_dirty_pages().
+ */
+void writeback_inodes_sb_ub(struct super_block *sb, struct user_beancounter *ub,
+ enum wb_reason reason)
+{
+ writeback_inodes_sb_ub_nr(sb, ub, get_nr_dirty_pages(), reason);
+}
+
/**
* writeback_inodes_sb - writeback dirty inodes from given super_block
* @sb: the superblock
@@ -1513,14 +1542,7 @@ int try_to_writeback_inodes_sb(struct super_block *sb, enum wb_reason reason)
}
EXPORT_SYMBOL(try_to_writeback_inodes_sb);
-/**
- * sync_inodes_sb - sync sb inode pages
- * @sb: the superblock
- *
- * This function writes and waits on any dirty inode belonging to this
- * super_block.
- */
-void sync_inodes_sb(struct super_block *sb)
+void sync_inodes_sb_ub(struct super_block *sb, struct user_beancounter *ub)
{
DECLARE_COMPLETION_ONSTACK(done);
struct wb_writeback_work work = {
@@ -1531,6 +1553,7 @@ void sync_inodes_sb(struct super_block *sb)
.done = &done,
.reason = WB_REASON_SYNC,
.for_sync = 1,
+ .ub = ub,
};
/* Nothing to do? */
@@ -1541,7 +1564,19 @@ void sync_inodes_sb(struct super_block *sb)
bdi_queue_work(sb->s_bdi, &work);
wait_for_completion(&done);
- wait_sb_inodes(sb);
+ wait_sb_inodes(sb, ub);
+}
+
+/**
+ * sync_inodes_sb - sync sb inode pages
+ * @sb: the superblock
+ *
+ * This function writes and waits on any dirty inode belonging to this
+ * super_block.
+ */
+void sync_inodes_sb(struct super_block *sb)
+{
+ sync_inodes_sb_ub(sb, NULL);
}
EXPORT_SYMBOL(sync_inodes_sb);
diff --git a/fs/mount.h b/fs/mount.h
index 3e16b57..b496064 100644
--- a/fs/mount.h
+++ b/fs/mount.h
@@ -70,6 +70,9 @@ static inline int is_mounted(struct vfsmount *mnt)
return !IS_ERR_OR_NULL(real_mount(mnt));
}
+extern struct rw_semaphore namespace_sem;
+extern struct mount *next_mnt(struct mount *p, struct mount *root);
+
extern struct mount *__lookup_mnt(struct vfsmount *, struct dentry *, int);
static inline void get_mnt_ns(struct mnt_namespace *ns)
diff --git a/fs/namespace.c b/fs/namespace.c
index 62a12b7..fa9ee9e 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -40,7 +40,7 @@ static int mnt_group_start = 1;
static struct list_head mount_hashtable[HASH_SIZE];
static struct list_head mountpoint_hashtable[HASH_SIZE];
static struct kmem_cache *mnt_cache __read_mostly;
-static struct rw_semaphore namespace_sem;
+struct rw_semaphore namespace_sem;
/* /sys/fs */
struct kobject *fs_kobj;
@@ -756,7 +756,7 @@ static void commit_tree(struct mount *mnt)
touch_mnt_namespace(n);
}
-static struct mount *next_mnt(struct mount *p, struct mount *root)
+struct mount *next_mnt(struct mount *p, struct mount *root)
{
struct list_head *next = p->mnt_mounts.next;
if (next == &p->mnt_mounts) {
diff --git a/fs/super.c b/fs/super.c
index d09e15a..11de99c 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -258,7 +258,7 @@ static void __put_super(struct super_block *sb)
* Drops a temporary reference, frees superblock if there's no
* references left.
*/
-static void put_super(struct super_block *sb)
+void put_super(struct super_block *sb)
{
spin_lock(&sb_lock);
__put_super(sb);
diff --git a/fs/sync.c b/fs/sync.c
index 624c782..51141ce 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -7,6 +7,7 @@
#include <linux/fs.h>
#include <linux/slab.h>
#include <linux/export.h>
+#include <linux/mount.h>
#include <linux/namei.h>
#include <linux/writeback.h>
#include <linux/syscalls.h>
@@ -17,6 +18,7 @@
#include <linux/backing-dev.h>
#include <linux/ve.h>
#include "internal.h"
+#include "mount.h"
#include <bc/beancounter.h>
#include <bc/io_acct.h>
@@ -35,9 +37,9 @@ static int __sync_filesystem(struct super_block *sb,
struct user_beancounter *ub, int wait)
{
if (wait)
- sync_inodes_sb(sb);
+ sync_inodes_sb_ub(sb, ub);
else
- writeback_inodes_sb(sb, WB_REASON_SYNC);
+ writeback_inodes_sb_ub(sb, ub, WB_REASON_SYNC);
if (sb->s_op->sync_fs)
sb->s_op->sync_fs(sb, wait);
@@ -80,7 +82,7 @@ EXPORT_SYMBOL_GPL(sync_filesystem);
static void sync_inodes_one_sb(struct super_block *sb, void *arg)
{
if (!(sb->s_flags & MS_RDONLY))
- sync_inodes_sb(sb);
+ sync_inodes_sb_ub(sb, (struct user_beancounter *)arg);
}
static void sync_fs_one_sb(struct super_block *sb, void *arg)
@@ -99,8 +101,6 @@ static void fdatawait_one_bdev(struct block_device *bdev, void *arg)
filemap_fdatawait(bdev->bd_inode->i_mapping);
}
-#if 0
-
struct sync_sb {
struct list_head list;
struct super_block *sb;
@@ -129,8 +129,8 @@ static int sync_filesystem_collected(struct list_head *sync_list, struct super_b
static int sync_collect_filesystems(struct ve_struct *ve, struct list_head *sync_list)
{
- struct vfsmount *root = ve->root_path.mnt;
- struct vfsmount *mnt;
+ struct mount *root = real_mount(ve->root_path.mnt);
+ struct mount *mnt;
struct sync_sb *ss;
int ret = 0;
@@ -138,7 +138,7 @@ static int sync_collect_filesystems(struct ve_struct *ve, struct list_head *sync
down_read(&namespace_sem);
for (mnt = root; mnt; mnt = next_mnt(mnt, root)) {
- if (sync_filesystem_collected(sync_list, mnt->mnt_sb))
+ if (sync_filesystem_collected(sync_list, mnt->mnt.mnt_sb))
continue;
ss = kmalloc(sizeof(*ss), GFP_KERNEL);
@@ -146,7 +146,7 @@ static int sync_collect_filesystems(struct ve_struct *ve, struct list_head *sync
ret = -ENOMEM;
break;
}
- ss->sb = mnt->mnt_sb;
+ ss->sb = mnt->mnt.mnt_sb;
/*
* We hold mount point and thus can be sure, that superblock is
* alive. And it means, that we can safely increase it's usage
@@ -189,8 +189,6 @@ static void sync_filesystems_ve(struct ve_struct *ve, struct user_beancounter *u
mutex_unlock(&ve->sync_mutex);
}
-#endif
-
static int __ve_fsync_behavior(struct ve_struct *ve)
{
if (ve->fsync_enable == 2)
@@ -212,6 +210,14 @@ int ve_fsync_behavior(void)
return __ve_fsync_behavior(ve);
}
+static void sync_filesystems(struct user_beancounter *ub, int wait)
+{
+ if (!ub || (ub == get_ub0()))
+ iterate_supers(sync_fs_one_sb, &wait);
+ else
+ sync_filesystems_ve(get_exec_env(), ub, wait);
+}
+
/*
* Sync everything. We start by waking flusher threads so that most of
* writeback runs on all devices in parallel. Then we sync all inodes reliably
@@ -225,7 +231,7 @@ int ve_fsync_behavior(void)
SYSCALL_DEFINE0(sync)
{
struct ve_struct *ve = get_exec_env();
- struct user_beancounter *ub;
+ struct user_beancounter *ub, *sync_ub = NULL;
int nowait = 0, wait = 1;
ub = get_exec_ub();
@@ -246,15 +252,18 @@ SYSCALL_DEFINE0(sync)
fsb = __ve_fsync_behavior(ve);
if (fsb == FSYNC_NEVER)
goto skip;
+
+ if (fsb == FSYNC_FILTERED)
+ sync_ub = get_io_ub();
}
- wakeup_flusher_threads(0, WB_REASON_SYNC);
- iterate_supers(sync_inodes_one_sb, NULL);
- iterate_supers(sync_fs_one_sb, &nowait);
- iterate_supers(sync_fs_one_sb, &wait);
+ wakeup_flusher_threads_ub(0, ub, WB_REASON_SYNC);
+ iterate_supers(sync_inodes_one_sb, sync_ub);
+ sync_filesystems(sync_ub, nowait);
+ sync_filesystems(sync_ub, wait);
iterate_bdevs(fdatawrite_one_bdev, NULL);
iterate_bdevs(fdatawait_one_bdev, NULL);
- if (unlikely(laptop_mode))
+ if (unlikely(laptop_mode) && !sync_ub)
laptop_sync_completion();
skip:
ub_percpu_inc(ub, sync_done);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 9e6f777..b035f62 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2164,6 +2164,7 @@ void kill_anon_super(struct super_block *sb);
void kill_litter_super(struct super_block *sb);
void deactivate_super(struct super_block *sb);
void deactivate_locked_super(struct super_block *sb);
+void put_super(struct super_block *sb);
int set_anon_super(struct super_block *s, void *data);
int get_anon_bdev(dev_t *);
void free_anon_bdev(dev_t);
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index a193a7e..01e5651 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -93,13 +93,18 @@ struct writeback_control {
struct bdi_writeback;
int inode_wait(void *);
void writeback_inodes_sb(struct super_block *, enum wb_reason reason);
+void writeback_inodes_sb_ub(struct super_block *, struct user_beancounter *,
+ enum wb_reason reason);
void writeback_inodes_sb_nr(struct super_block *, unsigned long nr,
enum wb_reason reason);
int try_to_writeback_inodes_sb(struct super_block *, enum wb_reason reason);
int try_to_writeback_inodes_sb_nr(struct super_block *, unsigned long nr,
enum wb_reason reason);
void sync_inodes_sb(struct super_block *);
+void sync_inodes_sb_ub(struct super_block *, struct user_beancounter *ub);
void wakeup_flusher_threads(long nr_pages, enum wb_reason reason);
+void wakeup_flusher_threads_ub(long nr_pages, struct user_beancounter *ub,
+ enum wb_reason reason);
void inode_wait_for_writeback(struct inode *inode);
/* writeback.h requires fs.h; it, too, is not included from here. */
--
2.4.10
More information about the Devel
mailing list