[Devel] [PATCH RHEL7 COMMIT] ve/fs/writeback: per-CT fs writeback
Konstantin Khorenko
khorenko at virtuozzo.com
Fri Jan 22 01:35:13 PST 2016
The commit is pushed to "branch-rh7-3.10.0-229.7.2.vz7.9.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh7-3.10.0-229.7.2.vz7.9.24
------>
commit a9eb8cde445d30601507a2bb3b638611d1a93cd2
Author: Andrey Ryabinin <aryabinin at virtuozzo.com>
Date: Fri Jan 22 13:35:13 2016 +0400
ve/fs/writeback: per-CT fs writeback
The main idea is as follows:
* For background work, we check all UBs for exceeding their dirty limit.
* Background work goes on if any UB has exceeded its dirty limit.
* In that case, writeback skips inodes that belong to a UB which is
within its dirty limits.
writeback_inodes_wb() gains a new 'struct user_beancounter *ub' argument,
which is needed for targeted per-CT writeback. This will be used in the next
patch.
https://jira.sw.ru/browse/PSBM-33841
Signed-off-by: Andrey Ryabinin <aryabinin at virtuozzo.com>
Reviewed-by: Vladimir Davydov <vdavydov at virtuozzo.com>
---
fs/fs-writeback.c | 29 +++++++++++++++++++++++------
include/bc/io_acct.h | 7 ++++++-
include/linux/backing-dev.h | 2 ++
kernel/bc/io_acct.c | 42 ++++++++++++++++++++++++++++++++++++++++--
4 files changed, 71 insertions(+), 9 deletions(-)
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index ac8066b..7b83367 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -42,6 +42,7 @@ struct wb_writeback_work {
struct super_block *sb;
unsigned long *older_than_this;
enum writeback_sync_modes sync_mode;
+ unsigned int filter_ub:1;
unsigned int tagged_writepages:1;
unsigned int for_kupdate:1;
unsigned int range_cyclic:1;
@@ -51,6 +52,7 @@ struct wb_writeback_work {
struct list_head list; /* pending work list */
struct completion *done; /* set if the caller waits */
+ struct user_beancounter *ub;
};
/*
@@ -724,6 +726,13 @@ static long writeback_sb_inodes(struct super_block *sb,
trace_writeback_sb_inodes_requeue(inode);
continue;
}
+ if ((work->ub || work->filter_ub) &&
+ ub_should_skip_writeback(work->ub, inode)) {
+ spin_unlock(&inode->i_lock);
+ requeue_io(inode, wb);
+ continue;
+ }
+
spin_unlock(&wb->list_lock);
/*
@@ -809,14 +818,15 @@ static long __writeback_inodes_wb(struct bdi_writeback *wb,
return wrote;
}
-static long writeback_inodes_wb(struct bdi_writeback *wb, long nr_pages,
- enum wb_reason reason)
+long writeback_inodes_wb(struct bdi_writeback *wb, long nr_pages,
+ enum wb_reason reason, struct user_beancounter *ub)
{
struct wb_writeback_work work = {
.nr_pages = nr_pages,
.sync_mode = WB_SYNC_NONE,
.range_cyclic = 1,
.reason = reason,
+ .ub = ub,
};
spin_lock(&wb->list_lock);
@@ -904,8 +914,14 @@ static long wb_writeback(struct bdi_writeback *wb,
* For background writeout, stop when we are below the
* background dirty threshold
*/
- if (work->for_background && !over_bground_thresh(wb->bdi))
- break;
+ if (work->for_background) {
+ if (over_bground_thresh(wb->bdi))
+ work->filter_ub = 0;
+ else if (ub_over_bground_thresh())
+ work->filter_ub = 1;
+ else
+ break;
+ }
/*
* Kupdate and background works are special and we want to
@@ -996,7 +1012,8 @@ static unsigned long get_nr_dirty_pages(void)
static long wb_check_background_flush(struct bdi_writeback *wb)
{
- if (over_bground_thresh(wb->bdi)) {
+ if (over_bground_thresh(wb->bdi) ||
+ ub_over_bground_thresh()) {
struct wb_writeback_work work = {
.nr_pages = LONG_MAX,
@@ -1115,7 +1132,7 @@ void bdi_writeback_workfn(struct work_struct *work)
* enough for efficient IO.
*/
pages_written = writeback_inodes_wb(&bdi->wb, 1024,
- WB_REASON_FORKER_THREAD);
+ WB_REASON_FORKER_THREAD, NULL);
trace_writeback_pages_written(pages_written);
}
diff --git a/include/bc/io_acct.h b/include/bc/io_acct.h
index fa7afb1..e0af0bf 100644
--- a/include/bc/io_acct.h
+++ b/include/bc/io_acct.h
@@ -58,7 +58,7 @@ extern void ub_io_writeback_dec(struct address_space *mapping);
extern int ub_dirty_limits(unsigned long *pbackground,
long *pdirty, struct user_beancounter *ub);
-
+extern bool ub_over_bground_thresh(void);
extern bool ub_should_skip_writeback(struct user_beancounter *ub,
struct inode *inode);
@@ -116,6 +116,11 @@ static inline struct user_beancounter *get_io_ub(void)
return NULL;
}
+/* Stub: always reports false; presumably the !UBC_IO_ACCT variant - confirm. */
+static inline bool ub_over_bground_thresh(void)
+{
+ return false;
+}
+
#endif /* UBC_IO_ACCT */
#endif
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index 859504b..b7668cf 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -130,6 +130,8 @@ int bdi_setup_and_register(struct backing_dev_info *, char *, unsigned int);
void bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages,
enum wb_reason reason);
void bdi_start_background_writeback(struct backing_dev_info *bdi);
+long writeback_inodes_wb(struct bdi_writeback *wb, long nr_pages,
+ enum wb_reason reason, struct user_beancounter *ub);
void bdi_writeback_workfn(struct work_struct *work);
int bdi_has_dirty_io(struct backing_dev_info *bdi);
void bdi_wakeup_thread_delayed(struct backing_dev_info *bdi);
diff --git a/kernel/bc/io_acct.c b/kernel/bc/io_acct.c
index f9778f8..ce41829 100644
--- a/kernel/bc/io_acct.c
+++ b/kernel/bc/io_acct.c
@@ -126,12 +126,48 @@ void ub_io_writeback_dec(struct address_space *mapping)
}
}
+/*
+ * Report whether @ub has reached its background writeback threshold.
+ *
+ * The threshold comes from ub_dirty_limits(); @ub counts as over it once
+ * its dirty_pages and writeback_pages statistics together are greater
+ * than or equal to that threshold.
+ */
+static bool __ub_over_bground_thresh(struct user_beancounter *ub)
+{
+	unsigned long background_thresh;
+	/* ub_dirty_limits() takes 'long *' for the dirty limit, not 'unsigned long *'. */
+	long dirty_thresh;
+	unsigned long ub_dirty, ub_writeback;
+
+	ub_dirty_limits(&background_thresh, &dirty_thresh, ub);
+
+	ub_dirty = ub_stat_get(ub, dirty_pages);
+	ub_writeback = ub_stat_get(ub, writeback_pages);
+
+	return ub_dirty + ub_writeback >= background_thresh;
+}
+
+/*
+ * Walk the beancounters under rcu_read_lock() and report whether any of
+ * them, other than the one returned by get_ub0(), is over its background
+ * threshold.
+ */
+bool ub_over_bground_thresh(void)
+{
+	struct user_beancounter *ub;
+	bool over = false;
+
+	rcu_read_lock();
+	for_each_beancounter(ub) {
+		if (ub != get_ub0() && __ub_over_bground_thresh(ub)) {
+			over = true;
+			break;
+		}
+	}
+	rcu_read_unlock();
+
+	return over;
+}
+
int ub_dirty_limits(unsigned long *pbackground,
long *pdirty, struct user_beancounter *ub)
{
int dirty_ratio;
unsigned long available_memory;
+ *pdirty = *pbackground = LONG_MAX;
+
dirty_ratio = ub_dirty_ratio;
if (!dirty_ratio)
return 0;
@@ -157,8 +193,10 @@ bool ub_should_skip_writeback(struct user_beancounter *ub, struct inode *inode)
rcu_read_lock();
dirtied_ub = rcu_dereference(inode->i_mapping->dirtied_ub);
- ret = !dirtied_ub || (dirtied_ub != ub &&
- !test_bit(UB_DIRTY_EXCEEDED, &dirtied_ub->ub_flags));
+ if (ub)
+ ret = (ub != dirtied_ub);
+ else
+ ret = (dirtied_ub && !__ub_over_bground_thresh(dirtied_ub));
rcu_read_unlock();
return ret;
More information about the Devel
mailing list