[Devel] [PATCH RHEL7 COMMIT] ve/fs/writeback: per-CT fs writeback

Konstantin Khorenko khorenko at virtuozzo.com
Fri Jan 22 01:35:13 PST 2016


The commit is pushed to "branch-rh7-3.10.0-229.7.2.vz7.9.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh7-3.10.0-229.7.2.vz7.9.24
------>
commit a9eb8cde445d30601507a2bb3b638611d1a93cd2
Author: Andrey Ryabinin <aryabinin at virtuozzo.com>
Date:   Fri Jan 22 13:35:13 2016 +0400

    ve/fs/writeback: per-CT fs writeback
    
    The main idea is as follows:
     * For background work we check all UBs for exceeding their dirty limit.
     * Background work goes on if any UB has exceeded its dirty limit.
     * In that case, writeback will skip inodes that belong to a
          "within dirty-limits" UB.
    
    writeback_inodes_wb() gains a new 'struct user_beancounter *ub' argument,
    which is needed for targeted per-CT writeback. This will be used in the next
    patch.
    
    https://jira.sw.ru/browse/PSBM-33841
    
    Signed-off-by: Andrey Ryabinin <aryabinin at virtuozzo.com>
    Reviewed-by: Vladimir Davydov <vdavydov at virtuozzo.com>
---
 fs/fs-writeback.c           | 29 +++++++++++++++++++++++------
 include/bc/io_acct.h        |  7 ++++++-
 include/linux/backing-dev.h |  2 ++
 kernel/bc/io_acct.c         | 42 ++++++++++++++++++++++++++++++++++++++++--
 4 files changed, 71 insertions(+), 9 deletions(-)

diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index ac8066b..7b83367 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -42,6 +42,7 @@ struct wb_writeback_work {
 	struct super_block *sb;
 	unsigned long *older_than_this;
 	enum writeback_sync_modes sync_mode;
+	unsigned int filter_ub:1;
 	unsigned int tagged_writepages:1;
 	unsigned int for_kupdate:1;
 	unsigned int range_cyclic:1;
@@ -51,6 +52,7 @@ struct wb_writeback_work {
 
 	struct list_head list;		/* pending work list */
 	struct completion *done;	/* set if the caller waits */
+	struct user_beancounter *ub;
 };
 
 /*
@@ -724,6 +726,13 @@ static long writeback_sb_inodes(struct super_block *sb,
 			trace_writeback_sb_inodes_requeue(inode);
 			continue;
 		}
+		if ((work->ub || work->filter_ub) &&
+		     ub_should_skip_writeback(work->ub, inode)) {
+			spin_unlock(&inode->i_lock);
+			requeue_io(inode, wb);
+			continue;
+		}
+
 		spin_unlock(&wb->list_lock);
 
 		/*
@@ -809,14 +818,15 @@ static long __writeback_inodes_wb(struct bdi_writeback *wb,
 	return wrote;
 }
 
-static long writeback_inodes_wb(struct bdi_writeback *wb, long nr_pages,
-				enum wb_reason reason)
+long writeback_inodes_wb(struct bdi_writeback *wb, long nr_pages,
+			enum wb_reason reason, struct user_beancounter *ub)
 {
 	struct wb_writeback_work work = {
 		.nr_pages	= nr_pages,
 		.sync_mode	= WB_SYNC_NONE,
 		.range_cyclic	= 1,
 		.reason		= reason,
+		.ub		= ub,
 	};
 
 	spin_lock(&wb->list_lock);
@@ -904,8 +914,14 @@ static long wb_writeback(struct bdi_writeback *wb,
 		 * For background writeout, stop when we are below the
 		 * background dirty threshold
 		 */
-		if (work->for_background && !over_bground_thresh(wb->bdi))
-			break;
+		if (work->for_background) {
+			if (over_bground_thresh(wb->bdi))
+				work->filter_ub = 0;
+			else if (ub_over_bground_thresh())
+				work->filter_ub = 1;
+			else
+				break;
+		}
 
 		/*
 		 * Kupdate and background works are special and we want to
@@ -996,7 +1012,8 @@ static unsigned long get_nr_dirty_pages(void)
 
 static long wb_check_background_flush(struct bdi_writeback *wb)
 {
-	if (over_bground_thresh(wb->bdi)) {
+	if (over_bground_thresh(wb->bdi) ||
+		ub_over_bground_thresh()) {
 
 		struct wb_writeback_work work = {
 			.nr_pages	= LONG_MAX,
@@ -1115,7 +1132,7 @@ void bdi_writeback_workfn(struct work_struct *work)
 		 * enough for efficient IO.
 		 */
 		pages_written = writeback_inodes_wb(&bdi->wb, 1024,
-						    WB_REASON_FORKER_THREAD);
+						WB_REASON_FORKER_THREAD, NULL);
 		trace_writeback_pages_written(pages_written);
 	}
 
diff --git a/include/bc/io_acct.h b/include/bc/io_acct.h
index fa7afb1..e0af0bf 100644
--- a/include/bc/io_acct.h
+++ b/include/bc/io_acct.h
@@ -58,7 +58,7 @@ extern void ub_io_writeback_dec(struct address_space *mapping);
 
 extern int ub_dirty_limits(unsigned long *pbackground,
 			   long *pdirty, struct user_beancounter *ub);
-
+extern bool ub_over_bground_thresh(void);
 extern bool ub_should_skip_writeback(struct user_beancounter *ub,
 				     struct inode *inode);
 
@@ -116,6 +116,11 @@ static inline struct user_beancounter *get_io_ub(void)
 	return NULL;
 }
 
+static inline bool ub_over_bground_thresh(void)
+{
+	return false;	/* stub: never reports a UB over its background limit */
+}
+
 #endif /* UBC_IO_ACCT */
 
 #endif
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index 859504b..b7668cf 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -130,6 +130,8 @@ int bdi_setup_and_register(struct backing_dev_info *, char *, unsigned int);
 void bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages,
 			enum wb_reason reason);
 void bdi_start_background_writeback(struct backing_dev_info *bdi);
+long writeback_inodes_wb(struct bdi_writeback *wb, long nr_pages,
+			enum wb_reason reason, struct user_beancounter *ub);
 void bdi_writeback_workfn(struct work_struct *work);
 int bdi_has_dirty_io(struct backing_dev_info *bdi);
 void bdi_wakeup_thread_delayed(struct backing_dev_info *bdi);
diff --git a/kernel/bc/io_acct.c b/kernel/bc/io_acct.c
index f9778f8..ce41829 100644
--- a/kernel/bc/io_acct.c
+++ b/kernel/bc/io_acct.c
@@ -126,12 +126,48 @@ void ub_io_writeback_dec(struct address_space *mapping)
 	}
 }
 
+/* True when @ub's dirty + writeback pages reach its background threshold. */
+static bool __ub_over_bground_thresh(struct user_beancounter *ub)
+{
+	unsigned long background_thresh;
+	unsigned long ub_dirty, ub_writeback;
+	long dirty_thresh;	/* ub_dirty_limits() takes a long * here */
+
+	ub_dirty_limits(&background_thresh, &dirty_thresh, ub);
+
+	ub_dirty = ub_stat_get(ub, dirty_pages);
+	ub_writeback = ub_stat_get(ub, writeback_pages);
+
+	/* dirty_thresh is not consulted; only the background limit matters. */
+	return ub_dirty + ub_writeback >= background_thresh;
+}
+
+/* Is any beancounter other than ub0 over its background threshold? */
+bool ub_over_bground_thresh(void)
+{
+	struct user_beancounter *ub;
+	bool over = false;
+
+	/* The beancounter walk is done under the RCU read lock. */
+	rcu_read_lock();
+	for_each_beancounter(ub) {
+		if (ub != get_ub0() && __ub_over_bground_thresh(ub)) {
+			over = true;
+			break;
+		}
+	}
+	rcu_read_unlock();
+	return over;
+}
+
 int ub_dirty_limits(unsigned long *pbackground,
 		    long *pdirty, struct user_beancounter *ub)
 {
 	int dirty_ratio;
 	unsigned long available_memory;
 
+	*pdirty = *pbackground = LONG_MAX;
+
 	dirty_ratio = ub_dirty_ratio;
 	if (!dirty_ratio)
 		return 0;
@@ -157,8 +193,10 @@ bool ub_should_skip_writeback(struct user_beancounter *ub, struct inode *inode)
 
 	rcu_read_lock();
 	dirtied_ub = rcu_dereference(inode->i_mapping->dirtied_ub);
-	ret = !dirtied_ub || (dirtied_ub != ub &&
-			!test_bit(UB_DIRTY_EXCEEDED, &dirtied_ub->ub_flags));
+	if (ub)
+		ret = (ub != dirtied_ub);
+	else
+		ret = (dirtied_ub && !__ub_over_bground_thresh(dirtied_ub));
 	rcu_read_unlock();
 
 	return ret;


More information about the Devel mailing list