[Devel] [PATCH v2 rh7 2/2] mm/page-writeback: Introduce per-CT dirty memory limit.

Andrey Ryabinin aryabinin at virtuozzo.com
Tue Jan 19 08:14:30 PST 2016


This patch adds balance_dirty_pages_ub(), which implements the per-UB
logic of balance_dirty_pages() from PCS6. balance_dirty_pages_ub() is invoked
right before the original balance_dirty_pages() to issue per-UB writeback.
After that, balance_dirty_pages() runs to perform global writeback.

https://jira.sw.ru/browse/PSBM-33841

Signed-off-by: Andrey Ryabinin <aryabinin at virtuozzo.com>
---
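A note on the per-CPU drift fixup in the first hunk below: ub_stat_get()
presumably reads a batched per-CPU counter, so its result may deviate
from the true value by up to UB_STAT_BATCH per possible CPU. That is why,
when the apparent excess over ub_thresh is smaller than
UB_STAT_BATCH * num_possible_cpus(), the loop re-reads the counters with
the more expensive ub_stat_get_exact() before deciding to throttle. Below
is a minimal userspace sketch of such a counter scheme; BATCH, NR_CPUS
and the toy_* names are illustrative stand-ins, not the kernel API:

#include <stdio.h>

#define NR_CPUS	4
#define BATCH	32	/* stands in for UB_STAT_BATCH */

struct toy_counter {
	long global;		/* folded-in batches, cheap to read */
	long percpu[NR_CPUS];	/* each CPU accumulates up to +/-BATCH locally */
};

static void toy_add(struct toy_counter *c, int cpu, long delta)
{
	c->percpu[cpu] += delta;
	if (c->percpu[cpu] >= BATCH || c->percpu[cpu] <= -BATCH) {
		c->global += c->percpu[cpu];	/* fold the local batch */
		c->percpu[cpu] = 0;
	}
}

/* Cheap read, like ub_stat_get(): may be off by up to BATCH * NR_CPUS. */
static long toy_read(struct toy_counter *c)
{
	return c->global;
}

/* Exact read, like ub_stat_get_exact(): sums every local remainder. */
static long toy_read_exact(struct toy_counter *c)
{
	long sum = c->global;
	int cpu;

	for (cpu = 0; cpu < NR_CPUS; cpu++)
		sum += c->percpu[cpu];
	return sum;
}

int main(void)
{
	struct toy_counter c = { 0 };
	int cpu;

	/* Each CPU dirties BATCH - 1 pages: nothing reaches ->global yet. */
	for (cpu = 0; cpu < NR_CPUS; cpu++)
		toy_add(&c, cpu, BATCH - 1);

	printf("cheap=%ld exact=%ld max drift=%d\n",
	       toy_read(&c), toy_read_exact(&c), BATCH * NR_CPUS);
	return 0;
}

Here the cheap read lags the exact one by 124 pages (and with negative
deltas it can overshoot instead), within the worst-case drift of
BATCH * NR_CPUS = 128 pages, exactly the slack the hunk below allows
before trusting the cheap value.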
 mm/page-writeback.c | 122 +++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 121 insertions(+), 1 deletion(-)
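A second note, on the throttling loop itself: it sleeps with exponential
backoff, starting at one jiffy and doubling up to HZ / 10, i.e. at most a
100ms nap per iteration regardless of CONFIG_HZ. A small standalone
illustration of the resulting schedule (HZ = 1000 is assumed here purely
for the millisecond printout; none of these names come from the patch):

#include <stdio.h>

#define HZ 1000	/* assumed CONFIG_HZ_1000, for illustration only */

int main(void)
{
	unsigned long pause = 1;	/* jiffies, as in balance_dirty_pages_ub() */
	int loop;

	for (loop = 1; loop <= 10; loop++) {
		printf("loop %2d: sleep %3lu jiffies (%3lu ms)\n",
		       loop, pause, pause * 1000 / HZ);
		/* double the delay each iteration, capped at a 100ms nap */
		pause <<= 1;
		if (pause > HZ / 10)
			pause = HZ / 10;
	}
	return 0;
}

So a task that keeps exceeding its per-UB limit sleeps 1, 2, 4, ...
jiffies, settling at 100ms naps until enough pages have been written back
or a fatal signal arrives.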

diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 429c759..7b54682 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -1394,6 +1394,124 @@ static inline void bdi_dirty_limits(struct backing_dev_info *bdi,
 /*
  * balance_dirty_pages() must be called by processes which are generating dirty
  * data.  It looks at the number of dirty pages in the machine and will force
+ * the caller to perform writeback once the beancounter exceeds its dirty
+ * limit. If we're over the per-UB background threshold, the writeback
+ * threads are woken to perform some writeout.
+ */
+static void balance_dirty_pages_ub(struct address_space *mapping,
+				unsigned long write_chunk)
+{
+	long ub_dirty, ub_writeback;
+	long ub_thresh, ub_background_thresh;
+	unsigned long pages_written = 0;
+	unsigned long pause = 1;
+	struct user_beancounter *ub = get_io_ub();
+
+	struct backing_dev_info *bdi = mapping->backing_dev_info;
+
+	for (;;) {
+		struct writeback_control wbc = {
+			.sync_mode	= WB_SYNC_NONE,
+			.nr_to_write	= write_chunk,
+			.range_cyclic	= 1,
+		};
+
+		if (ub_dirty_limits(&ub_background_thresh, &ub_thresh, ub)) {
+			ub_dirty = ub_stat_get(ub, dirty_pages);
+			ub_writeback = ub_stat_get(ub, writeback_pages);
+		} else {
+			ub_dirty = ub_writeback = 0;
+			ub_thresh = ub_background_thresh = LONG_MAX / 2;
+		}
+
+		/*
+		 * Check thresholds, set dirty_exceeded flags and
+		 * start background writeback before throttling.
+		 */
+		if (ub_dirty + ub_writeback <= ub_thresh)
+			break;
+		if (!test_bit(UB_DIRTY_EXCEEDED, &ub->ub_flags))
+			set_bit(UB_DIRTY_EXCEEDED, &ub->ub_flags);
+		if (!writeback_in_progress(bdi))
+			bdi_start_background_writeback(bdi);
+
+		/*
+		 * Throttle it only when the background writeback cannot
+		 * catch-up. This avoids (excessively) small writeouts
+		 * when the bdi limits are ramping up.
+		 */
+		if (bdi_cap_account_writeback(bdi) &&
+			ub_dirty + ub_writeback <
+				(ub_background_thresh + ub_thresh) / 2)
+			break;
+
+		if (ub_dirty > ub_thresh) {
+			writeback_inodes_wb(&bdi->wb, wbc.nr_to_write,
+					WB_REASON_BACKGROUND, ub);
+			pages_written += write_chunk - wbc.nr_to_write;
+			ub_dirty = ub_stat_get(ub, dirty_pages);
+			ub_writeback = ub_stat_get(ub, writeback_pages);
+		}
+
+		/* fix up per-CPU ub-stat drift to avoid a false positive */
+		if (ub_dirty + ub_writeback > ub_thresh &&
+		    ub_dirty + ub_writeback - ub_thresh <
+				    UB_STAT_BATCH * num_possible_cpus()) {
+			ub_dirty = ub_stat_get_exact(ub, dirty_pages);
+			ub_writeback = ub_stat_get_exact(ub, writeback_pages);
+		}
+
+		if (ub_dirty + ub_writeback <= ub_thresh)
+			break;
+
+		if (pages_written >= write_chunk)
+			break;		/* We've done our duty */
+
+		__set_current_state(TASK_KILLABLE);
+		io_schedule_timeout(pause);
+
+		/*
+		 * Increase the delay for each loop, up to our previous
+		 * default of taking a 100ms nap.
+		 */
+		pause <<= 1;
+		if (pause > HZ / 10)
+			pause = HZ / 10;
+
+		if (fatal_signal_pending(current))
+			break;
+	}
+
+	if (ub_dirty + ub_writeback < ub_thresh &&
+	    test_bit(UB_DIRTY_EXCEEDED, &ub->ub_flags))
+		clear_bit(UB_DIRTY_EXCEEDED, &ub->ub_flags);
+
+	virtinfo_notifier_call(VITYPE_IO, VIRTINFO_IO_BALANCE_DIRTY,
+			       (void*)write_chunk);
+
+	/*
+	 * Even if the in-flight writeback is filtered for another ub, it
+	 * will also write inodes of this ub, because UB_DIRTY_EXCEEDED is set.
+	 */
+	if (writeback_in_progress(bdi))
+		return;
+
+	/*
+	 * In laptop mode, we wait until hitting the higher threshold before
+	 * starting background writeout, and then write out all the way down
+	 * to the lower threshold.  So slow writers cause minimal disk activity.
+	 *
+	 * In normal mode, we start background writeout at the lower
+	 * background_thresh, to keep the amount of dirty memory low.
+	 */
+	if ((laptop_mode && pages_written) ||
+		 (!laptop_mode && ub_dirty > ub_background_thresh))
+		bdi_start_background_writeback(bdi);
+}
+
+/*
+ * balance_dirty_pages() must be called by processes which are generating dirty
+ * data.  It looks at the number of dirty pages in the machine and will force
  * the caller to wait once crossing the (background_thresh + dirty_thresh) / 2.
  * If we're over `background_thresh' then the writeback threads are woken to
  * perform some writeout.
@@ -1690,8 +1808,10 @@ void balance_dirty_pages_ratelimited(struct address_space *mapping)
 	}
 	preempt_enable();
 
-	if (unlikely(current->nr_dirtied >= ratelimit))
+	if (unlikely(current->nr_dirtied >= ratelimit)) {
+		balance_dirty_pages_ub(mapping, ratelimit);
 		balance_dirty_pages(mapping, current->nr_dirtied);
+	}
 }
 EXPORT_SYMBOL(balance_dirty_pages_ratelimited);
 
-- 
2.4.10
