[Devel] [PATCH v2 rh7 2/2] mm/page-writeback: Introduce per-CT dirty memory limit.
Vladimir Davydov
vdavydov at virtuozzo.com
Wed Jan 20 08:30:45 PST 2016
On Tue, Jan 19, 2016 at 07:14:30PM +0300, Andrey Ryabinin wrote:
...
> @@ -1394,6 +1394,124 @@ static inline void bdi_dirty_limits(struct backing_dev_info *bdi,
> /*
> * balance_dirty_pages() must be called by processes which are generating dirty
> * data. It looks at the number of dirty pages in the machine and will force
> + * the caller to perform writeback if the system is over `vm_dirty_ratio'.
> + * If we're over `background_thresh' then the writeback threads are woken to
> + * perform some writeout.
> + */
I don't think we need to copy the comment.
> +static void balance_dirty_pages_ub(struct address_space *mapping,
> + unsigned long write_chunk)
> +{
> + long ub_dirty, ub_writeback;
> + long ub_thresh, ub_background_thresh;
> + unsigned long pages_written = 0;
> + unsigned long pause = 1;
> + struct user_beancounter *ub = get_io_ub();
> +
> + struct backing_dev_info *bdi = mapping->backing_dev_info;
> +
> + for (;;) {
> + struct writeback_control wbc = {
> + .sync_mode = WB_SYNC_NONE,
> + .nr_to_write = write_chunk,
> + .range_cyclic = 1,
> + };
You only use wbc.nr_to_write, hence I guess you needn't define this
struct.
> +
> + if (ub_dirty_limits(&ub_background_thresh, &ub_thresh, ub)) {
> + ub_dirty = ub_stat_get(ub, dirty_pages);
> + ub_writeback = ub_stat_get(ub, writeback_pages);
> + } else {
> + ub_dirty = ub_writeback = 0;
> + ub_thresh = ub_background_thresh = LONG_MAX / 2;
We could just break the loop here instead.
> + }
> +
> + /*
> + * Check thresholds, set dirty_exceeded flags and
> + * start background writeback before throttling.
> + */
> + if (ub_dirty + ub_writeback <= ub_thresh)
> + break;
> + if (!test_bit(UB_DIRTY_EXCEEDED, &ub->ub_flags))
> + set_bit(UB_DIRTY_EXCEEDED, &ub->ub_flags);
UB_DIRTY_EXCEEDED is never used. Please drop it along with ub_flags in a
separate patch.
> + if (!writeback_in_progress(bdi))
> + bdi_start_background_writeback(bdi);
> +
> + /*
> + * Throttle it only when the background writeback cannot
> + * catch-up. This avoids (excessively) small writeouts
> + * when the bdi limits are ramping up.
> + */
> + if (bdi_cap_account_writeback(bdi) &&
This bdi_cap_account_writeback check is a part of
diff-writeback-throttle-writer-when-local-BDI-threshold-is-hit
which was dropped in Vz7 (see PSBM-39167), so please remove it.
> + ub_dirty + ub_writeback <
> + (ub_background_thresh + ub_thresh) / 2)
> + break;
> +
> + if (ub_dirty > ub_thresh) {
> + writeback_inodes_wb(&bdi->wb, wbc.nr_to_write,
> + WB_REASON_BACKGROUND, ub);
> + pages_written += write_chunk - wbc.nr_to_write;
This doesn't work as intended - wbc.nr_to_write never changes.
> + ub_dirty = ub_stat_get(ub, dirty_pages);
> + ub_writeback = ub_stat_get(ub, writeback_pages);
> + }
> +
> + /* fixup ub-stat per-cpu drift to avoid false-positive */
> + if (ub_dirty + ub_writeback > ub_thresh &&
> + ub_dirty + ub_writeback - ub_thresh <
> + UB_STAT_BATCH * num_possible_cpus()) {
> + ub_dirty = ub_stat_get_exact(ub, dirty_pages);
> + ub_writeback = ub_stat_get_exact(ub, writeback_pages);
> + }
> +
> + if (ub_dirty + ub_writeback <= ub_thresh)
> + break;
> +
> + if (pages_written >= write_chunk)
> + break; /* We've done our duty */
> +
> + __set_current_state(TASK_KILLABLE);
> + io_schedule_timeout(pause);
> +
> + /*
> + * Increase the delay for each loop, up to our previous
> + * default of taking a 100ms nap.
> + */
> + pause <<= 1;
> + if (pause > HZ / 10)
> + pause = HZ / 10;
> +
> + if (fatal_signal_pending(current))
> + break;
> + }
> +
> + if (ub_dirty + ub_writeback < ub_thresh &&
> + test_bit(UB_DIRTY_EXCEEDED, &ub->ub_flags))
> + clear_bit(UB_DIRTY_EXCEEDED, &ub->ub_flags);
> +
> + virtinfo_notifier_call(VITYPE_IO, VIRTINFO_IO_BALANCE_DIRTY,
> + (void*)write_chunk);
> +
> + /*
> + * Even if this is filtered writeback for other ub it will write
> + * inodes for this ub, because ub->dirty_exceeded is set.
> + */
> + if (writeback_in_progress(bdi))
> + return;
> +
> + /*
> + * In laptop mode, we wait until hitting the higher threshold before
> + * starting background writeout, and then write out all the way down
> + * to the lower threshold. So slow writers cause minimal disk activity.
> + *
> + * In normal mode, we start background writeout at the lower
> + * background_thresh, to keep the amount of dirty memory low.
> + */
> + if ((laptop_mode && pages_written) ||
> + (!laptop_mode && ub_dirty > ub_background_thresh))
> + bdi_start_background_writeback(bdi);
> +}
> +
> +/*
> + * balance_dirty_pages() must be called by processes which are generating dirty
> + * data. It looks at the number of dirty pages in the machine and will force
> * the caller to wait once crossing the (background_thresh + dirty_thresh) / 2.
> * If we're over `background_thresh' then the writeback threads are woken to
> * perform some writeout.
> @@ -1690,8 +1808,10 @@ void balance_dirty_pages_ratelimited(struct address_space *mapping)
> }
> preempt_enable();
>
> - if (unlikely(current->nr_dirtied >= ratelimit))
> + if (unlikely(current->nr_dirtied >= ratelimit)) {
> + balance_dirty_pages_ub(mapping, ratelimit);
s/ratelimit/current->nr_dirtied/ ?
> balance_dirty_pages(mapping, current->nr_dirtied);
> + }
> }
> EXPORT_SYMBOL(balance_dirty_pages_ratelimited);
More information about the Devel
mailing list