[Devel] [PATCH v2 rh7 2/2] mm/page-writeback: Introduce per-CT dirty memory limit.
Vladimir Davydov
vdavydov at virtuozzo.com
Wed Jan 20 08:30:45 PST 2016
On Tue, Jan 19, 2016 at 07:14:30PM +0300, Andrey Ryabinin wrote:
...
> @@ -1394,6 +1394,124 @@ static inline void bdi_dirty_limits(struct backing_dev_info *bdi,
> /*
> * balance_dirty_pages() must be called by processes which are generating dirty
> * data. It looks at the number of dirty pages in the machine and will force
> + * the caller to perform writeback if the system is over `vm_dirty_ratio'.
> + * If we're over `background_thresh' then the writeback threads are woken to
> + * perform some writeout.
> + */
I don't think we need to copy the comment.
> +static void balance_dirty_pages_ub(struct address_space *mapping,
> + unsigned long write_chunk)
> +{
> + long ub_dirty, ub_writeback;
> + long ub_thresh, ub_background_thresh;
> + unsigned long pages_written = 0;
> + unsigned long pause = 1;
> + struct user_beancounter *ub = get_io_ub();
> +
> + struct backing_dev_info *bdi = mapping->backing_dev_info;
> +
> + for (;;) {
> + struct writeback_control wbc = {
> + .sync_mode = WB_SYNC_NONE,
> + .nr_to_write = write_chunk,
> + .range_cyclic = 1,
> + };
You only use wbc.nr_to_write, hence I guess you needn't define this
struct.
> +
> + if (ub_dirty_limits(&ub_background_thresh, &ub_thresh, ub)) {
> + ub_dirty = ub_stat_get(ub, dirty_pages);
> + ub_writeback = ub_stat_get(ub, writeback_pages);
> + } else {
> + ub_dirty = ub_writeback = 0;
> + ub_thresh = ub_background_thresh = LONG_MAX / 2;
We could just break the loop here instead.
> + }
> +
> + /*
> + * Check thresholds, set dirty_exceeded flags and
> + * start background writeback before throttling.
> + */
> + if (ub_dirty + ub_writeback <= ub_thresh)
> + break;
> + if (!test_bit(UB_DIRTY_EXCEEDED, &ub->ub_flags))
> + set_bit(UB_DIRTY_EXCEEDED, &ub->ub_flags);
UB_DIRTY_EXCEEDED is never used. Please drop it along with ub_flags in a
separate patch.
> + if (!writeback_in_progress(bdi))
> + bdi_start_background_writeback(bdi);
> +
> + /*
> + * Throttle it only when the background writeback cannot
> + * catch-up. This avoids (excessively) small writeouts
> + * when the bdi limits are ramping up.
> + */
> + if (bdi_cap_account_writeback(bdi) &&
This bdi_cap_account_writeback check is a part of
diff-writeback-throttle-writer-when-local-BDI-threshold-is-hit
which was dropped in Vz7 (see PSBM-39167), so please remove it.
> + ub_dirty + ub_writeback <
> + (ub_background_thresh + ub_thresh) / 2)
> + break;
> +
> + if (ub_dirty > ub_thresh) {
> + writeback_inodes_wb(&bdi->wb, wbc.nr_to_write,
> + WB_REASON_BACKGROUND, ub);
> + pages_written += write_chunk - wbc.nr_to_write;
This doesn't work as intended - wbc.nr_to_write never changes.
> + ub_dirty = ub_stat_get(ub, dirty_pages);
> + ub_writeback = ub_stat_get(ub, writeback_pages);
> + }
> +
> + /* fixup ub-stat per-cpu drift to avoid false-positive */
> + if (ub_dirty + ub_writeback > ub_thresh &&
> + ub_dirty + ub_writeback - ub_thresh <
> + UB_STAT_BATCH * num_possible_cpus()) {
> + ub_dirty = ub_stat_get_exact(ub, dirty_pages);
> + ub_writeback = ub_stat_get_exact(ub, writeback_pages);
> + }
> +
> + if (ub_dirty + ub_writeback <= ub_thresh)
> + break;
> +
> + if (pages_written >= write_chunk)
> + break; /* We've done our duty */
> +
> + __set_current_state(TASK_KILLABLE);
> + io_schedule_timeout(pause);
> +
> + /*
> + * Increase the delay for each loop, up to our previous
> + * default of taking a 100ms nap.
> + */
> + pause <<= 1;
> + if (pause > HZ / 10)
> + pause = HZ / 10;
> +
> + if (fatal_signal_pending(current))
> + break;
> + }
> +
> + if (ub_dirty + ub_writeback < ub_thresh &&
> + test_bit(UB_DIRTY_EXCEEDED, &ub->ub_flags))
> + clear_bit(UB_DIRTY_EXCEEDED, &ub->ub_flags);
> +
> + virtinfo_notifier_call(VITYPE_IO, VIRTINFO_IO_BALANCE_DIRTY,
> + (void*)write_chunk);
> +
> + /*
> + * Even if this is filtered writeback for other ub it will write
> + * inodes for this ub, because ub->dirty_exceeded is set.
> + */
> + if (writeback_in_progress(bdi))
> + return;
> +
> + /*
> + * In laptop mode, we wait until hitting the higher threshold before
> + * starting background writeout, and then write out all the way down
> + * to the lower threshold. So slow writers cause minimal disk activity.
> + *
> + * In normal mode, we start background writeout at the lower
> + * background_thresh, to keep the amount of dirty memory low.
> + */
> + if ((laptop_mode && pages_written) ||
> + (!laptop_mode && ub_dirty > ub_background_thresh))
> + bdi_start_background_writeback(bdi);
> +}
> +
> +/*
> + * balance_dirty_pages() must be called by processes which are generating dirty
> + * data. It looks at the number of dirty pages in the machine and will force
> * the caller to wait once crossing the (background_thresh + dirty_thresh) / 2.
> * If we're over `background_thresh' then the writeback threads are woken to
> * perform some writeout.
> @@ -1690,8 +1808,10 @@ void balance_dirty_pages_ratelimited(struct address_space *mapping)
> }
> preempt_enable();
>
> - if (unlikely(current->nr_dirtied >= ratelimit))
> + if (unlikely(current->nr_dirtied >= ratelimit)) {
> + balance_dirty_pages_ub(mapping, ratelimit);
s/ratelimit/current->nr_dirtied/ ?
> balance_dirty_pages(mapping, current->nr_dirtied);
> + }
> }
> EXPORT_SYMBOL(balance_dirty_pages_ratelimited);
More information about the Devel
mailing list