Re: [PATCH 33/51] writeback: make bdi_has_dirty_io() take multiple bdi_writeback's into account

From: Jan Kara
Date: Tue Jun 30 2015 - 12:48:43 EST


On Fri 22-05-15 17:13:47, Tejun Heo wrote:
> bdi_has_dirty_io() used to only reflect whether the root wb
> (bdi_writeback) has dirty inodes. For cgroup writeback support, it
> needs to take all active wb's into account. If any wb on the bdi has
> dirty inodes, bdi_has_dirty_io() should return true.
>
> To achieve that, as inode_wb_list_{move|del}_locked() now keep track
> of the dirty state transition of each wb, the number of dirty wbs can
> be counted in the bdi; however, bdi is already aggregating
> wb->avg_write_bandwidth which can easily be guaranteed to be > 0 when
> there are any dirty inodes by ensuring wb->avg_write_bandwidth can't
> dip below 1. bdi_has_dirty_io() can simply test whether
> bdi->tot_write_bandwidth is zero or not.
>
> While this bumps the value of wb->avg_write_bandwidth to one when it
> used to be zero, this shouldn't cause any meaningful behavior
> difference.
>
> bdi_has_dirty_io() is made an inline function which tests whether
> ->tot_write_bandwidth is non-zero. Also, WARN_ON_ONCE()'s on its
> value are added to inode_wb_list_{move|del}_locked().

It looks OK although I find using total write bandwidth to detect whether
any wb has any dirty IO rather hacky. Frankly I'd prefer to just iterate
all wbs from bdi_has_dirty_io() since that isn't performance critical
and we iterate all wbs in those paths anyway... Hmm?

Honza

> Signed-off-by: Tejun Heo <tj@xxxxxxxxxx>
> Cc: Jens Axboe <axboe@xxxxxxxxx>
> Cc: Jan Kara <jack@xxxxxxx>
> ---
> fs/fs-writeback.c | 5 +++--
> include/linux/backing-dev-defs.h | 8 ++++++--
> include/linux/backing-dev.h | 10 +++++++++-
> mm/backing-dev.c | 5 -----
> mm/page-writeback.c | 10 +++++++---
> 5 files changed, 25 insertions(+), 13 deletions(-)
>
> diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
> index bbccf68..c98d392 100644
> --- a/fs/fs-writeback.c
> +++ b/fs/fs-writeback.c
> @@ -99,6 +99,7 @@ static bool wb_io_lists_populated(struct bdi_writeback *wb)
> return false;
> } else {
> set_bit(WB_has_dirty_io, &wb->state);
> + WARN_ON_ONCE(!wb->avg_write_bandwidth);
> atomic_long_add(wb->avg_write_bandwidth,
> &wb->bdi->tot_write_bandwidth);
> return true;
> @@ -110,8 +111,8 @@ static void wb_io_lists_depopulated(struct bdi_writeback *wb)
> if (wb_has_dirty_io(wb) && list_empty(&wb->b_dirty) &&
> list_empty(&wb->b_io) && list_empty(&wb->b_more_io)) {
> clear_bit(WB_has_dirty_io, &wb->state);
> - atomic_long_sub(wb->avg_write_bandwidth,
> - &wb->bdi->tot_write_bandwidth);
> + WARN_ON_ONCE(atomic_long_sub_return(wb->avg_write_bandwidth,
> + &wb->bdi->tot_write_bandwidth) < 0);
> }
> }
>
> diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h
> index d631a61..8c857d7 100644
> --- a/include/linux/backing-dev-defs.h
> +++ b/include/linux/backing-dev-defs.h
> @@ -98,7 +98,7 @@ struct bdi_writeback {
> unsigned long dirtied_stamp;
> unsigned long written_stamp; /* pages written at bw_time_stamp */
> unsigned long write_bandwidth; /* the estimated write bandwidth */
> - unsigned long avg_write_bandwidth; /* further smoothed write bw */
> + unsigned long avg_write_bandwidth; /* further smoothed write bw, > 0 */
>
> /*
> * The base dirty throttle rate, re-calculated on every 200ms.
> @@ -142,7 +142,11 @@ struct backing_dev_info {
> unsigned int min_ratio;
> unsigned int max_ratio, max_prop_frac;
>
> - atomic_long_t tot_write_bandwidth; /* sum of active avg_write_bw */
> + /*
> + * Sum of avg_write_bw of wbs with dirty inodes. > 0 if there are
> + * any dirty wbs, which is depended upon by bdi_has_dirty().
> + */
> + atomic_long_t tot_write_bandwidth;
>
> struct bdi_writeback wb; /* the root writeback info for this bdi */
> struct bdi_writeback_congested wb_congested; /* its congested state */
> diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
> index 3c8403c..0839e44 100644
> --- a/include/linux/backing-dev.h
> +++ b/include/linux/backing-dev.h
> @@ -29,7 +29,6 @@ void bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages,
> enum wb_reason reason);
> void bdi_start_background_writeback(struct backing_dev_info *bdi);
> void wb_workfn(struct work_struct *work);
> -bool bdi_has_dirty_io(struct backing_dev_info *bdi);
> void wb_wakeup_delayed(struct bdi_writeback *wb);
>
> extern spinlock_t bdi_lock;
> @@ -42,6 +41,15 @@ static inline bool wb_has_dirty_io(struct bdi_writeback *wb)
> return test_bit(WB_has_dirty_io, &wb->state);
> }
>
> +static inline bool bdi_has_dirty_io(struct backing_dev_info *bdi)
> +{
> + /*
> + * @bdi->tot_write_bandwidth is guaranteed to be > 0 if there are
> + * any dirty wbs. See wb_update_write_bandwidth().
> + */
> + return atomic_long_read(&bdi->tot_write_bandwidth);
> +}
> +
> static inline void __add_wb_stat(struct bdi_writeback *wb,
> enum wb_stat_item item, s64 amount)
> {
> diff --git a/mm/backing-dev.c b/mm/backing-dev.c
> index 161ddf1..d2f16fc9 100644
> --- a/mm/backing-dev.c
> +++ b/mm/backing-dev.c
> @@ -256,11 +256,6 @@ static int __init default_bdi_init(void)
> }
> subsys_initcall(default_bdi_init);
>
> -bool bdi_has_dirty_io(struct backing_dev_info *bdi)
> -{
> - return wb_has_dirty_io(&bdi->wb);
> -}
> -
> /*
> * This function is used when the first inode for this wb is marked dirty. It
> * wakes-up the corresponding bdi thread which should then take care of the
> diff --git a/mm/page-writeback.c b/mm/page-writeback.c
> index c95eb24..99b8846 100644
> --- a/mm/page-writeback.c
> +++ b/mm/page-writeback.c
> @@ -881,9 +881,13 @@ static void wb_update_write_bandwidth(struct bdi_writeback *wb,
> avg += (old - avg) >> 3;
>
> out:
> - if (wb_has_dirty_io(wb))
> - atomic_long_add(avg - wb->avg_write_bandwidth,
> - &wb->bdi->tot_write_bandwidth);
> + /* keep avg > 0 to guarantee that tot > 0 if there are dirty wbs */
> + avg = max(avg, 1LU);
> + if (wb_has_dirty_io(wb)) {
> + long delta = avg - wb->avg_write_bandwidth;
> + WARN_ON_ONCE(atomic_long_add_return(delta,
> + &wb->bdi->tot_write_bandwidth) <= 0);
> + }
> wb->write_bandwidth = bw;
> wb->avg_write_bandwidth = avg;
> }
> --
> 2.4.0
>
--
Jan Kara <jack@xxxxxxx>
SUSE Labs, CR
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/