Re: [PATCH 7/8] wbt: add general throttling mechanism

From: xiakaixu
Date: Wed Apr 27 2016 - 08:07:54 EST



> + return rwb && rwb->wb_normal != 0;
> +}
> +
> +/*
> + * Increment 'v', if 'v' is below 'below'. Returns true if we succeeded,
> + * false if 'v' + 1 would be bigger than 'below'.
> + */
> +static bool atomic_inc_below(atomic_t *v, int below)
> +{
> + int cur = atomic_read(v);
> +
> + for (;;) {
> + int old;
> +
> + if (cur >= below)
> + return false;
> + old = atomic_cmpxchg(v, cur, cur + 1);
> + if (old == cur)
> + break;
> + cur = old;
> + }
> +
> + return true;
> +}
> +
> +static void wb_timestamp(struct rq_wb *rwb, unsigned long *var)
> +{
> + if (rwb_enabled(rwb)) {
> + const unsigned long cur = jiffies;
> +
> + if (cur != *var)
> + *var = cur;
> + }
> +}
> +
> +void __wbt_done(struct rq_wb *rwb)
> +{
> + int inflight, limit = rwb->wb_normal;
> +
> + /*
> + * If the device does write back caching, drop further down
> + * before we wake people up.
> + */
> + if (rwb->wc && !atomic_read(&rwb->bdi->wb.dirty_sleeping))
> + limit = 0;
> + else
> + limit = rwb->wb_normal;
> +
> + /*
> + * Don't wake anyone up if we are above the normal limit. If
> + * throttling got disabled (limit == 0) with waiters, ensure
> + * that we wake them up.
> + */
> + inflight = atomic_dec_return(&rwb->inflight);
> + if (limit && inflight >= limit) {
> + if (!rwb->wb_max)
> + wake_up_all(&rwb->wait);
> + return;
> + }
> +
Hi Jens,

Just a little confused about this: rwb->wb_max can't be 0 when 'limit' is
non-zero, since calc_wb_limits() either zeroes all three limits or derives
wb_normal from a non-zero wb_max. So the if (!rwb->wb_max) check here looks
like it can never be taken, and maybe does not make sense.
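
To convince myself, I mirrored the limit arithmetic from calc_wb_limits()
(quoted further down) in a small userspace sketch; the RWB_MAX_DEPTH value
of 64 and the loop bounds are just my assumptions for illustration:

    #include <assert.h>
    #include <stdio.h>

    /* Assumed value, only for this illustration. */
    #define RWB_MAX_DEPTH 64

    int main(void)
    {
        unsigned int queue_depth, scale_step;

        for (queue_depth = 1; queue_depth <= 1024; queue_depth++) {
            for (scale_step = 0; scale_step <= 31; scale_step++) {
                /* Same arithmetic as calc_wb_limits() */
                unsigned int depth = queue_depth < RWB_MAX_DEPTH ?
                                     queue_depth : RWB_MAX_DEPTH;
                unsigned int wb_max = 1 + ((depth - 1) >> scale_step);
                unsigned int wb_normal = (wb_max + 1) / 2;

                /* non-zero wb_normal always comes with non-zero wb_max */
                if (wb_normal)
                    assert(wb_max);
            }
        }
        printf("wb_normal != 0 always implies wb_max != 0\n");
        return 0;
    }

Since 'limit' in __wbt_done() is either 0 or rwb->wb_normal, inside the
"if (limit && ...)" branch wb_max should always be non-zero.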


> + if (waitqueue_active(&rwb->wait)) {
> + int diff = limit - inflight;
> +
> + if (!inflight || diff >= rwb->wb_background / 2)
> + wake_up_nr(&rwb->wait, 1);
> + }
> +}
> +
> +/*
> + * Called on completion of a request. Note that it's also called when
> + * a request is merged, when the request gets freed.
> + */
> +void wbt_done(struct rq_wb *rwb, struct wb_issue_stat *stat)
> +{
> + if (!rwb)
> + return;
> +
> + if (!wbt_tracked(stat)) {
> + if (rwb->sync_cookie == stat) {
> + rwb->sync_issue = 0;
> + rwb->sync_cookie = NULL;
> + }
> +
> + wb_timestamp(rwb, &rwb->last_comp);
> + } else {
> + WARN_ON_ONCE(stat == rwb->sync_cookie);
> + __wbt_done(rwb);
> + wbt_clear_tracked(stat);
> + }
> +}
> +
> +static void calc_wb_limits(struct rq_wb *rwb)
> +{
> + unsigned int depth;
> +
> + if (!rwb->min_lat_nsec) {
> + rwb->wb_max = rwb->wb_normal = rwb->wb_background = 0;
> + return;
> + }
> +
> + depth = min_t(unsigned int, RWB_MAX_DEPTH, rwb->queue_depth);
> +
> + /*
> + * Reduce max depth by 50%, and re-calculate normal/bg based on that
> + */
> + rwb->wb_max = 1 + ((depth - 1) >> min(31U, rwb->scale_step));
> + rwb->wb_normal = (rwb->wb_max + 1) / 2;
> + rwb->wb_background = (rwb->wb_max + 3) / 4;
> +}
> +
> +static bool inline stat_sample_valid(struct blk_rq_stat *stat)
> +{
> + /*
> + * We need at least one read sample, and a minimum of
> + * RWB_MIN_WRITE_SAMPLES. We require some write samples to know
> + * that it's writes impacting us, and not just some sole read on
> + * a device that is in a lower power state.
> + */
> + return stat[0].nr_samples >= 1 &&
> + stat[1].nr_samples >= RWB_MIN_WRITE_SAMPLES;
> +}
> +