[PATCH 2/2] blk-throtl: add relative percentage support to latency=

From: Tejun Heo
Date: Thu Nov 09 2017 - 17:20:09 EST


This patch updates latency= handling so that the latency target can
also be specified as a percentage. In addition to the default absolute
latency target, the target can now be given as a percentage of the
baseline (say, 120% of the expected latency).

A given blkg can have either an absolute or a percentage latency
target, but not both. The propagation is updated so that we always
consider both targets and follow whatever is the least protective on
the path to the root.

The percentage latency target is specified and presented with the '%'
suffix.

$ echo 8:16 rbps=$((100<<20)) riops=100 wbps=$((100<<20)) wiops=100 \
idle=$((1000*1000)) latency=120% > io.low
$ cat io.low
8:16 rbps=104857600 wbps=104857600 riops=100 wiops=100 idle=1000000 latency=120%

Signed-off-by: Tejun Heo <tj@xxxxxxxxxx>
Cc: Shaohua Li <shli@xxxxxxxxxx>
---
block/blk-throttle.c | 66 +++++++++++++++++++++++++++++++++++++++------------
1 file changed, 51 insertions(+), 15 deletions(-)

--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -27,6 +27,7 @@ static int throtl_quantum = 32;
#define MIN_THROTL_BPS (320 * 1024)
#define MIN_THROTL_IOPS (10)
#define DFL_LATENCY_TARGET (-1L)
+#define DFL_LATENCY_TARGET_PCT (-1L)
#define DFL_IDLE_THRESHOLD (0)
#define DFL_HD_BASELINE_LATENCY (4000L) /* 4ms */
#define LATENCY_FILTERED_SSD (0)
@@ -164,8 +165,11 @@ struct throtl_grp {

unsigned long last_check_time;

+ /* Either both target and target_pct are DFL or neither is */
unsigned long latency_target; /* us */
unsigned long latency_target_conf; /* us */
+ unsigned long latency_target_pct; /* % */
+ unsigned long latency_target_pct_conf; /* % */
/* When did we start a new slice */
unsigned long slice_start[2];
unsigned long slice_end[2];
@@ -511,6 +515,8 @@ static struct blkg_policy_data *throtl_p

tg->latency_target = DFL_LATENCY_TARGET;
tg->latency_target_conf = DFL_LATENCY_TARGET;
+ tg->latency_target_pct = DFL_LATENCY_TARGET_PCT;
+ tg->latency_target_pct_conf = DFL_LATENCY_TARGET_PCT;
tg->idletime_threshold = DFL_IDLE_THRESHOLD;
tg->idletime_threshold_conf = DFL_IDLE_THRESHOLD;

@@ -1417,6 +1423,8 @@ static void tg_conf_updated(struct throt
parent_tg->idletime_threshold);
this_tg->latency_target = max(this_tg->latency_target,
parent_tg->latency_target);
+ this_tg->latency_target_pct = max(this_tg->latency_target_pct,
+ parent_tg->latency_target_pct);
}

/*
@@ -1528,7 +1536,7 @@ static u64 tg_prfill_limit(struct seq_fi
u64 bps_dft;
unsigned int iops_dft;
char idle_time[26] = "";
- char latency_time[26] = "";
+ char latency_time[27] = ""; /* +1 for the optional '%' */

if (!dname)
return 0;
@@ -1569,8 +1577,11 @@ static u64 tg_prfill_limit(struct seq_fi
snprintf(idle_time, sizeof(idle_time), " idle=%lu",
tg->idletime_threshold_conf);

- if (tg->latency_target_conf == ULONG_MAX)
+ if (tg->latency_target_conf == DFL_LATENCY_TARGET)
strcpy(latency_time, " latency=max");
+ else if (tg->latency_target_pct_conf)
+ snprintf(latency_time, sizeof(latency_time),
+ " latency=%lu%%", tg->latency_target_pct_conf);
else
snprintf(latency_time, sizeof(latency_time),
" latency=%lu", tg->latency_target_conf);
@@ -1597,7 +1608,7 @@ static ssize_t tg_set_limit(struct kernf
struct throtl_grp *tg;
u64 v[4];
unsigned long idle_time;
- unsigned long latency_time;
+ unsigned long latency_time, latency_pct;
int ret;
int index = of_cft(of)->private;

@@ -1614,8 +1625,10 @@ static ssize_t tg_set_limit(struct kernf

idle_time = tg->idletime_threshold_conf;
latency_time = tg->latency_target_conf;
+ latency_pct = tg->latency_target_pct_conf;
while (true) {
char tok[27]; /* wiops=18446744073709551616 */
+ char is_pct = 0;
char *p;
u64 val = U64_MAX;
int len;
@@ -1629,7 +1642,11 @@ static ssize_t tg_set_limit(struct kernf
ret = -EINVAL;
p = tok;
strsep(&p, "=");
- if (!p || (sscanf(p, "%llu", &val) != 1 && strcmp(p, "max")))
+ if (!p || (sscanf(p, "%llu%c", &val, &is_pct) < 1 &&
+ strcmp(p, "max")))
+ goto out_finish;
+
+ if (is_pct && (is_pct != '%' || strcmp(tok, "latency")))
goto out_finish;

ret = -ERANGE;
@@ -1637,20 +1654,33 @@ static ssize_t tg_set_limit(struct kernf
goto out_finish;

ret = -EINVAL;
- if (!strcmp(tok, "rbps"))
+ if (!strcmp(tok, "rbps")) {
v[0] = val;
- else if (!strcmp(tok, "wbps"))
+ } else if (!strcmp(tok, "wbps")) {
v[1] = val;
- else if (!strcmp(tok, "riops"))
+ } else if (!strcmp(tok, "riops")) {
v[2] = min_t(u64, val, UINT_MAX);
- else if (!strcmp(tok, "wiops"))
+ } else if (!strcmp(tok, "wiops")) {
v[3] = min_t(u64, val, UINT_MAX);
- else if (off == LIMIT_LOW && !strcmp(tok, "idle"))
+ } else if (off == LIMIT_LOW && !strcmp(tok, "idle")) {
idle_time = val;
- else if (off == LIMIT_LOW && !strcmp(tok, "latency"))
- latency_time = val;
- else
+ } else if (off == LIMIT_LOW && !strcmp(tok, "latency")) {
+ /* gonna use max of the two, set the other one to 0 */
+ if (val != U64_MAX) {
+ if (is_pct) {
+ latency_time = 0;
+ latency_pct = val;
+ } else {
+ latency_time = val;
+ latency_pct = 0;
+ }
+ } else {
+ latency_time = DFL_LATENCY_TARGET;
+ latency_pct = DFL_LATENCY_TARGET_PCT;
+ }
+ } else {
goto out_finish;
+ }
}

tg->bps_conf[READ][index] = v[0];
@@ -1674,6 +1704,7 @@ static ssize_t tg_set_limit(struct kernf
tg->iops_conf[WRITE][LIMIT_MAX]);
tg->idletime_threshold_conf = idle_time;
tg->latency_target_conf = latency_time;
+ tg->latency_target_pct_conf = latency_pct;

/* force user to configure all settings for low limit */
if (!(tg->bps[READ][LIMIT_LOW] || tg->iops[READ][LIMIT_LOW] ||
@@ -1686,9 +1717,11 @@ static ssize_t tg_set_limit(struct kernf
tg->iops[WRITE][LIMIT_LOW] = 0;
tg->idletime_threshold = DFL_IDLE_THRESHOLD;
tg->latency_target = DFL_LATENCY_TARGET;
+ tg->latency_target_pct = DFL_LATENCY_TARGET_PCT;
} else if (index == LIMIT_LOW) {
tg->idletime_threshold = tg->idletime_threshold_conf;
tg->latency_target = tg->latency_target_conf;
+ tg->latency_target_pct = tg->latency_target_pct_conf;
}

blk_throtl_update_limit_valid(tg->td);
@@ -1799,7 +1832,7 @@ static bool throtl_tg_is_idle(struct thr
tg->idletime_threshold == DFL_IDLE_THRESHOLD ||
(ktime_get_ns() >> 10) - tg->last_finish_time > time ||
tg->avg_idletime > tg->idletime_threshold ||
- (tg->latency_target && tg->bio_cnt &&
+ ((tg->latency_target || tg->latency_target_pct) && tg->bio_cnt &&
tg->bad_bio_cnt * 5 < tg->bio_cnt);
throtl_log(&tg->service_queue,
"avg_idle=%ld, idle_threshold=%ld, bad_bio=%d, total_bio=%d, is_idle=%d, scale=%d",
@@ -2293,13 +2326,16 @@ void blk_throtl_bio_endio(struct bio *bi
throtl_track_latency(tg->td, blk_stat_size(&bio->bi_issue_stat),
bio_op(bio), lat);

- if (tg->latency_target && lat >= tg->td->filtered_latency) {
+ if ((tg->latency_target || tg->latency_target_pct) &&
+ lat >= tg->td->filtered_latency) {
int bucket;
unsigned int threshold;

bucket = request_bucket_index(
blk_stat_size(&bio->bi_issue_stat));
- threshold = tg->latency_target;
+ threshold = max(tg->latency_target,
+ tg->latency_target_pct *
+ tg->td->avg_buckets[bucket].latency / 100);
if (lat > threshold)
tg->bad_bio_cnt++;
/*