[RFC 3/3] blk-throttling: detect inactive cgroup
From: Shaohua Li
Date: Wed Jan 20 2016 - 12:49:39 EST
If a cgroup is inactive for some time, it should be excluded from
bandwidth calculation.
Signed-off-by: Shaohua Li <shli@xxxxxx>
---
block/blk-throttle.c | 71 +++++++++++++++++++++++++++++++++++++++++++++++++---
1 file changed, 67 insertions(+), 4 deletions(-)
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index b3f847d..5c11270 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -15,6 +15,9 @@
#define MAX_WEIGHT (1000)
#define WEIGHT_RATIO_SHIFT (12)
#define WEIGHT_RATIO (1 << WEIGHT_RATIO_SHIFT)
+/* must be less than the interval at which we update bandwidth */
+#define CGCHECK_TIME (msecs_to_jiffies(20))
+
/* Max dispatch from a group in 1 round */
static int throtl_grp_quantum = 8;
@@ -81,6 +84,9 @@ struct throtl_service_queue {
unsigned int weight;
unsigned int children_weight;
unsigned int ratio;
+
+ unsigned long active_timestamp;
+ bool active;
};
enum tg_state_flags {
@@ -162,6 +168,7 @@ struct throtl_data
bool bw_based;
bool weight_based;
+ unsigned long last_check_timestamp;
};
static void throtl_pending_timer_fn(unsigned long arg);
@@ -390,7 +397,6 @@ static void throtl_pd_init(struct blkg_policy_data *pd)
sq->parent_sq = &td->service_queue;
if (cgroup_subsys_on_dfl(io_cgrp_subsys) && blkg->parent)
sq->parent_sq = &blkg_to_tg(blkg->parent)->service_queue;
- sq->parent_sq->children_weight += sq->weight;
tg->td = td;
}
@@ -424,7 +430,7 @@ static void throtl_pd_free(struct blkg_policy_data *pd)
struct throtl_grp *tg = pd_to_tg(pd);
struct throtl_service_queue *sq = &tg->service_queue;
- if (sq->parent_sq)
+ if (sq->active && sq->parent_sq)
sq->parent_sq->children_weight -= sq->weight;
del_timer_sync(&tg->service_queue.pending_timer);
@@ -930,7 +936,7 @@ static void tg_update_bps(struct throtl_grp *tg)
sq = &tg->service_queue;
parent_sq = sq->parent_sq;
- if (!tg->td->weight_based || !parent_sq)
+ if (!tg->td->weight_based || !parent_sq || !sq->active)
return;
sq->ratio = max_t(unsigned int,
parent_sq->ratio * sq->weight / parent_sq->children_weight,
@@ -965,6 +971,26 @@ static void tg_update_ratio(struct throtl_grp *tg)
}
}
+/*
+ * Record that @tg is doing I/O right now.
+ *
+ * Walks from @tg's service queue up through ->parent_sq, stopping at the
+ * first queue that has no parent.  Every queue visited gets its
+ * active_timestamp set to the current jiffies.  A queue that was marked
+ * inactive is marked active again and its weight is added back to its
+ * parent's children_weight.  If at least one queue was reactivated,
+ * tg_update_ratio() is called for @tg.
+ */
+static void tg_update_active_time(struct throtl_grp *tg)
+{
+	struct throtl_service_queue *sq = &tg->service_queue;
+	bool update_ratio = false;
+	unsigned long now = jiffies;
+
+	/* the parentless queue at the top of the chain is never modified */
+	while (sq->parent_sq) {
+		sq->active_timestamp = now;
+		if (!sq->active) {
+			/* rejoin the parent's weight pool */
+			sq->parent_sq->children_weight += sq->weight;
+			sq->active = true;
+			update_ratio = true;
+		}
+		sq = sq->parent_sq;
+	}
+
+	if (update_ratio)
+		tg_update_ratio(tg);
+}
+
static void tg_dispatch_one_bio(struct throtl_grp *tg, bool rw)
{
struct throtl_service_queue *sq = &tg->service_queue;
@@ -984,6 +1010,8 @@ static void tg_dispatch_one_bio(struct throtl_grp *tg, bool rw)
throtl_charge_bio(tg, bio);
+ tg_update_active_time(tg);
+
/*
* If our parent is another tg, we just need to transfer @bio to
* the parent using throtl_add_bio_tg(). If our parent is
@@ -1319,7 +1347,7 @@ static ssize_t tg_set_weight(struct kernfs_open_file *of,
old_weight = tg->service_queue.weight;
tg->service_queue.weight = v;
- if (tg->service_queue.parent_sq) {
+ if (tg->service_queue.active && tg->service_queue.parent_sq) {
struct throtl_service_queue *psq = tg->service_queue.parent_sq;
if (v > old_weight)
psq->children_weight += v - old_weight;
@@ -1524,6 +1552,39 @@ static struct blkcg_policy blkcg_policy_throtl = {
.pd_free_fn = throtl_pd_free,
};
+/*
+ * Refresh @tg's activity state and, at most once per CGCHECK_TIME, scan
+ * for groups that have gone idle.
+ *
+ * @tg is first marked active via tg_update_active_time().  The scan is
+ * rate limited through td->last_check_timestamp, which holds the earliest
+ * jiffies value at which the next scan may run.
+ *
+ * The scan walks every descendant of the queue's root blkg in post-order.
+ * A service queue is marked inactive, and its weight removed from its
+ * parent's children_weight, when all of the following hold:
+ *   - it has a parent and is currently active,
+ *   - its active_timestamp is more than CGCHECK_TIME in the past,
+ *   - it has nothing queued for READ or WRITE.
+ * If any queue was deactivated, tg_update_ratio() is called afterwards.
+ */
+static void detect_inactive_cg(struct throtl_grp *tg)
+{
+	struct throtl_data *td = tg->td;
+	/* group currently being examined by the walk below */
+	struct throtl_grp *pos_tg = tg;
+	unsigned long now = jiffies;
+	struct cgroup_subsys_state *pos_css;
+	struct blkcg_gq *blkg;
+	bool update_ratio = false;
+
+	tg_update_active_time(tg);
+
+	if (time_before(now, td->last_check_timestamp))
+		return;
+	td->last_check_timestamp = now + CGCHECK_TIME;
+
+	blkg_for_each_descendant_post(blkg, pos_css, td->queue->root_blkg) {
+		struct throtl_service_queue *sq;
+
+		pos_tg = blkg_to_tg(blkg);
+		sq = &pos_tg->service_queue;
+		if (sq->parent_sq && sq->active &&
+		    time_before(sq->active_timestamp + CGCHECK_TIME, now) &&
+		    !(sq->nr_queued[READ] || sq->nr_queued[WRITE])) {
+			sq->active = false;
+			sq->parent_sq->children_weight -= sq->weight;
+			update_ratio = true;
+		}
+	}
+
+	/*
+	 * NOTE(review): the group handed to tg_update_ratio() is the last
+	 * one visited by the walk above, not the caller's @tg.  This matches
+	 * the previous code, which reused @tg as the loop cursor -- confirm
+	 * which group tg_update_ratio() actually expects.
+	 */
+	if (update_ratio)
+		tg_update_ratio(pos_tg);
+}
+
bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg,
struct bio *bio)
{
@@ -1546,6 +1607,7 @@ bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg,
sq = &tg->service_queue;
+ detect_inactive_cg(tg);
tg_update_bps(tg);
while (true) {
/* throtl is FIFO - if bios are already queued, should queue */
@@ -1696,6 +1758,7 @@ int blk_throtl_init(struct request_queue *q)
INIT_WORK(&td->dispatch_work, blk_throtl_dispatch_work_fn);
throtl_service_queue_init(&td->service_queue);
td->service_queue.ratio = WEIGHT_RATIO;
+ td->service_queue.active = true;
q->td = td;
td->queue = q;
--
2.4.6