[patch 13/15] sched: expire slack quota using generation counters

From: Paul Turner
Date: Tue Mar 22 2011 - 23:12:43 EST


Introduce a generation counter that is incremented once per quota period. This
lets each cpu determine locally whether its cached quota belongs to the current
period, without the period timer having to visit every cpu and explicitly
expire stale quota at the start of each new period. (A minimal user-space
sketch of the scheme follows the diffstat below.)

Signed-off-by: Paul Turner <pjt@xxxxxxxxxx>
Signed-off-by: Bharata B Rao <bharata@xxxxxxxxxxxxxxxxxx>

---
kernel/sched.c | 6 ++++++
kernel/sched_fair.c | 42 +++++++++++++++++++++++++++++++++++++-----
2 files changed, 43 insertions(+), 5 deletions(-)
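
As a rough illustration of the scheme described in the changelog, here is a
minimal, self-contained user-space sketch; the toy_* names are made up for
illustration and are not the kernel API. The period timer bumps a single
global counter, and each cpu discovers lazily, the next time it touches its
local quota, that the quota is stale, instead of the timer walking every cpu
to expire it. The same two-integer comparison is what cfs_rq_quota_current()
performs in the patch below.

#include <stdio.h>

#define TOY_NR_CPUS 4

struct toy_cfs_bandwidth {
	int quota_generation;		/* bumped once per quota period */
};

struct toy_cfs_rq {
	int quota_generation;		/* generation the local slice was drawn from */
	long long quota_remaining;
};

static struct toy_cfs_bandwidth toy_b;
static struct toy_cfs_rq toy_rq[TOY_NR_CPUS];

/* period refresh: a single increment, no per-cpu walk */
static void toy_period_timer(void)
{
	toy_b.quota_generation++;
}

/* per-cpu fast path: comparing two integers detects a stale local slice */
static int toy_quota_current(int cpu)
{
	return toy_rq[cpu].quota_generation == toy_b.quota_generation;
}

int main(void)
{
	toy_rq[2].quota_remaining = 7;	/* cpu 2 still holds slack from period 0 */

	toy_period_timer();		/* period 1 begins */

	printf("cpu 2 quota current? %s\n",
	       toy_quota_current(2) ? "yes" : "no, expire on next use");
	return 0;
}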

Index: tip/kernel/sched.c
===================================================================
--- tip.orig/kernel/sched.c
+++ tip/kernel/sched.c
@@ -256,6 +256,7 @@ struct cfs_bandwidth {
s64 hierarchal_quota; /* used for validating consistency */
struct hrtimer period_timer;

+ int quota_generation;
struct list_head throttled_cfs_rq;
/* throttle statistics */
u64 nr_periods;
@@ -396,6 +397,7 @@ struct cfs_rq {
s64 quota_remaining;
u64 throttled_timestamp;

+ int quota_generation;
struct list_head throttled_list;
#endif
#endif
@@ -436,8 +438,10 @@ void init_cfs_bandwidth(struct cfs_bandw
raw_spin_lock_init(&cfs_b->lock);
cfs_b->quota = cfs_b->runtime = quota;
cfs_b->period = ns_to_ktime(period);
+ cfs_b->quota_generation = 0;
INIT_LIST_HEAD(&cfs_b->throttled_cfs_rq);

+
hrtimer_init(&cfs_b->period_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
cfs_b->period_timer.function = sched_cfs_period_timer;

@@ -9333,6 +9337,8 @@ static int tg_set_cfs_bandwidth(struct t
raw_spin_lock_irq(&cfs_b->lock);
cfs_b->period = ns_to_ktime(period);
cfs_b->runtime = cfs_b->quota = quota;
+
+ cfs_bump_quota_generation(cfs_b);
raw_spin_unlock_irq(&cfs_b->lock);

for_each_possible_cpu(i) {
Index: tip/kernel/sched_fair.c
===================================================================
--- tip.orig/kernel/sched_fair.c
+++ tip/kernel/sched_fair.c
@@ -1331,11 +1331,25 @@ static void check_cfs_rq_quota(struct cf
resched_task(rq_of(cfs_rq)->curr);
}

+static void cfs_bump_quota_generation(struct cfs_bandwidth *cfs_b)
+{
+ cfs_b->quota_generation++;
+ smp_mb();
+}
+
+static inline int cfs_rq_quota_current(struct cfs_rq *cfs_rq)
+{
+ struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(cfs_rq->tg);
+
+ return cfs_rq->quota_generation == cfs_b->quota_generation;
+}
+
static void request_cfs_rq_quota(struct cfs_rq *cfs_rq)
{
struct task_group *tg = cfs_rq->tg;
struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(tg);
u64 amount = 0, min_amount;
+ int generation;

min_amount = sched_cfs_bandwidth_slice() + (-cfs_rq->quota_remaining);

@@ -1347,10 +1361,18 @@ static void request_cfs_rq_quota(struct
} else {
amount = min_amount;
}
+ generation = cfs_b->quota_generation;
raw_spin_unlock(&cfs_b->lock);
}

+ /* a deficit should be carried forwards, surplus should be dropped */
+
+ if (generation != cfs_rq->quota_generation &&
+ cfs_rq->quota_remaining > 0)
+ cfs_rq->quota_remaining = 0;
+
cfs_rq->quota_remaining += amount;
+ cfs_rq->quota_generation = generation;
}

static void account_cfs_rq_quota(struct cfs_rq *cfs_rq,
@@ -1361,8 +1383,13 @@ static void account_cfs_rq_quota(struct

cfs_rq->quota_remaining -= delta_exec;

- if (cfs_rq->quota_remaining > 0)
- return;
+ /* we only want to charge deficits against the next generation */
+ if (likely(cfs_rq->quota_remaining > 0)) {
+ if (unlikely(!cfs_rq_quota_current(cfs_rq)))
+ cfs_rq->quota_remaining = 0;
+ else
+ return;
+ }

request_cfs_rq_quota(cfs_rq);
}
@@ -1492,7 +1519,8 @@ static void unthrottle_cfs_rq(struct cfs
resched_task(rq->curr);
}

-static u64 distribute_cfs_bandwidth(struct cfs_bandwidth *cfs_b, u64 runtime)
+static u64 distribute_cfs_bandwidth(struct cfs_bandwidth *cfs_b, u64 runtime,
+ int generation)
{
struct cfs_rq *cfs_rq;
u64 quota, remaining = runtime;
@@ -1512,6 +1540,7 @@ static u64 distribute_cfs_bandwidth(stru
remaining -= quota;

cfs_rq->quota_remaining += quota;
+ cfs_rq->quota_generation = generation;
if (cfs_rq_throttled(cfs_rq) && cfs_rq->quota_remaining > 0)
unthrottle_cfs_rq(cfs_rq);

@@ -1529,12 +1558,15 @@ next:
static int do_sched_cfs_period_timer(struct cfs_bandwidth *cfs_b, int overrun)
{
u64 runtime, runtime_assigned;
- int idle, throttled;
+ int idle, throttled, generation;

raw_spin_lock(&cfs_b->lock);
runtime = cfs_b->quota;
idle = cfs_b->runtime == cfs_b->runtime_assigned;
throttled = cfs_b->runtime == 0;
+
+ cfs_bump_quota_generation(cfs_b);
+ generation = cfs_b->quota_generation;
raw_spin_unlock(&cfs_b->lock);

if (runtime == RUNTIME_INF)
@@ -1543,7 +1575,7 @@ static int do_sched_cfs_period_timer(str
runtime *= overrun;
runtime_assigned = runtime;

- runtime = distribute_cfs_bandwidth(cfs_b, runtime);
+ runtime = distribute_cfs_bandwidth(cfs_b, runtime, generation);

raw_spin_lock(&cfs_b->lock);
cfs_b->runtime = runtime;
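
The expiry rule applied in request_cfs_rq_quota() and account_cfs_rq_quota()
above (a deficit is carried forward into the new period, while an unused
surplus is dropped) can be illustrated with another small user-space sketch;
the toy_* names are again made up and are not the kernel API.

#include <stdio.h>

struct toy_rq {
	int generation;
	long long remaining;	/* may go negative while a task keeps running */
};

/* drop only a positive (unused) balance left over from an older period */
static void toy_expire(struct toy_rq *rq, int current_generation)
{
	if (rq->generation != current_generation && rq->remaining > 0)
		rq->remaining = 0;
	rq->generation = current_generation;
}

int main(void)
{
	struct toy_rq surplus = { .generation = 1, .remaining = 3 };
	struct toy_rq deficit = { .generation = 1, .remaining = -2 };

	/* the period timer has since bumped the global generation to 2 */
	toy_expire(&surplus, 2);
	toy_expire(&deficit, 2);

	printf("surplus -> %lld (dropped)\n", surplus.remaining);	/* 0 */
	printf("deficit -> %lld (carried forward)\n", deficit.remaining); /* -2 */
	return 0;
}

Dropping the surplus keeps a cpu from spending slack accumulated under an old
quota setting, while a carried deficit is still charged against the freshly
distributed runtime on the next request.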

