Re: [patch 04/15] sched: throttle cfs_rq entities which exceed their local quota
From: Paul Turner
Date: Thu Mar 24 2011 - 03:40:47 EST
On Wed, Mar 23, 2011 at 11:36 PM, Bharata B Rao
<bharata@xxxxxxxxxxxxxxxxxx> wrote:
> On Tue, Mar 22, 2011 at 08:03:30PM -0700, Paul Turner wrote:
>> In account_cfs_rq_quota() (via update_curr()) we track consumption versus a
>> cfs_rq's locally assigned quota and whether there is global quota available
>> to provide a refill when it runs out.
>>
>> In the case that there is no quota remaining it's necessary to throttle so
>> that execution ceases until the subsequent period. While it is at this
>> boundary that we detect (and signal for, via resched_task) that a throttle is
>> required, the actual operation is deferred until put_prev_entity().
>>
>> At this point the cfs_rq is marked as throttled and not re-enqueued; this
>> avoids potential interactions with throttled runqueues in the event that we
>> are not immediately able to evict the running task.
>>
>> Signed-off-by: Paul Turner <pjt@xxxxxxxxxx>
>> Signed-off-by: Nikhil Rao <ncrao@xxxxxxxxxx>
>> Signed-off-by: Bharata B Rao <bharata@xxxxxxxxxxxxxxxxxx>
>> ---
>> kernel/sched.c | 2
>> kernel/sched_fair.c | 117 +++++++++++++++++++++++++++++++++++++++++++++++++---
>> 2 files changed, 113 insertions(+), 6 deletions(-)
>>
>> Index: tip/kernel/sched.c
>> ===================================================================
>> --- tip.orig/kernel/sched.c
>> +++ tip/kernel/sched.c
>> @@ -386,7 +386,7 @@ struct cfs_rq {
>> unsigned long load_contribution;
>> #endif
>> #ifdef CONFIG_CFS_BANDWIDTH
>> - int quota_enabled;
>> + int quota_enabled, throttled;
>> s64 quota_remaining;
>> #endif
>> #endif
>> Index: tip/kernel/sched_fair.c
>> ===================================================================
>> --- tip.orig/kernel/sched_fair.c
>> +++ tip/kernel/sched_fair.c
>> @@ -321,9 +321,6 @@ find_matching_se(struct sched_entity **s
>>
>> #endif /* CONFIG_FAIR_GROUP_SCHED */
>>
>> -static void account_cfs_rq_quota(struct cfs_rq *cfs_rq,
>> - unsigned long delta_exec);
>> -
>> /**************************************************************
>> * Scheduling class tree data structure manipulation methods:
>> */
>> @@ -588,6 +585,9 @@ __update_curr(struct cfs_rq *cfs_rq, str
>> #endif
>> }
>>
>> +static void account_cfs_rq_quota(struct cfs_rq *cfs_rq,
>> + unsigned long delta_exec);
>> +
>> static void update_curr(struct cfs_rq *cfs_rq)
>> {
>> struct sched_entity *curr = cfs_rq->curr;
>> @@ -1221,6 +1221,9 @@ static struct sched_entity *pick_next_en
>> return se;
>> }
>>
>> +static void throttle_cfs_rq(struct cfs_rq *cfs_rq);
>> +static inline int within_bandwidth(struct cfs_rq *cfs_rq);
>> +
>> static void put_prev_entity(struct cfs_rq *cfs_rq, struct sched_entity *prev)
>> {
>> /*
>> @@ -1230,6 +1233,9 @@ static void put_prev_entity(struct cfs_r
>> if (prev->on_rq)
>> update_curr(cfs_rq);
>>
>> + if (!within_bandwidth(cfs_rq))
>> + throttle_cfs_rq(cfs_rq);
>> +
>> check_spread(cfs_rq, prev);
>> if (prev->on_rq) {
>> update_stats_wait_start(cfs_rq, prev);
>> @@ -1241,6 +1247,8 @@ static void put_prev_entity(struct cfs_r
>> cfs_rq->curr = NULL;
>> }
>>
>> +static void check_cfs_rq_quota(struct cfs_rq *cfs_rq);
>> +
>> static void
>> entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr, int queued)
>> {
>> @@ -1249,6 +1257,9 @@ entity_tick(struct cfs_rq *cfs_rq, struc
>> */
>> update_curr(cfs_rq);
>>
>> + /* check that entity's usage is still within quota (if enabled) */
>> + check_cfs_rq_quota(cfs_rq);
>> +
>> /*
>> * Update share accounting for long-running entities.
>> */
>> @@ -1294,6 +1305,46 @@ static inline u64 sched_cfs_bandwidth_sl
>> return (u64)sysctl_sched_cfs_bandwidth_slice * NSEC_PER_USEC;
>> }
>>
>> +static inline int cfs_rq_throttled(struct cfs_rq *cfs_rq)
>> +{
>> + return cfs_rq->throttled;
>> +}
>> +
>> +static inline int throttled_hierarchy(struct cfs_rq *cfs_rq)
>> +{
>> + struct task_group *tg;
>> + struct sched_entity *se;
>> +
>> + if (cfs_rq_throttled(cfs_rq))
>> + return 1;
>> +
>> + tg = cfs_rq->tg;
>> + se = tg->se[cpu_of(rq_of(cfs_rq))];
>> + if (!se)
>> + return 0;
>> +
>> + for_each_sched_entity(se) {
>> + if (cfs_rq_throttled(cfs_rq_of(se)))
>> + return 1;
>> + }
>> +
>> + return 0;
>> +}
>> +
>> +static inline int within_bandwidth(struct cfs_rq *cfs_rq)
>> +{
>> + return !cfs_rq->quota_enabled || cfs_rq->quota_remaining > 0;
>> +}
>> +
>> +static void check_cfs_rq_quota(struct cfs_rq *cfs_rq)
>> +{
>> + if (within_bandwidth(cfs_rq))
>> + return;
>> +
>> +
>> + resched_task(rq_of(cfs_rq)->curr);
>> +}
>> +
>> static void request_cfs_rq_quota(struct cfs_rq *cfs_rq)
>> {
>> struct task_group *tg = cfs_rq->tg;
>> @@ -1330,6 +1381,29 @@ static void account_cfs_rq_quota(struct
>> request_cfs_rq_quota(cfs_rq);
>> }
>>
>> +static void throttle_cfs_rq(struct cfs_rq *cfs_rq)
>> +{
>> + struct sched_entity *se;
>> +
>> + se = cfs_rq->tg->se[cpu_of(rq_of(cfs_rq))];
>> +
>> + /* account load preceding throttle */
>> + update_cfs_load(cfs_rq, 0);
>> +
>> + for_each_sched_entity(se) {
>> + struct cfs_rq *qcfs_rq = cfs_rq_of(se);
>> + /* throttled entity or throttle-on-deactivate */
>> + if (!se->on_rq)
>> + break;
>> +
>> + dequeue_entity(qcfs_rq, se, DEQUEUE_SLEEP);
>> + if (qcfs_rq->load.weight)
>> + break;
>> + }
>> +
>> + cfs_rq->throttled = 1;
>> +}
>
> Since throttling is done from put_prev_entity(), iiuc, you will be
> doing 'put' for current entities which are not on the tree. Can you
> avoid the dequeue_entity() call here which I think will anyway bail out
> from actual dequeueing (se != cfs_rq->curr check in dequeue_entity).
>
No -- cfs_rq->curr is still wholly enqueued except for its residency in the
rb-tree; this includes factors such as the number of runnable entities and
its contribution to load. The dequeue is necessary; a throttle is analogous
to the current task blocking, only at the group-entity level.
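
To make that concrete, here is a simplified sketch of dequeue_entity() with
the stats/buddy/vruntime updates elided (so the shape of it, not the exact
code): only the rb-tree removal is guarded by the se != cfs_rq->curr check;
marking the entity !on_rq and pulling its weight out of the cfs_rq still
applies to curr, and that accounting is exactly what the throttle depends on
(e.g. the qcfs_rq->load.weight test above).

static void dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se,
			   int flags)
{
	/* charge the entity's runtime up to now */
	update_curr(cfs_rq);

	/* curr is not in the rb-tree, so only the tree removal is skipped */
	if (se != cfs_rq->curr)
		__dequeue_entity(cfs_rq, se);

	/*
	 * These still apply to curr: mark it !on_rq and remove its weight
	 * from cfs_rq->load / nr_running, which is what throttle_cfs_rq()
	 * relies on when it walks up the hierarchy.
	 */
	se->on_rq = 0;
	account_entity_dequeue(cfs_rq, se);
}

This is also why the throttle path mirrors what a dequeue for a blocking
task does, just starting from the group se and stopping once a parent
cfs_rq still has weight left.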
> Regards,
> Bharata.
>