Re: [RFC PATCH v3 10/10] sched/fair: Throttle CFS tasks on return to userspace

From: Peter Zijlstra
Date: Fri Jul 12 2024 - 13:43:31 EST


On Thu, Jul 11, 2024 at 03:00:04PM +0200, Valentin Schneider wrote:

> +static void throttle_one_task(struct cfs_rq *cfs_rq, struct task_struct *p)
> +{
> + long task_delta, idle_task_delta;
> + struct sched_entity *se = &p->se;
> +
> + list_add(&p->throttle_node, &cfs_rq->throttled_limbo_list);
> +
> + task_delta = 1;
> + idle_task_delta = cfs_rq_is_idle(cfs_rq) ? 1 : 0;
> +
> + for_each_sched_entity(se) {
> + cfs_rq = cfs_rq_of(se);
> +
> + if (!se->on_rq)
> + return;
> +
> + dequeue_entity(cfs_rq, se, DEQUEUE_SLEEP);
> + cfs_rq->h_nr_running -= task_delta;
> + cfs_rq->idle_h_nr_running -= idle_task_delta;
> +
> + if (cfs_rq->load.weight) {
> + /* Avoid re-evaluating load for this entity: */
> + se = parent_entity(se);
> + break;
> + }
> + }
> +
> + for_each_sched_entity(se) {
> + cfs_rq = cfs_rq_of(se);
> + /* throttled entity or throttle-on-deactivate */
> + if (!se->on_rq)
> + goto throttle_done;
> +
> + update_load_avg(cfs_rq, se, 0);
> + se_update_runnable(se);
> + cfs_rq->h_nr_running -= task_delta;
> + cfs_rq->idle_h_nr_running -= idle_task_delta;
> + }
> +
> +throttle_done:
> + /* At this point se is NULL and we are at root level */
> + sub_nr_running(rq_of(cfs_rq), 1);
> +}

I know you're just moving code around, but we should look at whether we
can share code between this and dequeue_task_fair().

I have patches around this in that eevdf series I should send out again;
I'll try and take a stab at it.
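
Something like the below is roughly the shape I mean -- completely
untested, and dequeue_hierarchy() is just a made-up name for the shared
part; both callers walk the se hierarchy the same way:

static bool dequeue_hierarchy(struct rq *rq, struct task_struct *p, int flags)
{
	struct sched_entity *se = &p->se;
	long task_delta = 1;
	long idle_task_delta = cfs_rq_is_idle(cfs_rq_of(se)) ? 1 : 0;

	for_each_sched_entity(se) {
		struct cfs_rq *cfs_rq = cfs_rq_of(se);

		if (!se->on_rq)
			return false;

		dequeue_entity(cfs_rq, se, flags);
		cfs_rq->h_nr_running -= task_delta;
		cfs_rq->idle_h_nr_running -= idle_task_delta;

		/* Stop dequeueing once an ancestor cfs_rq still has load. */
		if (cfs_rq->load.weight) {
			se = parent_entity(se);
			break;
		}
	}

	/* Remaining ancestors stay queued; only fix up their stats. */
	for_each_sched_entity(se) {
		struct cfs_rq *cfs_rq = cfs_rq_of(se);

		if (!se->on_rq)
			break;

		update_load_avg(cfs_rq, se, 0);
		se_update_runnable(se);
		cfs_rq->h_nr_running -= task_delta;
		cfs_rq->idle_h_nr_running -= idle_task_delta;
	}

	/* At this point we are at root level. */
	sub_nr_running(rq, task_delta);
	return true;
}

Then throttle_one_task() keeps only the limbo list_add() and
dequeue_task_fair() keeps its flag propagation on top of that.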

> -static void task_throttle_cancel_irq_work_fn(struct irq_work *work)
> +static void throttle_cfs_rq_work(struct callback_head *work)
> {
> - /* Write me */
> + struct task_struct *p = container_of(work, struct task_struct, sched_throttle_work);
> + struct sched_entity *se;
> + struct rq *rq;
> + struct cfs_rq *cfs_rq;
> +
> + WARN_ON_ONCE(p != current);
> + p->sched_throttle_work.next = &p->sched_throttle_work;
> + /*
> + * If task is exiting, then there won't be a return to userspace, so we
> + * don't have to bother with any of this.
> + */
> + if (p->flags & PF_EXITING)
> + return;
> +
> + CLASS(task_rq_lock, rq_guard)(p);
> + rq = rq_guard.rq;

The other way to write this is:

	scoped_guard (task_rq_lock, p) {
		struct rq *rq = scope.rq;

> + se = &p->se;
> + cfs_rq = cfs_rq_of(se);
> +
> + /*
> + * If not in limbo, then either replenish has happened or this task got
> + * migrated out of the throttled cfs_rq, move along
> + */
> + if (!cfs_rq->throttle_count)
> + return;
> +
> + update_rq_clock(rq);
> +
> + throttle_one_task(cfs_rq, p);
> +
> + resched_curr(rq);

	}

> +}
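
That is, with the PF_EXITING early return left in front, the tail of the
function would read something like this (untested, same body as quoted
above, just re-indented into the scope):

	scoped_guard (task_rq_lock, p) {
		struct rq *rq = scope.rq;
		struct sched_entity *se = &p->se;
		struct cfs_rq *cfs_rq = cfs_rq_of(se);

		/*
		 * If not in limbo, then either replenish has happened or this
		 * task got migrated out of the throttled cfs_rq, move along.
		 */
		if (!cfs_rq->throttle_count)
			return;

		update_rq_clock(rq);
		throttle_one_task(cfs_rq, p);
		resched_curr(rq);
	}

and the guard is released at the end of the scope (or on the early
return) instead of living until the end of the function.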