Re: [PATCH v3] sched/fair: Preempt if the current process is ineligible

From: Chunxin Zang
Date: Tue Jul 16 2024 - 05:53:28 EST




> On Jul 15, 2024, at 21:05, John Stills <johnstills191@xxxxxxxxx> wrote:
>
>
>> On Jun 21, 2024, at 21:53, Chunxin Zang <spring.cxz@xxxxxxxxx> wrote:
>>
>>
>>
>>> On Jun 20, 2024, at 20:51, Peter Zijlstra <peterz@xxxxxxxxxxxxx> wrote:
>>>
>>> On Thu, Jun 13, 2024 at 09:14:37PM +0800, Chunxin Zang wrote:
>>>> ---
>>>> kernel/sched/fair.c | 28 +++++++++++++++++++++-------
>>>> 1 file changed, 21 insertions(+), 7 deletions(-)
>>>>
>>>> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
>>>> index 03be0d1330a6..21ef610ddb14 100644
>>>> --- a/kernel/sched/fair.c
>>>> +++ b/kernel/sched/fair.c
>>>> @@ -745,6 +745,15 @@ int entity_eligible(struct cfs_rq *cfs_rq, struct sched_entity *se)
>>>> return vruntime_eligible(cfs_rq, se->vruntime);
>>>> }
>>>>
>>>> +static bool check_entity_need_preempt(struct cfs_rq *cfs_rq, struct sched_entity *se)
>>>> +{
>>>> + if (sched_feat(RUN_TO_PARITY) || cfs_rq->nr_running <= 1 ||
>>>> + entity_eligible(cfs_rq, se))
>>>> + return false;
>>>> +
>>>> + return true;
>>>> +}
>>>> +
>>>> static u64 __update_min_vruntime(struct cfs_rq *cfs_rq, u64 vruntime)
>>>> {
>>>> u64 min_vruntime = cfs_rq->min_vruntime;
>>>> @@ -974,11 +983,13 @@ static void clear_buddies(struct cfs_rq *cfs_rq, struct sched_entity *se);
>>>> /*
>>>> * XXX: strictly: vd_i += N*r_i/w_i such that: vd_i > ve_i
>>>> * this is probably good enough.
>>>> + *
>>>> + * return true if se need preempt
>>>> */
>>>> -static void update_deadline(struct cfs_rq *cfs_rq, struct sched_entity *se)
>>>> +static bool update_deadline(struct cfs_rq *cfs_rq, struct sched_entity *se)
>>>> {
>>>> if ((s64)(se->vruntime - se->deadline) < 0)
>>>> - return;
>>>> + return false;
>>>>
>>>> /*
>>>> * For EEVDF the virtual time slope is determined by w_i (iow.
>>>> @@ -995,10 +1006,7 @@ static void update_deadline(struct cfs_rq *cfs_rq, struct sched_entity *se)
>>>> /*
>>>> * The task has consumed its request, reschedule.
>>>> */
>>>> - if (cfs_rq->nr_running > 1) {
>>>> - resched_curr(rq_of(cfs_rq));
>>>> - clear_buddies(cfs_rq, se);
>>>> - }
>>>> + return true;
>>>> }
>>>>
>>>> #include "pelt.h"
>>>> @@ -1157,6 +1165,7 @@ static void update_curr(struct cfs_rq *cfs_rq)
>>>> {
>>>> struct sched_entity *curr = cfs_rq->curr;
>>>> s64 delta_exec;
>>>> + bool need_preempt;
>>>>
>>>> if (unlikely(!curr))
>>>> return;
>>>> @@ -1166,12 +1175,17 @@ static void update_curr(struct cfs_rq *cfs_rq)
>>>> return;
>>>>
>>>> curr->vruntime += calc_delta_fair(delta_exec, curr);
>>>> - update_deadline(cfs_rq, curr);
>>>> + need_preempt = update_deadline(cfs_rq, curr);
>>>> update_min_vruntime(cfs_rq);
>>>>
>>>> if (entity_is_task(curr))
>>>> update_curr_task(task_of(curr), delta_exec);
>>>>
>>>> + if (need_preempt || check_entity_need_preempt(cfs_rq, curr)) {
>>>> + resched_curr(rq_of(cfs_rq));
>>>> + clear_buddies(cfs_rq, curr);
>>>> + }
>>>> +
>>>> account_cfs_rq_runtime(cfs_rq, delta_exec);
>>>> }
>>> Yeah sorry no. This will mess up the steady state schedule. At best we
>>> can do something like the below which will make PREEMPT_SHORT a little
>>> more effective I suppose.
>>>
>>>
>>> --- a/kernel/sched/fair.c
>>> +++ b/kernel/sched/fair.c
>>> @@ -985,10 +985,10 @@ static void clear_buddies(struct cfs_rq
>>> * XXX: strictly: vd_i += N*r_i/w_i such that: vd_i > ve_i
>>> * this is probably good enough.
>>> */
>>> -static void update_deadline(struct cfs_rq *cfs_rq, struct sched_entity *se)
>>> +static bool update_deadline(struct cfs_rq *cfs_rq, struct sched_entity *se)
>>> {
>>> if ((s64)(se->vruntime - se->deadline) < 0)
>>> - return;
>>> + return false;
>>>
>>> /*
>>> * For EEVDF the virtual time slope is determined by w_i (iow.
>>> @@ -1005,10 +1005,7 @@ static void update_deadline(struct cfs_r
>>> /*
>>> * The task has consumed its request, reschedule.
>>> */
>>> - if (cfs_rq->nr_running > 1) {
>>> - resched_curr(rq_of(cfs_rq));
>>> - clear_buddies(cfs_rq, se);
>>> - }
>>> + return true;
>>> }
>>>
>>> #include "pelt.h"
>>> @@ -1168,6 +1165,8 @@ static void update_curr(struct cfs_rq *c
>>> {
>>> struct sched_entity *curr = cfs_rq->curr;
>>> s64 delta_exec;
>>> + struct rq *rq;
>>> + bool resched;
>>>
>>> if (unlikely(!curr))
>>> return;
>>> @@ -1177,13 +1176,23 @@ static void update_curr(struct cfs_rq *c
>>> return;
>>>
>>> curr->vruntime += calc_delta_fair(delta_exec, curr);
>>> - update_deadline(cfs_rq, curr);
>>> + resched = update_deadline(cfs_rq, curr);
>>> update_min_vruntime(cfs_rq);
>>>
>>> if (entity_is_task(curr))
>>> update_curr_task(task_of(curr), delta_exec);
>>>
>>> account_cfs_rq_runtime(cfs_rq, delta_exec);
>>> +
>>> + rq = rq_of(cfs_rq);
>>> + if (rq->nr_running == 1)
>>> + return;
>>> +
>>> + if (resched ||
>>> + (curr->vlag != curr->deadline && !entity_eligible(cfs_rq, curr))) {
>>> + resched_curr(rq);
>>> + clear_buddies(cfs_rq, curr);
>>> + }
>>> }
>>>
>>> static void update_curr_fair(struct rq *rq)
>> Hi peter
>>
>> Got it. If I understand correctly, modifications to basic interfaces like update_curr
>> should be appropriate and not too aggressive. Additionally, these changes have
>> already shown significant improvements in scheduling delay (test results are at the
>> end). How about we limit this patch to these changes for now? Actually, I also want
>> to try a more aggressive preemption under NO_RUN_TO_PARITY, but it might be
>> better to consider this comprehensively after integrating the changes from your
>> latest branch.
>>
>>
>> Comparison of this modification with the mainline EEVDF in cyclictest.
>>
>> EEVDF PATCH EEVDF-NO_PARITY PATCH-NO_PARITY
>>
>> LNICE(-19) # Avg Latencies: 00191 00162 00089 00080
>>
>> LNICE(0) # Avg Latencies: 00466 00404 00289 00285
>>
>> LNICE(19) # Avg Latencies: 37151 38781 18293 19315
>>
>> thanks
>> Chunxin
>>
>>
> Hi Chunxin
>
> The latency test results look great. Have you conducted tests in other scenarios, such
> as performance testing in production networks or machine learning?
>
> --
> thanks,
> John
>
>

Hi John

Due to limited resources in my environment, over the past month, I have mainly conducted
basic scheduling latency tests (hackbench + cyclictest). Currently, this modification has been
merged into Peter's eevdf branch and is expected to be released with Peter's other changes
in the next version. I believe there will be more test data available at that time.

[sched/eevdf: Allow shorter slices to wakeup-preempt]
https://git.kernel.org/pub/scm/linux/kernel/git/peterz/queue.git/commit/?h=sched/eevdf&id=87ca38328760c9834c465d6939b4e89aa8354ac3

thanks
Chunxin