Re: [PATCH] cpufreq: queue policy->update work to rt thread to reduce its schedule latency

From: Gaowei Pu
Date: Mon Jul 22 2024 - 04:27:18 EST

On 2024/7/22 10:30, Gaowei Pu wrote:
> Hi Tim,
>
> On 2024/7/19 6:03, Tim Chen wrote:
>> On Wed, 2024-07-17 at 14:33 +0800, Gaowei Pu wrote:
>>> Currently we encounter a problem where the cpufreq boost latency
>>> is about 10 milliseconds or worse when we boost through a cpufreq QoS request
>>> under high-workload scenarios; the boost latency is mainly consumed by the
>>> scheduling latency of the policy->update work.
>>
>> What is the tail latency now after your change?

Sorry, I missed this.
The tail latency is now within about 50 microseconds after my change.

>>
>>>
>>> We should ensure low scheduling latency for the CPU frequency limits work
>>> to meet performance and power demands, so queue the policy->update work
>>> to an RT thread to reduce its scheduling latency.
>>
>> If my understanding is correct, a kthread has a default nice
>> value of 0 and is not an RT thread.
>>
>> I think the gain you see is because
>> your patch created a dedicated kthread work queue on CPU 0.
>> The work from the policy change no longer has to compete for time with other
>> requests coming from schedule_work().
> It's not just other requests coming from schedule_work(); the work also competes
> with normal CFS tasks running on the same CPU.
>
> To avoid competing for time with the above threads, I changed the thread
> policy to RT and set the priority to 98 to reduce the scheduling latency.
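
For reference, the core of what the patch does is roughly the following
(a minimal sketch, not the full diff), assuming the worker is created at
policy allocation time:

    struct kthread_worker *worker;

    /* dedicated worker bound to the policy CPU */
    worker = kthread_create_worker_on_cpu(cpu, 0, "policy_worker%d", cpu);
    if (IS_ERR(worker))
            return PTR_ERR(worker);

    /* SCHED_FIFO with rt_priority 1, i.e. task->prio 98 */
    sched_set_fifo_low(worker->task);

    kthread_init_work(&policy->update, handle_update);

    /* later, from the QoS notifiers */
    kthread_queue_work(worker, &policy->update);
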
>>
>> If the policy change really needs to get ahead
>> of other tasks, I think you need a dedicated
>> workqueue with alloc_workqueue() using the WQ_HIGHPRI flag.

> I think the cpufreq boost or limit action should be triggered in time to meet
> performance and power demands. A dedicated high-priority workqueue would be
> better, but maybe not good enough, because the CFS pick/preemption policy is not
> purely based on thread nice values. So I think the final solution is an RT thread,
> and the policy change work deserves it :)
> thanks.
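
For comparison, the WQ_HIGHPRI alternative would look roughly like this
(an untested sketch, keeping policy->update as a work_struct as in current
mainline). Its workers run at MIN_NICE (-20) but are still CFS tasks, so
they can still be delayed under heavy load, which is why I prefer the RT
worker:

    static struct workqueue_struct *cpufreq_update_wq;

    /* high-priority (nice -20) workqueue, still scheduled by CFS */
    cpufreq_update_wq = alloc_workqueue("cpufreq_update", WQ_HIGHPRI, 0);
    if (!cpufreq_update_wq)
            return -ENOMEM;

    /* instead of schedule_work(&policy->update) */
    queue_work(cpufreq_update_wq, &policy->update);
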
>>
>> Tim
>>
>>>
>>> Signed-off-by: Gaowei Pu <pugaowei@xxxxxxxx>
>>> ---
>>> drivers/cpufreq/cpufreq.c | 24 ++++++++++++++++++------
>>> include/linux/cpufreq.h | 4 +++-
>>> 2 files changed, 21 insertions(+), 7 deletions(-)
>>>
>>> diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
>>> index a45aac17c20f..e6e42a3ba9ab 100644
>>> --- a/drivers/cpufreq/cpufreq.c
>>> +++ b/drivers/cpufreq/cpufreq.c
>>> @@ -1193,7 +1193,7 @@ void refresh_frequency_limits(struct cpufreq_policy *policy)
>>> }
>>> EXPORT_SYMBOL(refresh_frequency_limits);
>>>
>>> -static void handle_update(struct work_struct *work)
>>> +static void handle_update(struct kthread_work *work)
>>> {
>>> struct cpufreq_policy *policy =
>>> container_of(work, struct cpufreq_policy, update);
>>> @@ -1209,7 +1209,7 @@ static int cpufreq_notifier_min(struct notifier_block *nb, unsigned long freq,
>>> {
>>> struct cpufreq_policy *policy = container_of(nb, struct cpufreq_policy, nb_min);
>>>
>>> - schedule_work(&policy->update);
>>> + kthread_queue_work(policy->worker, &policy->update);
>>> return 0;
>>> }
>>>
>>> @@ -1218,7 +1218,7 @@ static int cpufreq_notifier_max(struct notifier_block *nb, unsigned long freq,
>>> {
>>> struct cpufreq_policy *policy = container_of(nb, struct cpufreq_policy, nb_max);
>>>
>>> - schedule_work(&policy->update);
>>> + kthread_queue_work(policy->worker, &policy->update);
>>> return 0;
>>> }
>>>
>>> @@ -1301,15 +1301,25 @@ static struct cpufreq_policy *cpufreq_policy_alloc(unsigned int cpu)
>>> goto err_min_qos_notifier;
>>> }
>>>
>>> + policy->worker = kthread_create_worker_on_cpu(cpu, 0, "policy_worker%d", cpu);
>>> + if (IS_ERR(policy->worker)) {
>>> + dev_err(dev, "Failed to create policy_worker%d\n", cpu);
>>> + goto err_max_qos_notifier;
>>> + }
>>> +
>>> + sched_set_fifo_low(policy->worker->task);
>>> INIT_LIST_HEAD(&policy->policy_list);
>>> init_rwsem(&policy->rwsem);
>>> spin_lock_init(&policy->transition_lock);
>>> init_waitqueue_head(&policy->transition_wait);
>>> - INIT_WORK(&policy->update, handle_update);
>>> + kthread_init_work(&policy->update, handle_update);
>>>
>>> policy->cpu = cpu;
>>> return policy;
>>>
>>> +err_max_qos_notifier:
>>> + freq_qos_remove_notifier(&policy->constraints, FREQ_QOS_MAX,
>>> + &policy->nb_max);
>>> err_min_qos_notifier:
>>> freq_qos_remove_notifier(&policy->constraints, FREQ_QOS_MIN,
>>> &policy->nb_min);
>>> @@ -1353,7 +1363,9 @@ static void cpufreq_policy_free(struct cpufreq_policy *policy)
>>> &policy->nb_min);
>>>
>>> /* Cancel any pending policy->update work before freeing the policy. */
>>> - cancel_work_sync(&policy->update);
>>> + kthread_cancel_work_sync(&policy->update);
>>> + if (policy->worker)
>>> + kthread_destroy_worker(policy->worker);
>>>
>>> if (policy->max_freq_req) {
>>> /*
>>> @@ -1802,7 +1814,7 @@ static unsigned int cpufreq_verify_current_freq(struct cpufreq_policy *policy, b
>>>
>>> cpufreq_out_of_sync(policy, new_freq);
>>> if (update)
>>> - schedule_work(&policy->update);
>>> + kthread_queue_work(policy->worker, &policy->update);
>>> }
>>>
>>> return new_freq;
>>> diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
>>> index 20f7e98ee8af..73029daddfc5 100644
>>> --- a/include/linux/cpufreq.h
>>> +++ b/include/linux/cpufreq.h
>>> @@ -20,6 +20,7 @@
>>> #include <linux/spinlock.h>
>>> #include <linux/sysfs.h>
>>> #include <linux/minmax.h>
>>> +#include <linux/kthread.h>
>>>
>>> /*********************************************************************
>>> * CPUFREQ INTERFACE *
>>> @@ -77,8 +78,9 @@ struct cpufreq_policy {
>>> void *governor_data;
>>> char last_governor[CPUFREQ_NAME_LEN]; /* last governor used */
>>>
>>> - struct work_struct update; /* if update_policy() needs to be
>>> + struct kthread_work update; /* if update_policy() needs to be
>>> * called, but you're in IRQ context */
>>> + struct kthread_worker *worker;
>>>
>>> struct freq_constraints constraints;
>>> struct freq_qos_request *min_freq_req;
>>
>