[PATCH] cpufreq: stats: Switch to ktime and msec instead of jiffies and usertime
From: Viresh Kumar
Date: Tue Nov 10 2020 - 06:07:45 EST
The cpufreq and thermal core, both provide sysfs statistics to help
userspace learn about the behavior of frequencies and cooling states.
This is how they look:
/sys/devices/system/cpu/cpufreq/policy0/stats/time_in_state:208000 11
/sys/devices/system/cpu/cpufreq/policy0/stats/time_in_state:432000 147
/sys/devices/system/cpu/cpufreq/policy0/stats/time_in_state:729000 1600
/sys/devices/system/cpu/cpufreq/policy0/stats/time_in_state:960000 879
/sys/devices/system/cpu/cpufreq/policy0/stats/time_in_state:1200000 399
/sys/class/thermal/cooling_device0/stats/time_in_state_ms:state0 4097
/sys/class/thermal/cooling_device0/stats/time_in_state_ms:state1 8932
/sys/class/thermal/cooling_device0/stats/time_in_state_ms:state2 15868
/sys/class/thermal/cooling_device0/stats/time_in_state_ms:state3 1384
/sys/class/thermal/cooling_device0/stats/time_in_state_ms:state4 103
Here, state0 of thermal corresponds to the highest frequency of the CPU,
i.e. 1200000 and state4 to the lowest one.
While both of these try to show similar kind of data (which can still be
very much different from each other), the values looked different (by a
factor of 10, i.e. thermal's time_in_state is almost 10 times that of
cpufreq time_in_state).
This comes from the fact that cpufreq core displays the time in usertime
units (10 ms).
It would be better if both the frameworks displayed times in the same
unit as the users may need to correlate between them and different
scales just make it awkward. And the choice of thermal core for that
(msec) seems to be a better choice as it is easier to read.
The thermal core also does the stats calculations using ktime, which is
much more accurate as compared to jiffies used by cpufreq core.
This patch updates the cpufreq core to use ktime for the internal
calculations and changes the units of time_in_state to msec.
The results look like this after this commit:
/sys/devices/system/cpu/cpufreq/policy0/stats/time_in_state:208000 13
/sys/devices/system/cpu/cpufreq/policy0/stats/time_in_state:432000 790
/sys/devices/system/cpu/cpufreq/policy0/stats/time_in_state:729000 12492
/sys/devices/system/cpu/cpufreq/policy0/stats/time_in_state:960000 13259
/sys/devices/system/cpu/cpufreq/policy0/stats/time_in_state:1200000 3830
/sys/class/thermal/cooling_device0/stats/time_in_state_ms:state0 3888
/sys/class/thermal/cooling_device0/stats/time_in_state_ms:state1 13432
/sys/class/thermal/cooling_device0/stats/time_in_state_ms:state2 12336
/sys/class/thermal/cooling_device0/stats/time_in_state_ms:state3 740
/sys/class/thermal/cooling_device0/stats/time_in_state_ms:state4 0
FWIW, tools/power/cpupower/ does consume the time_in_state values from
the sysfs files but it is independent of the unit of the time and didn't
require an update.
Signed-off-by: Viresh Kumar <viresh.kumar@xxxxxxxxxx>
---
Documentation/cpu-freq/cpufreq-stats.rst | 5 +--
drivers/cpufreq/cpufreq_stats.c | 47 +++++++++++++-----------
2 files changed, 28 insertions(+), 24 deletions(-)
diff --git a/Documentation/cpu-freq/cpufreq-stats.rst b/Documentation/cpu-freq/cpufreq-stats.rst
index 9ad695b1c7db..9f94012a882f 100644
--- a/Documentation/cpu-freq/cpufreq-stats.rst
+++ b/Documentation/cpu-freq/cpufreq-stats.rst
@@ -64,9 +64,8 @@ need for a reboot.
This gives the amount of time spent in each of the frequencies supported by
this CPU. The cat output will have "<frequency> <time>" pair in each line, which
-will mean this CPU spent <time> usertime units of time at <frequency>. Output
-will have one line for each of the supported frequencies. usertime units here
-is 10mS (similar to other time exported in /proc).
+will mean this CPU spent <time> msec of time at <frequency>. Output will have
+one line for each of the supported frequencies.
::
diff --git a/drivers/cpufreq/cpufreq_stats.c b/drivers/cpufreq/cpufreq_stats.c
index 6cd5c8ab5d49..e054ada291e7 100644
--- a/drivers/cpufreq/cpufreq_stats.c
+++ b/drivers/cpufreq/cpufreq_stats.c
@@ -14,35 +14,38 @@
struct cpufreq_stats {
unsigned int total_trans;
- unsigned long long last_time;
+ ktime_t last_time;
unsigned int max_state;
unsigned int state_num;
unsigned int last_index;
- u64 *time_in_state;
+ ktime_t *time_in_state;
unsigned int *freq_table;
unsigned int *trans_table;
/* Deferred reset */
unsigned int reset_pending;
- unsigned long long reset_time;
+ ktime_t reset_time;
};
-static void cpufreq_stats_update(struct cpufreq_stats *stats,
- unsigned long long time)
+static void cpufreq_stats_update(struct cpufreq_stats *stats, ktime_t time)
{
- unsigned long long cur_time = get_jiffies_64();
+ ktime_t cur_time = ktime_get(), delta;
- stats->time_in_state[stats->last_index] += cur_time - time;
+ delta = ktime_sub(cur_time, time);
+ stats->time_in_state[stats->last_index] =
+ ktime_add(stats->time_in_state[stats->last_index], delta);
stats->last_time = cur_time;
}
static void cpufreq_stats_reset_table(struct cpufreq_stats *stats)
{
- unsigned int count = stats->max_state;
+ unsigned int count = stats->max_state, i;
+
+ for (i = 0; i < count; i++)
+ stats->time_in_state[i] = ktime_set(0, 0);
- memset(stats->time_in_state, 0, count * sizeof(u64));
memset(stats->trans_table, 0, count * count * sizeof(int));
- stats->last_time = get_jiffies_64();
+ stats->last_time = ktime_get();
stats->total_trans = 0;
/* Adjust for the time elapsed since reset was requested */
@@ -70,7 +73,7 @@ static ssize_t show_time_in_state(struct cpufreq_policy *policy, char *buf)
{
struct cpufreq_stats *stats = policy->stats;
bool pending = READ_ONCE(stats->reset_pending);
- unsigned long long time;
+ ktime_t time, now = ktime_get(), delta;
ssize_t len = 0;
int i;
@@ -82,18 +85,20 @@ static ssize_t show_time_in_state(struct cpufreq_policy *policy, char *buf)
* before the reset_pending read above.
*/
smp_rmb();
- time = get_jiffies_64() - READ_ONCE(stats->reset_time);
+ time = ktime_sub(now, READ_ONCE(stats->reset_time));
} else {
- time = 0;
+ time = ktime_set(0, 0);;
}
} else {
time = stats->time_in_state[i];
- if (i == stats->last_index)
- time += get_jiffies_64() - stats->last_time;
+ if (i == stats->last_index) {
+ delta = ktime_sub(now, stats->last_time);
+ time = ktime_add(delta, time);
+ }
}
len += sprintf(buf + len, "%u %llu\n", stats->freq_table[i],
- jiffies_64_to_clock_t(time));
+ ktime_to_ms(time));
}
return len;
}
@@ -109,7 +114,7 @@ static ssize_t store_reset(struct cpufreq_policy *policy, const char *buf,
* Defer resetting of stats to cpufreq_stats_record_transition() to
* avoid races.
*/
- WRITE_ONCE(stats->reset_time, get_jiffies_64());
+ WRITE_ONCE(stats->reset_time, ktime_get());
/*
* The memory barrier below is to prevent the readers of reset_time from
* seeing a stale or partially updated value.
@@ -228,9 +233,9 @@ void cpufreq_stats_create_table(struct cpufreq_policy *policy)
if (!stats)
return;
- alloc_size = count * sizeof(int) + count * sizeof(u64);
-
- alloc_size += count * count * sizeof(int);
+ alloc_size = count * sizeof(*stats->time_in_state);
+ alloc_size += count * sizeof(*stats->freq_table);
+ alloc_size += count * count * sizeof(*stats->trans_table);
/* Allocate memory for time_in_state/freq_table/trans_table in one go */
stats->time_in_state = kzalloc(alloc_size, GFP_KERNEL);
@@ -249,7 +254,7 @@ void cpufreq_stats_create_table(struct cpufreq_policy *policy)
stats->freq_table[i++] = pos->frequency;
stats->state_num = i;
- stats->last_time = get_jiffies_64();
+ stats->last_time = ktime_get();
stats->last_index = freq_table_get_index(stats, policy->cur);
policy->stats = stats;
--
2.25.0.rc1.19.g042ed3e048af