Re: [PATCH] psi: clean up time collection functions

From: Suren Baghdasaryan
Date: Tue Jun 08 2021 - 16:41:34 EST


On Tue, Jun 8, 2021 at 12:04 PM Johannes Weiner <hannes@xxxxxxxxxxx> wrote:
>
> The functions to read the per-cpu time buckets and aggregate them
> don't have the greatest names and an awkward calling convention. Clean
> this up to make things a bit more readable:
>
> - Rename get_recent_times() to read_cpu_states() to make it clearer
> this is about extracting psi state from one cpu, and not just the
> times, either. Remove the pchanged_states return parameter and make
> it the function's return value; rename the local variable 'states',
> as it doesn't reflect changed states, but currently active ones.
>
> - rename collect_percpu_times() to aggregate_cpus(), to indicate that
> actual data processing happens there
>
> - move calc_avgs() out of the way, closer to where it's being used.
>
> Signed-off-by: Johannes Weiner <hannes@xxxxxxxxxxx>

Reviewed-by: Suren Baghdasaryan <surenb@xxxxxxxxxx>

> ---
> kernel/sched/psi.c | 90 ++++++++++++++++++++++------------------------
> 1 file changed, 42 insertions(+), 48 deletions(-)
>
> diff --git a/kernel/sched/psi.c b/kernel/sched/psi.c
> index 9d647d974f55..1faf383f6ec4 100644
> --- a/kernel/sched/psi.c
> +++ b/kernel/sched/psi.c
> @@ -238,17 +238,15 @@ static bool test_state(unsigned int *tasks, enum psi_states state)
> }
> }
>
> -static void get_recent_times(struct psi_group *group, int cpu,
> - enum psi_aggregators aggregator, u32 *times,
> - u32 *pchanged_states)
> +static u32 snapshot_cpu_states(struct psi_group *group, int cpu,
> + enum psi_aggregators aggregator, u32 *times)
> {
> struct psi_group_cpu *groupc = per_cpu_ptr(group->pcpu, cpu);
> u64 now, state_start;
> enum psi_states s;
> unsigned int seq;
> u32 state_mask;
> -
> - *pchanged_states = 0;
> + u32 states = 0;
>
> /* Snapshot a coherent view of the CPU state */
> do {
> @@ -279,37 +277,18 @@ static void get_recent_times(struct psi_group *group, int cpu,
>
> times[s] = delta;
> if (delta)
> - *pchanged_states |= (1 << s);
> + states |= (1 << s);
> }
> -}
>
> -static void calc_avgs(unsigned long avg[3], int missed_periods,
> - u64 time, u64 period)
> -{
> - unsigned long pct;
> -
> - /* Fill in zeroes for periods of no activity */
> - if (missed_periods) {
> - avg[0] = calc_load_n(avg[0], EXP_10s, 0, missed_periods);
> - avg[1] = calc_load_n(avg[1], EXP_60s, 0, missed_periods);
> - avg[2] = calc_load_n(avg[2], EXP_300s, 0, missed_periods);
> - }
> -
> - /* Sample the most recent active period */
> - pct = div_u64(time * 100, period);
> - pct *= FIXED_1;
> - avg[0] = calc_load(avg[0], EXP_10s, pct);
> - avg[1] = calc_load(avg[1], EXP_60s, pct);
> - avg[2] = calc_load(avg[2], EXP_300s, pct);
> + return states;
> }
>
> -static void collect_percpu_times(struct psi_group *group,
> - enum psi_aggregators aggregator,
> - u32 *pchanged_states)
> +static u32 aggregate_cpus(struct psi_group *group,
> + enum psi_aggregators aggregator)
> {
> u64 deltas[NR_PSI_STATES - 1] = { 0, };
> unsigned long nonidle_total = 0;
> - u32 changed_states = 0;
> + u32 states = 0;
> int cpu;
> int s;
>
> @@ -324,11 +303,8 @@ static void collect_percpu_times(struct psi_group *group,
> for_each_possible_cpu(cpu) {
> u32 times[NR_PSI_STATES];
> u32 nonidle;
> - u32 cpu_changed_states;
>
> - get_recent_times(group, cpu, aggregator, times,
> - &cpu_changed_states);
> - changed_states |= cpu_changed_states;
> + states |= snapshot_cpu_states(group, cpu, aggregator, times);
>
> nonidle = nsecs_to_jiffies(times[PSI_NONIDLE]);
> nonidle_total += nonidle;
> @@ -354,15 +330,34 @@ static void collect_percpu_times(struct psi_group *group,
> group->total[aggregator][s] +=
> div_u64(deltas[s], max(nonidle_total, 1UL));
>
> - if (pchanged_states)
> - *pchanged_states = changed_states;
> + return states;
> +}
> +
> +static void calc_avgs(unsigned long avg[3], int missed_periods,
> + u64 time, u64 period)
> +{
> + unsigned long pct;
> +
> + /* Fill in zeroes for periods of no activity */
> + if (missed_periods) {
> + avg[0] = calc_load_n(avg[0], EXP_10s, 0, missed_periods);
> + avg[1] = calc_load_n(avg[1], EXP_60s, 0, missed_periods);
> + avg[2] = calc_load_n(avg[2], EXP_300s, 0, missed_periods);
> + }
> +
> + /* Sample the most recent active period */
> + pct = div_u64(time * 100, period);
> + pct *= FIXED_1;
> + avg[0] = calc_load(avg[0], EXP_10s, pct);
> + avg[1] = calc_load(avg[1], EXP_60s, pct);
> + avg[2] = calc_load(avg[2], EXP_300s, pct);
> }
>
> static void update_averages(struct psi_group *group)
> {
> unsigned long missed_periods = 0;
> u64 now, expires, period;
> - u32 changed_states;
> + u32 states;
> int s;
>
> /* avgX= */
> @@ -402,7 +397,7 @@ static void update_averages(struct psi_group *group)
> group->avg_last_update = now;
> group->avg_next_update = expires + ((1 + missed_periods) * psi_period);
>
> - collect_percpu_times(group, PSI_AVGS, &changed_states);
> + states = aggregate_cpus(group, PSI_AVGS);
> for (s = 0; s < NR_PSI_STATES - 1; s++) {
> u32 sample;
>
> @@ -430,7 +425,7 @@ static void update_averages(struct psi_group *group)
> calc_avgs(group->avg[s], missed_periods, sample, period);
> }
>
> - if (changed_states & (1 << PSI_NONIDLE)) {
> + if (states & (1 << PSI_NONIDLE)) {
> unsigned long delay;
>
> delay = nsecs_to_jiffies(group->avg_next_update - now) + 1;
> @@ -585,24 +580,24 @@ static void psi_schedule_poll_work(struct psi_group *group, unsigned long delay)
>
> static void psi_poll_work(struct psi_group *group)
> {
> - u32 changed_states;
> + unsigned long delay;
> + u32 states;
> u64 now;
>
> mutex_lock(&group->trigger_lock);
>
> now = sched_clock();
> + states = aggregate_cpus(group, PSI_POLL);
>
> - collect_percpu_times(group, PSI_POLL, &changed_states);
> -
> - if (changed_states & group->poll_states) {
> + if (states & group->poll_states) {
> /* Initialize trigger windows when entering polling mode */
> if (now > group->polling_until)
> init_triggers(group, now);
>
> /*
> - * Keep the monitor active for at least the duration of the
> - * minimum tracking window as long as monitor states are
> - * changing.
> + * Keep the monitor active for at least the duration
> + * of the minimum tracking window after a polled state
> + * has been observed.
> */
> group->polling_until = now +
> group->poll_min_period * UPDATES_PER_WINDOW;
> @@ -616,9 +611,8 @@ static void psi_poll_work(struct psi_group *group)
> if (now >= group->polling_next_update)
> group->polling_next_update = update_triggers(group, now);
>
> - psi_schedule_poll_work(group,
> - nsecs_to_jiffies(group->polling_next_update - now) + 1);
> -
> + delay = nsecs_to_jiffies(group->polling_next_update - now) + 1;
> + psi_schedule_poll_work(group, delay);
> out:
> mutex_unlock(&group->trigger_lock);
> }
> --
> 2.32.0
>