Re: [RESEND PATCH 2/2] sched/fair: Optimize __update_sched_avg()

From: Peter Zijlstra
Date: Thu Mar 30 2017 - 10:15:00 EST

Next message: Theodore Ts'o: "Re: [PATCH] serial: Do not treat the IIR register as a bitfield"
Previous message: Denis Kirjanov: "[PATCH] tty/hvc_console: fix console lock ordering with spinlock"
In reply to: Yuyang Du: "Re: [RESEND PATCH 2/2] sched/fair: Optimize __update_sched_avg()"
Next in thread: Paul Turner: "Re: [RESEND PATCH 2/2] sched/fair: Optimize __update_sched_avg()"
Messages sorted by: [ date ] [ thread ] [ subject ] [ author ]

On Thu, Mar 30, 2017 at 02:16:58PM +0200, Peter Zijlstra wrote:
> On Thu, Mar 30, 2017 at 04:21:08AM -0700, Paul Turner wrote:

> > > +
> > > + if (unlikely(periods >= LOAD_AVG_MAX_N))
> > > return LOAD_AVG_MAX;

> >
> > Is this correct in the iterated periods > LOAD_AVG_MAX_N case?
> > I don't think the decay above is guaranteed to return these to zero.
>
> Ah!
>
> Indeed, so decay_load() needs LOAD_AVG_PERIOD * 63 before it truncates
> to 0, because every LOAD_AVG_PERIOD we half the value; loose 1 bit; so
> 63 of those and we're 0.
>
> But __accumulate_sum() OTOH returns LOAD_AVG_MAX after only
> LOAD_AVG_MAX_N, which < LOAD_AVG_PERIOD * 63.
>
> So yes, combined we exceed LOAD_AVG_MAX, which is bad. Let me think what
> to do about that.

So at the very least it should be decay_load(LOAD_AVG_MAX, 1) (aka
LOAD_AVG_MAX - 1024), but that still doesn't account for the !0
decay_load() of the first segment.

I'm thinking that we can compute the middle segment, by taking the max
value and chopping off the ends, like:

p
c2 = 1024 \Sum y^n
n=1

inf inf
= 1024 ( \Sum y^n - \Sum y^n - y^0 )
n=0 n=p

Which gives something like the below.. Or am I completely off my rocker?

---
kernel/sched/fair.c | 70 ++++++++++++++---------------------------------------
1 file changed, 18 insertions(+), 52 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 76f67b3e34d6..4f17ec0a378a 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -2744,26 +2744,6 @@ static const u32 runnable_avg_yN_inv[] = {
};

/*
- * Precomputed \Sum y^k { 1<=k<=n }. These are floor(true_value) to prevent
- * over-estimates when re-combining.
- */
-static const u32 runnable_avg_yN_sum[] = {
- 0, 1002, 1982, 2941, 3880, 4798, 5697, 6576, 7437, 8279, 9103,
- 9909,10698,11470,12226,12966,13690,14398,15091,15769,16433,17082,
- 17718,18340,18949,19545,20128,20698,21256,21802,22336,22859,23371,
-};
-
-/*
- * Precomputed \Sum y^k { 1<=k<=n, where n%32=0). Values are rolled down to
- * lower integers. See Documentation/scheduler/sched-avg.txt how these
- * were generated:
- */
-static const u32 __accumulated_sum_N32[] = {
- 0, 23371, 35056, 40899, 43820, 45281,
- 46011, 46376, 46559, 46650, 46696, 46719,
-};
-
-/*
* Approximate:
* val * y^n, where y^32 ~= 0.5 (~1 scheduling period)
*/
@@ -2795,40 +2775,25 @@ static u64 decay_load(u64 val, u64 n)
return val;
}

-static u32 __accumulate_sum(u64 periods, u32 period_contrib, u32 remainder)
+static u32 __accumulate_pelt_segments(u64 periods, u32 d1, u32 d3)
{
- u32 c1, c2, c3 = remainder; /* y^0 == 1 */
-
- if (!periods)
- return remainder - period_contrib;
-
- if (unlikely(periods >= LOAD_AVG_MAX_N))
- return LOAD_AVG_MAX;
+ u32 c1, c2, c3 = d3; /* y^0 == 1 */

/*
* c1 = d1 y^(p+1)
*/
- c1 = decay_load((u64)(1024 - period_contrib), periods);
+ c1 = decay_load((u64)d1, periods);

- periods -= 1;
/*
- * For updates fully spanning n periods, the contribution to runnable
- * average will be:
+ * p
+ * c2 = 1024 \Sum y^n
+ * n=1
*
- * c2 = 1024 \Sum y^n
- *
- * We can compute this reasonably efficiently by combining:
- *
- * y^PERIOD = 1/2 with precomputed 1024 \Sum y^n {for: n < PERIOD}
+ * inf inf
+ * = 1024 ( \Sum y^n - \Sum y^n - y^0 )
+ * n=0 n=p+1
*/
- if (likely(periods <= LOAD_AVG_PERIOD)) {
- c2 = runnable_avg_yN_sum[periods];
- } else {
- c2 = __accumulated_sum_N32[periods/LOAD_AVG_PERIOD];
- periods %= LOAD_AVG_PERIOD;
- c2 = decay_load(c2, periods);
- c2 += runnable_avg_yN_sum[periods];
- }
+ c2 = LOAD_AVG_MAX - decay_load(LOAD_AVG_MAX, periods) - 1024;

return c1 + c2 + c3;
}
@@ -2861,8 +2826,8 @@ accumulate_sum(u64 delta, int cpu, struct sched_avg *sa,
unsigned long weight, int running, struct cfs_rq *cfs_rq)
{
unsigned long scale_freq, scale_cpu;
+ u32 contrib = delta;
u64 periods;
- u32 contrib;

scale_freq = arch_scale_freq_capacity(NULL, cpu);
scale_cpu = arch_scale_cpu_capacity(NULL, cpu);
@@ -2880,13 +2845,14 @@ accumulate_sum(u64 delta, int cpu, struct sched_avg *sa,
decay_load(cfs_rq->runnable_load_sum, periods);
}
sa->util_sum = decay_load((u64)(sa->util_sum), periods);
- }

- /*
- * Step 2
- */
- delta %= 1024;
- contrib = __accumulate_sum(periods, sa->period_contrib, delta);
+ /*
+ * Step 2
+ */
+ delta %= 1024;
+ contrib = __accumulate_pelt_segments(periods,
+ 1024 - sa->period_contrib, delta);
+ }
sa->period_contrib = delta;

contrib = cap_scale(contrib, scale_freq);

Next message: Theodore Ts'o: "Re: [PATCH] serial: Do not treat the IIR register as a bitfield"
Previous message: Denis Kirjanov: "[PATCH] tty/hvc_console: fix console lock ordering with spinlock"
In reply to: Yuyang Du: "Re: [RESEND PATCH 2/2] sched/fair: Optimize __update_sched_avg()"
Next in thread: Paul Turner: "Re: [RESEND PATCH 2/2] sched/fair: Optimize __update_sched_avg()"
Messages sorted by: [ date ] [ thread ] [ subject ] [ author ]