[PATCH 4.10 12/29] drm/i915: Stop using RP_DOWN_EI on Baytrail

From: Greg Kroah-Hartman
Date: Sun Apr 16 2017 - 04:12:10 EST


4.10-stable review patch. If anyone has any objections, please let me know.

------------------

From: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>

commit 8f68d591d4765b2e1ce9d916ac7bc5583285c4ad upstream.

On Baytrail, we manually calculate busyness over the evaluation interval
to avoid issues with miscaluations with RC6 enabled. However, it turns
out that the DOWN_EI interrupt generator is completely bust - it
operates in two modes, continuous or never. Neither of which are
conducive to good behaviour. Stop unmask the DOWN_EI interrupt and just
compute everything from the UP_EI which does seem to correspond to the
desired interval.

v2: Fixup gen6_rps_pm_mask() as well
v3: Inline vlv_c0_above() to combine the now identical elapsed
calculation for up/down and simplify the threshold testing

Fixes: 43cf3bf084ba ("drm/i915: Improved w/a for rps on Baytrail")
Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
Cc: Mika Kuoppala <mika.kuoppala@xxxxxxxxxxxxxxx>
Link: http://patchwork.freedesktop.org/patch/msgid/20170309211232.28878-1-chris@xxxxxxxxxxxxxxxxxx
Reviewed-by: Mika Kuoppala <mika.kuoppala@xxxxxxxxxxxxxxx>
Link: http://patchwork.freedesktop.org/patch/msgid/20170313170617.31564-1-chris@xxxxxxxxxxxxxxxxxx
(cherry picked from commit e0e8c7cb6eb68e9256de2d8cbeb481d3701c05ac)
Signed-off-by: Jani Nikula <jani.nikula@xxxxxxxxx>
Signed-off-by: Greg Kroah-Hartman <gregkh@xxxxxxxxxxxxxxxxxxx>

---
drivers/gpu/drm/i915/i915_drv.h | 2 -
drivers/gpu/drm/i915/i915_irq.c | 73 +++++++++++++++-------------------------
drivers/gpu/drm/i915/intel_pm.c | 5 +-
3 files changed, 32 insertions(+), 48 deletions(-)

--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1263,7 +1263,7 @@ struct intel_gen6_power_mgmt {
unsigned boosts;

/* manual wa residency calculations */
- struct intel_rps_ei up_ei, down_ei;
+ struct intel_rps_ei ei;

/*
* Protects RPS/RC6 register access and PCU communication.
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -1046,68 +1046,51 @@ static void vlv_c0_read(struct drm_i915_
ei->media_c0 = I915_READ(VLV_MEDIA_C0_COUNT);
}

-static bool vlv_c0_above(struct drm_i915_private *dev_priv,
- const struct intel_rps_ei *old,
- const struct intel_rps_ei *now,
- int threshold)
-{
- u64 time, c0;
- unsigned int mul = 100;
-
- if (old->cz_clock == 0)
- return false;
-
- if (I915_READ(VLV_COUNTER_CONTROL) & VLV_COUNT_RANGE_HIGH)
- mul <<= 8;
-
- time = now->cz_clock - old->cz_clock;
- time *= threshold * dev_priv->czclk_freq;
-
- /* Workload can be split between render + media, e.g. SwapBuffers
- * being blitted in X after being rendered in mesa. To account for
- * this we need to combine both engines into our activity counter.
- */
- c0 = now->render_c0 - old->render_c0;
- c0 += now->media_c0 - old->media_c0;
- c0 *= mul * VLV_CZ_CLOCK_TO_MILLI_SEC;
-
- return c0 >= time;
-}
-
void gen6_rps_reset_ei(struct drm_i915_private *dev_priv)
{
- vlv_c0_read(dev_priv, &dev_priv->rps.down_ei);
- dev_priv->rps.up_ei = dev_priv->rps.down_ei;
+ memset(&dev_priv->rps.ei, 0, sizeof(dev_priv->rps.ei));
}

static u32 vlv_wa_c0_ei(struct drm_i915_private *dev_priv, u32 pm_iir)
{
+ const struct intel_rps_ei *prev = &dev_priv->rps.ei;
struct intel_rps_ei now;
u32 events = 0;

- if ((pm_iir & (GEN6_PM_RP_DOWN_EI_EXPIRED | GEN6_PM_RP_UP_EI_EXPIRED)) == 0)
+ if ((pm_iir & GEN6_PM_RP_UP_EI_EXPIRED) == 0)
return 0;

vlv_c0_read(dev_priv, &now);
if (now.cz_clock == 0)
return 0;

- if (pm_iir & GEN6_PM_RP_DOWN_EI_EXPIRED) {
- if (!vlv_c0_above(dev_priv,
- &dev_priv->rps.down_ei, &now,
- dev_priv->rps.down_threshold))
- events |= GEN6_PM_RP_DOWN_THRESHOLD;
- dev_priv->rps.down_ei = now;
- }
+ if (prev->cz_clock) {
+ u64 time, c0;
+ unsigned int mul;
+
+ mul = VLV_CZ_CLOCK_TO_MILLI_SEC * 100; /* scale to threshold% */
+ if (I915_READ(VLV_COUNTER_CONTROL) & VLV_COUNT_RANGE_HIGH)
+ mul <<= 8;
+
+ time = now.cz_clock - prev->cz_clock;
+ time *= dev_priv->czclk_freq;
+
+ /* Workload can be split between render + media,
+ * e.g. SwapBuffers being blitted in X after being rendered in
+ * mesa. To account for this we need to combine both engines
+ * into our activity counter.
+ */
+ c0 = now.render_c0 - prev->render_c0;
+ c0 += now.media_c0 - prev->media_c0;
+ c0 *= mul;

- if (pm_iir & GEN6_PM_RP_UP_EI_EXPIRED) {
- if (vlv_c0_above(dev_priv,
- &dev_priv->rps.up_ei, &now,
- dev_priv->rps.up_threshold))
- events |= GEN6_PM_RP_UP_THRESHOLD;
- dev_priv->rps.up_ei = now;
+ if (c0 > time * dev_priv->rps.up_threshold)
+ events = GEN6_PM_RP_UP_THRESHOLD;
+ else if (c0 < time * dev_priv->rps.down_threshold)
+ events = GEN6_PM_RP_DOWN_THRESHOLD;
}

+ dev_priv->rps.ei = now;
return events;
}

@@ -4178,7 +4161,7 @@ void intel_irq_init(struct drm_i915_priv
/* Let's track the enabled rps events */
if (IS_VALLEYVIEW(dev_priv))
/* WaGsvRC0ResidencyMethod:vlv */
- dev_priv->pm_rps_events = GEN6_PM_RP_DOWN_EI_EXPIRED | GEN6_PM_RP_UP_EI_EXPIRED;
+ dev_priv->pm_rps_events = GEN6_PM_RP_UP_EI_EXPIRED;
else
dev_priv->pm_rps_events = GEN6_PM_RPS_EVENTS;

--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -4913,8 +4913,9 @@ static u32 gen6_rps_pm_mask(struct drm_i
{
u32 mask = 0;

+ /* We use UP_EI_EXPIRED interupts for both up/down in manual mode */
if (val > dev_priv->rps.min_freq_softlimit)
- mask |= GEN6_PM_RP_DOWN_EI_EXPIRED | GEN6_PM_RP_DOWN_THRESHOLD | GEN6_PM_RP_DOWN_TIMEOUT;
+ mask |= GEN6_PM_RP_UP_EI_EXPIRED | GEN6_PM_RP_DOWN_THRESHOLD | GEN6_PM_RP_DOWN_TIMEOUT;
if (val < dev_priv->rps.max_freq_softlimit)
mask |= GEN6_PM_RP_UP_EI_EXPIRED | GEN6_PM_RP_UP_THRESHOLD;

@@ -5014,7 +5015,7 @@ void gen6_rps_busy(struct drm_i915_priva
{
mutex_lock(&dev_priv->rps.hw_lock);
if (dev_priv->rps.enabled) {
- if (dev_priv->pm_rps_events & (GEN6_PM_RP_DOWN_EI_EXPIRED | GEN6_PM_RP_UP_EI_EXPIRED))
+ if (dev_priv->pm_rps_events & GEN6_PM_RP_UP_EI_EXPIRED)
gen6_rps_reset_ei(dev_priv);
I915_WRITE(GEN6_PMINTRMSK,
gen6_rps_pm_mask(dev_priv, dev_priv->rps.cur_freq));