Re: [v4,1/6] drm/i915/skl: Add support for the SAGV, fix underrun hangs

From: Hans de Goede
Date: Tue Aug 02 2016 - 12:29:16 EST


Hi,

On 26-07-16 19:34, cpaul@xxxxxxxxxx wrote:
Since the watermark calculations for Skylake are still broken, we're apt
to hit underruns very easily under multi-monitor configurations.
While it would be lovely if this was fixed, it's not. Another problem
that's been coming from this however, is the mysterious issue of
underruns causing full system hangs. An easy way to reproduce this with
a skylake system:

- Get a laptop with a skylake GPU, and hook up two external monitors to
it
- Move the cursor from the built-in LCD to one of the external displays
as quickly as you can
- You'll get a few pipe underruns, and eventually the entire system will
just freeze.

After doing a lot of investigation and reading through the bspec, I
found the existence of the SAGV, which is responsible for adjusting the
system agent voltage and clock frequencies depending on how much power
we need. According to the bspec:

"The display engine access to system memory is blocked during the
adjustment time. SAGV defaults to enabled. Software must use the
GT-driver pcode mailbox to disable SAGV when the display engine is not
able to tolerate the blocking time."

The rest of the bspec goes on to explain that software can simply leave
the SAGV enabled, and disable it when we use interlaced pipes/have more
than one pipe active.

Sure enough, with this patchset the system hangs resulting from pipe
underruns on Skylake have completely vanished on my T460s. Additionally,
the bspec mentions turning off the SAGV with more than one pipe enabled
as a workaround for display underruns. While this patch doesn't entirely
fix that, it looks like it does improve the situation a little bit so
it's likely this is going to be required to make watermarks on Skylake
fully functional.

Changes since v4:
- Use is_power_of_2 against active_crtcs to check whether we have > 1
pipe enabled
- Fix skl_sagv_get_hw_state(): (temp & 0x1) indicates disabled, 0x0
enabled
- Call skl_sagv_enable/disable() from pre/post-plane updates

This does not seem to do what you want it to do, if I'm reading your
changes and the original code correctly:

<snip>

@@ -4589,6 +4592,11 @@ static void intel_post_plane_update(struct intel_crtc_state *old_crtc_state)
!old_primary_state->visible))
intel_post_enable_primary(&crtc->base);
}
+
+ if (old_intel_state->modeset &&
+ (old_intel_state->active_crtcs == 0 ||
+ is_power_of_2(old_intel_state->active_crtcs)))
+ skl_enable_sagv(dev_priv);
}


Here you are enabling the sagv if the *old* state allows it
(0 or 1 pipes active).

But judging from previous patches / the commit msg the
intent is to enable the SAGV if the *new* state allows
it, not the old one.

See e.g. the checks for calling intel_post_enable_primary()
which use both primary_state and old_primary_state

Also, if you're going to respin, you may want to switch
to using hweight as mentioned before; then you can simply
do something like (hamming_weight(active_crtcs) <= 1) as the
condition to check for 0 or 1 active crtcs instead of
having 2 checks. Note: please double-check my logic here.

<snip>

@@ -4649,6 +4659,15 @@ static void intel_pre_plane_update(struct intel_crtc_state *old_crtc_state)
}

/*
+ * SKL workaround: bspec recommends we disable the SAGV when we have
+ * more then one pipe enabled
+ */
+ if (old_intel_state->modeset &&
+ !is_power_of_2(old_intel_state->active_crtcs) &&
+ old_intel_state->active_crtcs != 0)
+ skl_disable_sagv(dev_priv);
+
+ /*

Same thing, you're disabling the sagv if the old state
disallows it, but I believe you should be looking at the new
state instead.

Regards,

Hans





* If we're doing a modeset, we're done. No need to do any pre-vblank
* watermark programming here.
*/
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index e74d851..113bf48 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -1709,6 +1709,8 @@ void ilk_wm_get_hw_state(struct drm_device *dev);
void skl_wm_get_hw_state(struct drm_device *dev);
void skl_ddb_get_hw_state(struct drm_i915_private *dev_priv,
struct skl_ddb_allocation *ddb /* out */);
+int skl_enable_sagv(struct drm_i915_private *dev_priv);
+int skl_disable_sagv(struct drm_i915_private *dev_priv);
uint32_t ilk_pipe_pixel_rate(const struct intel_crtc_state *pipe_config);
bool ilk_disable_lp_wm(struct drm_device *dev);
int sanitize_rc6_option(struct drm_i915_private *dev_priv, int enable_rc6);
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 64d628c..55a9694 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -2876,6 +2876,109 @@ skl_wm_plane_id(const struct intel_plane *plane)
}

static void
+skl_sagv_get_hw_state(struct drm_i915_private *dev_priv)
+{
+ u32 temp;
+ int ret;
+
+ if (IS_BROXTON(dev_priv))
+ return;
+
+ mutex_lock(&dev_priv->rps.hw_lock);
+ ret = sandybridge_pcode_read(dev_priv, GEN9_PCODE_SAGV_CONTROL, &temp);
+ mutex_unlock(&dev_priv->rps.hw_lock);
+
+ if (!ret) {
+ dev_priv->skl_sagv_enabled = !(temp & 0x1);
+ } else {
+ /*
+ * If for some reason we can't access the SAGV state, follow
+ * the bspec and assume it's enabled
+ */
+ DRM_ERROR("Failed to get SAGV state, assuming enabled\n");
+ dev_priv->skl_sagv_enabled = true;
+ }
+}
+
+/*
+ * SAGV dynamically adjusts the system agent voltage and clock frequencies
+ * depending on power and performance requirements. The display engine access
+ * to system memory is blocked during the adjustment time. Having this enabled
+ * in multi-pipe configurations can cause issues (such as underruns causing
+ * full system hangs), and the bspec also suggests that software disable it
+ * when more then one pipe is enabled.
+ */
+int
+skl_enable_sagv(struct drm_i915_private *dev_priv)
+{
+ int ret;
+
+ if (IS_BROXTON(dev_priv))
+ return 0;
+ if (dev_priv->skl_sagv_enabled)
+ return 0;
+
+ mutex_lock(&dev_priv->rps.hw_lock);
+ DRM_DEBUG_KMS("Enabling the SAGV\n");
+
+ ret = sandybridge_pcode_write(dev_priv, GEN9_PCODE_SAGV_CONTROL,
+ GEN9_SAGV_DYNAMIC_FREQ);
+ if (!ret)
+ dev_priv->skl_sagv_enabled = true;
+ else
+ DRM_ERROR("Failed to enable the SAGV\n");
+
+ /* We don't need to wait for SAGV when enabling */
+ mutex_unlock(&dev_priv->rps.hw_lock);
+ return ret;
+}
+
+int
+skl_disable_sagv(struct drm_i915_private *dev_priv)
+{
+ int ret = 0;
+ unsigned long timeout;
+ u32 temp;
+
+ if (IS_BROXTON(dev_priv))
+ return 0;
+ if (!dev_priv->skl_sagv_enabled)
+ return 0;
+
+ mutex_lock(&dev_priv->rps.hw_lock);
+ DRM_DEBUG_KMS("Disabling the SAGV\n");
+
+ /* bspec says to keep retrying for at least 1 ms */
+ timeout = jiffies + msecs_to_jiffies(1);
+ do {
+ ret = sandybridge_pcode_write(dev_priv, GEN9_PCODE_SAGV_CONTROL,
+ GEN9_SAGV_DISABLE);
+ if (ret) {
+ DRM_ERROR("Failed to disable the SAGV\n");
+ goto out;
+ }
+
+ ret = sandybridge_pcode_read(dev_priv, GEN9_PCODE_SAGV_CONTROL,
+ &temp);
+ if (ret) {
+ DRM_ERROR("Failed to check the status of the SAGV\n");
+ goto out;
+ }
+ } while (!(temp & 0x1) && time_before(jiffies, timeout));
+
+ if (temp & 0x1) {
+ dev_priv->skl_sagv_enabled = false;
+ } else {
+ ret = -1;
+ DRM_ERROR("Request to disable SAGV timed out\n");
+ }
+
+out:
+ mutex_unlock(&dev_priv->rps.hw_lock);
+ return ret;
+}
+
+static void
skl_ddb_get_pipe_allocation_limits(struct drm_device *dev,
const struct intel_crtc_state *cstate,
struct skl_ddb_entry *alloc, /* out */
@@ -4228,6 +4331,8 @@ void skl_wm_get_hw_state(struct drm_device *dev)
/* Easy/common case; just sanitize DDB now if everything off */
memset(ddb, 0, sizeof(*ddb));
}
+
+ skl_sagv_get_hw_state(dev_priv);
}

static void ilk_pipe_wm_get_hw_state(struct drm_crtc *crtc)