Re: [PATCH] DRM: i915: Fix random GPU hang, Bug 156851

From: Jani Nikula
Date: Thu Sep 15 2016 - 04:50:42 EST



Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=97814

On Thu, 15 Sep 2016, Cheng Cao <bobcaocheng@xxxxxxx> wrote:
> Signed-off-by: Cheng Cao <bobcaocheng@xxxxxxx>
> ---
> drivers/gpu/drm/i915/i915_gem_gtt.c | 6 ++++
> drivers/gpu/drm/i915/i915_gem_stolen.c | 61 ++++++++++++++++-----------------
> drivers/gpu/drm/i915/i915_reg.h | 6 ++++
> drivers/gpu/drm/i915/intel_ringbuffer.c | 20 ++++++++++-
> 4 files changed, 60 insertions(+), 33 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index 7a30af7..0b05dd9 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -2907,6 +2907,12 @@ static unsigned int gen8_get_total_gtt_size(u16 bdw_gmch_ctl)
> if (bdw_gmch_ctl > 4)
> bdw_gmch_ctl = 4;
> #endif
> +#ifdef CONFIG_X86_64
> + /* Limit 64b platforms to a 4GB GGTT */
> + /* DMA 4GB protection */
> + if (bdw_gmch_ctl > 8)
> + bdw_gmch_ctl = 8;
> +#endif
>
> return bdw_gmch_ctl << 20;
> }
> diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c
> index 66be299a1..da272ae 100644
> --- a/drivers/gpu/drm/i915/i915_gem_stolen.c
> +++ b/drivers/gpu/drm/i915/i915_gem_stolen.c
> @@ -352,47 +352,44 @@ static void gen8_get_stolen_reserved(struct drm_i915_private *dev_priv,
> unsigned long *base, unsigned long *size)
> {
> uint32_t reg_val = I915_READ(GEN6_STOLEN_RESERVED);
> + unsigned long stolen_top;
> + struct i915_ggtt *ggtt = &dev_priv->ggtt;
>
> *base = reg_val & GEN6_STOLEN_RESERVED_ADDR_MASK;
>
> switch (reg_val & GEN8_STOLEN_RESERVED_SIZE_MASK) {
> case GEN8_STOLEN_RESERVED_1M:
> - *size = 1024 * 1024;
> + *size = 1 << 10 << 10;
> break;
> case GEN8_STOLEN_RESERVED_2M:
> - *size = 2 * 1024 * 1024;
> + *size = 2 << 10 << 10;
> break;
> case GEN8_STOLEN_RESERVED_4M:
> - *size = 4 * 1024 * 1024;
> + *size = 4 << 10 << 10;
> break;
> case GEN8_STOLEN_RESERVED_8M:
> - *size = 8 * 1024 * 1024;
> + *size = 8 << 10 << 10;
> break;
> default:
> - *size = 8 * 1024 * 1024;
> - MISSING_CASE(reg_val & GEN8_STOLEN_RESERVED_SIZE_MASK);
> - }
> -}
> -
> -static void bdw_get_stolen_reserved(struct drm_i915_private *dev_priv,
> - unsigned long *base, unsigned long *size)
> -{
> - struct i915_ggtt *ggtt = &dev_priv->ggtt;
> - uint32_t reg_val = I915_READ(GEN6_STOLEN_RESERVED);
> - unsigned long stolen_top;
> + /* Whatever if it is a BDW device or SKL device
> + * Or others devices..
> + * This way is always going to work on 5th
> + * generation Intel Processer
> + */
> + stolen_top = dev_priv->mm.stolen_base + ggtt->stolen_size;
>
> - stolen_top = dev_priv->mm.stolen_base + ggtt->stolen_size;
> + *base = reg_val & GEN6_STOLEN_RESERVED_ADDR_MASK;
>
> - *base = reg_val & GEN6_STOLEN_RESERVED_ADDR_MASK;
> + /* MLIMIT - MBASE => PEG */
> + /* -- mobile-5th-gen-core-family-datasheet-vol-2.pdf */
> + if (*base == 0) {
> + *size = 0;
> + MISSING_CASE(reg_val & GEN8_STOLEN_RESERVED_SIZE_MASK);
> + } else
> + *size = stolen_top - *base;
>
> - /* On these platforms, the register doesn't have a size field, so the
> - * size is the distance between the base and the top of the stolen
> - * memory. We also have the genuine case where base is zero and there's
> - * nothing reserved. */
> - if (*base == 0)
> - *size = 0;
> - else
> - *size = stolen_top - *base;
> + break;
> + }
> }
>
> int i915_gem_init_stolen(struct drm_device *dev)
> @@ -442,14 +439,14 @@ int i915_gem_init_stolen(struct drm_device *dev)
> gen7_get_stolen_reserved(dev_priv, &reserved_base,
> &reserved_size);
> break;
> + case 8:
> + gen8_get_stolen_reserved(dev_priv, &reserved_base,
> + &reserved_size);
> + break;
> default:
> - if (IS_BROADWELL(dev_priv) ||
> - IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev))
> - bdw_get_stolen_reserved(dev_priv, &reserved_base,
> - &reserved_size);
> - else
> - gen8_get_stolen_reserved(dev_priv, &reserved_base,
> - &reserved_size);
> + // FIXME: This seemed like going to work
> + gen8_get_stolen_reserved(dev_priv, &reserved_base,
> + &reserved_size);
> break;
> }
>
> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
> index bf2cad3..3dce37b 100644
> --- a/drivers/gpu/drm/i915/i915_reg.h
> +++ b/drivers/gpu/drm/i915/i915_reg.h
> @@ -1748,6 +1748,12 @@ enum skl_disp_power_wells {
> #define RING_INDIRECT_CTX_OFFSET(base) _MMIO((base)+0x1c8) /* gen8+ */
> #define RING_CTX_TIMESTAMP(base) _MMIO((base)+0x3a8) /* gen8+ */
>
> +// 64 bit, low 32 preserved
> +#define IOTLB_INVALID(base) _MMIO((base)+0x508 + 4) /* gen8+ */
> +#define IOTLB_INVALID_IVT (1<<31)
> +#define IOTLB_GLOBAL_INV_REQ (1<<28)
> +#define IOTLB_INVALID_IAIG (1<<25)
> +
> #define ERROR_GEN6 _MMIO(0x40a0)
> #define GEN7_ERR_INT _MMIO(0x44040)
> #define ERR_INT_POISON (1<<31)
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> index 1d3161b..84dafcb 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -498,7 +498,25 @@ static void intel_ring_setup_status_page(struct intel_engine_cs *engine)
> * arises: do we still need this and if so how should we go about
> * invalidating the TLB?
> */
> - if (IS_GEN(dev_priv, 6, 7)) {
> + /* Respond to this question:
> + * According to mobile-5th-gen-core-family-datasheet-vol-2 from Intel
> + * There are registers for invalidation, set those registers will
> + * cause the hardware to perform IOTLB invalidation.
> + */
> + if (IS_GEN8(dev_priv)) {
> + i915_reg_t reg = IOTLB_INVALID(engine->mmio_base);
> +
> + /* ring should be idle before issuing a sync flush*/
> + WARN_ON((I915_READ_MODE(engine) & MODE_IDLE) == 0);
> +
> + I915_WRITE(reg, 0x0 | IOTLB_INVALID_IVT | IOTLB_GLOBAL_INV_REQ);
> +
> + if (intel_wait_for_register(dev_priv,
> + reg, IOTLB_INVALID_IAIG, 0,
> + 1000))
> + DRM_ERROR("%s: wait for TLB invalidation timed out\n",
> + engine->name);
> + } else if (IS_GEN(dev_priv, 6, 7)) {
> i915_reg_t reg = RING_INSTPM(engine->mmio_base);
>
> /* ring should be idle before issuing a sync flush*/

--
Jani Nikula, Intel Open Source Technology Center