[PATCH 5.9 006/133] drm/i915/gt: Always send a pulse down the engine after disabling heartbeat

From: Greg Kroah-Hartman
Date: Mon Nov 09 2020 - 08:17:12 EST


From: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>

commit ca65fc0d8e01dca8fc82f0ccf433725469256c71 upstream.

Currently, we check we can send a pulse prior to disabling the
heartbeat to verify that we can change the heartbeat, but since we may
re-evaluate execution upon changing the heartbeat interval we need another
pulse afterwards to refresh execution.

v2: Tvrtko asked if we could reduce the double pulse to a single, which
opened up a discussion of how we should handle the pulse-error after
attempting to change the property, and the desire to serialise
adjustment of the property with its validating pulse, and unwind upon
failure.

Fixes: 9a40bddd47ca ("drm/i915/gt: Expose heartbeat interval via sysfs")
Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
Cc: Joonas Lahtinen <joonas.lahtinen@xxxxxxxxxxxxxxx>
Cc: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx>
Cc: <stable@xxxxxxxxxxxxxxx> # v5.7+
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx>
Acked-by: Joonas Lahtinen <joonas.lahtinen@xxxxxxxxxxxxxxx>
Link: https://patchwork.freedesktop.org/patch/msgid/20200928221510.26044-2-chris@xxxxxxxxxxxxxxxxxx
(cherry picked from commit 3dd66a94de59d7792e7917eb3075342e70f06f44)
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@xxxxxxxxx>
Signed-off-by: Greg Kroah-Hartman <gregkh@xxxxxxxxxxxxxxxxxxx>

---
drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c | 106 ++++++++++++++---------
1 file changed, 67 insertions(+), 39 deletions(-)

--- a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
@@ -177,36 +177,82 @@ void intel_engine_init_heartbeat(struct
INIT_DELAYED_WORK(&engine->heartbeat.work, heartbeat);
}

+static int __intel_engine_pulse(struct intel_engine_cs *engine)
+{
+ struct i915_sched_attr attr = { .priority = I915_PRIORITY_BARRIER };
+ struct intel_context *ce = engine->kernel_context;
+ struct i915_request *rq;
+
+ lockdep_assert_held(&ce->timeline->mutex);
+ GEM_BUG_ON(!intel_engine_has_preemption(engine));
+ GEM_BUG_ON(!intel_engine_pm_is_awake(engine));
+
+ intel_context_enter(ce);
+ rq = __i915_request_create(ce, GFP_NOWAIT | __GFP_NOWARN);
+ intel_context_exit(ce);
+ if (IS_ERR(rq))
+ return PTR_ERR(rq);
+
+ __set_bit(I915_FENCE_FLAG_SENTINEL, &rq->fence.flags);
+ idle_pulse(engine, rq);
+
+ __i915_request_commit(rq);
+ __i915_request_queue(rq, &attr);
+ GEM_BUG_ON(rq->sched.attr.priority < I915_PRIORITY_BARRIER);
+
+ return 0;
+}
+
+static unsigned long set_heartbeat(struct intel_engine_cs *engine,
+ unsigned long delay)
+{
+ unsigned long old;
+
+ old = xchg(&engine->props.heartbeat_interval_ms, delay);
+ if (delay)
+ intel_engine_unpark_heartbeat(engine);
+ else
+ intel_engine_park_heartbeat(engine);
+
+ return old;
+}
+
int intel_engine_set_heartbeat(struct intel_engine_cs *engine,
unsigned long delay)
{
- int err;
+ struct intel_context *ce = engine->kernel_context;
+ int err = 0;

- /* Send one last pulse before to cleanup persistent hogs */
- if (!delay && IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT)) {
- err = intel_engine_pulse(engine);
- if (err)
- return err;
- }
+ if (!delay && !intel_engine_has_preempt_reset(engine))
+ return -ENODEV;
+
+ intel_engine_pm_get(engine);
+
+ err = mutex_lock_interruptible(&ce->timeline->mutex);
+ if (err)
+ goto out_rpm;

- WRITE_ONCE(engine->props.heartbeat_interval_ms, delay);
+ if (delay != engine->props.heartbeat_interval_ms) {
+ unsigned long saved = set_heartbeat(engine, delay);

- if (intel_engine_pm_get_if_awake(engine)) {
- if (delay)
- intel_engine_unpark_heartbeat(engine);
- else
- intel_engine_park_heartbeat(engine);
- intel_engine_pm_put(engine);
+ /* recheck current execution */
+ if (intel_engine_has_preemption(engine)) {
+ err = __intel_engine_pulse(engine);
+ if (err)
+ set_heartbeat(engine, saved);
+ }
}

- return 0;
+ mutex_unlock(&ce->timeline->mutex);
+
+out_rpm:
+ intel_engine_pm_put(engine);
+ return err;
}

int intel_engine_pulse(struct intel_engine_cs *engine)
{
- struct i915_sched_attr attr = { .priority = I915_PRIORITY_BARRIER };
struct intel_context *ce = engine->kernel_context;
- struct i915_request *rq;
int err;

if (!intel_engine_has_preemption(engine))
@@ -215,30 +261,12 @@ int intel_engine_pulse(struct intel_engi
if (!intel_engine_pm_get_if_awake(engine))
return 0;

- if (mutex_lock_interruptible(&ce->timeline->mutex)) {
- err = -EINTR;
- goto out_rpm;
+ err = -EINTR;
+ if (!mutex_lock_interruptible(&ce->timeline->mutex)) {
+ err = __intel_engine_pulse(engine);
+ mutex_unlock(&ce->timeline->mutex);
}

- intel_context_enter(ce);
- rq = __i915_request_create(ce, GFP_NOWAIT | __GFP_NOWARN);
- intel_context_exit(ce);
- if (IS_ERR(rq)) {
- err = PTR_ERR(rq);
- goto out_unlock;
- }
-
- __set_bit(I915_FENCE_FLAG_SENTINEL, &rq->fence.flags);
- idle_pulse(engine, rq);
-
- __i915_request_commit(rq);
- __i915_request_queue(rq, &attr);
- GEM_BUG_ON(rq->sched.attr.priority < I915_PRIORITY_BARRIER);
- err = 0;
-
-out_unlock:
- mutex_unlock(&ce->timeline->mutex);
-out_rpm:
intel_engine_pm_put(engine);
return err;
}