[PATCH v4 1/2] drm/i915/gt: Serialize GRDOM access between multiple engine resets

From: Mauro Carvalho Chehab
Date: Mon Jul 11 2022 - 02:50:52 EST


From: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>

Don't allow two engines to be reset in parallel, as they would both
try to select a reset bit (and send requests to common registers)
and wait on that register, at the same time. Serialize control of
the reset requests/acks using the uncore->lock, which will also ensure
that no other GT state changes at the same time as the actual reset.

Cc: stable@xxxxxxxxxxxxxxx # v4.4 and upper
Reported-by: Mika Kuoppala <mika.kuoppala@xxxxxxxxxxxxxxx>
Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
Acked-by: Mika Kuoppala <mika.kuoppala@xxxxxxxxxxxxxxx>
Reviewed-by: Andi Shyti <andi.shyti@xxxxxxxxx>
Reviewed-by: Andrzej Hajda <andrzej.hajda@xxxxxxxxx>
Acked-by: Thomas Hellström <thomas.hellstrom@xxxxxxxxxxxxxxx>
Signed-off-by: Mauro Carvalho Chehab <mchehab@xxxxxxxxxx>
---

See [PATCH v4 0/2] at: https://lore.kernel.org/all/cover.1657522157.git.mchehab@xxxxxxxxxx/

drivers/gpu/drm/i915/gt/intel_reset.c | 37 ++++++++++++++++++++-------
1 file changed, 28 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c
index a5338c3fde7a..c68d36fb5bbd 100644
--- a/drivers/gpu/drm/i915/gt/intel_reset.c
+++ b/drivers/gpu/drm/i915/gt/intel_reset.c
@@ -300,9 +300,9 @@ static int gen6_hw_domain_reset(struct intel_gt *gt, u32 hw_domain_mask)
return err;
}

-static int gen6_reset_engines(struct intel_gt *gt,
- intel_engine_mask_t engine_mask,
- unsigned int retry)
+static int __gen6_reset_engines(struct intel_gt *gt,
+ intel_engine_mask_t engine_mask,
+ unsigned int retry)
{
struct intel_engine_cs *engine;
u32 hw_mask;
@@ -321,6 +321,20 @@ static int gen6_reset_engines(struct intel_gt *gt,
return gen6_hw_domain_reset(gt, hw_mask);
}

+static int gen6_reset_engines(struct intel_gt *gt,
+ intel_engine_mask_t engine_mask,
+ unsigned int retry)
+{
+ unsigned long flags;
+ int ret;
+
+ spin_lock_irqsave(&gt->uncore->lock, flags);
+ ret = __gen6_reset_engines(gt, engine_mask, retry);
+ spin_unlock_irqrestore(&gt->uncore->lock, flags);
+
+ return ret;
+}
+
static struct intel_engine_cs *find_sfc_paired_vecs_engine(struct intel_engine_cs *engine)
{
int vecs_id;
@@ -487,9 +501,9 @@ static void gen11_unlock_sfc(struct intel_engine_cs *engine)
rmw_clear_fw(uncore, sfc_lock.lock_reg, sfc_lock.lock_bit);
}

-static int gen11_reset_engines(struct intel_gt *gt,
- intel_engine_mask_t engine_mask,
- unsigned int retry)
+static int __gen11_reset_engines(struct intel_gt *gt,
+ intel_engine_mask_t engine_mask,
+ unsigned int retry)
{
struct intel_engine_cs *engine;
intel_engine_mask_t tmp;
@@ -583,8 +597,11 @@ static int gen8_reset_engines(struct intel_gt *gt,
struct intel_engine_cs *engine;
const bool reset_non_ready = retry >= 1;
intel_engine_mask_t tmp;
+ unsigned long flags;
int ret;

+ spin_lock_irqsave(&gt->uncore->lock, flags);
+
for_each_engine_masked(engine, gt, engine_mask, tmp) {
ret = gen8_engine_reset_prepare(engine);
if (ret && !reset_non_ready)
@@ -612,17 +629,19 @@ static int gen8_reset_engines(struct intel_gt *gt,
* This is best effort, so ignore any error from the initial reset.
*/
if (IS_DG2(gt->i915) && engine_mask == ALL_ENGINES)
- gen11_reset_engines(gt, gt->info.engine_mask, 0);
+ __gen11_reset_engines(gt, gt->info.engine_mask, 0);

if (GRAPHICS_VER(gt->i915) >= 11)
- ret = gen11_reset_engines(gt, engine_mask, retry);
+ ret = __gen11_reset_engines(gt, engine_mask, retry);
else
- ret = gen6_reset_engines(gt, engine_mask, retry);
+ ret = __gen6_reset_engines(gt, engine_mask, retry);

skip_reset:
for_each_engine_masked(engine, gt, engine_mask, tmp)
gen8_engine_reset_cancel(engine);

+ spin_unlock_irqrestore(&gt->uncore->lock, flags);
+
return ret;
}

--
2.36.1