[PATCH 5.13 011/127] scsi: pm80xx: Fix TMF task completion race condition

From: Sasha Levin
Date: Tue Aug 24 2021 - 12:56:46 EST


From: Igor Pylypiv <ipylypiv@xxxxxxxxxx>

[ Upstream commit d712d3fb484b7fa8d1d57e9ca6f134bb9d8c18b1 ]

The TMF timeout timer may trigger at the same time when the response from a
controller is being handled. When this happens the SAS task may get freed
before the response processing is finished.

Fix this by calling complete() only when SAS_TASK_STATE_DONE is not set.

A similar race condition was fixed in commit b90cd6f2b905 ("scsi: libsas:
fix a race condition when smp task timeout")

Link: https://lore.kernel.org/r/20210707185945.35559-1-ipylypiv@xxxxxxxxxx
Reviewed-by: Vishakha Channapattan <vishakhavc@xxxxxxxxxx>
Acked-by: Jack Wang <jinpu.wang@xxxxxxxxx>
Signed-off-by: Igor Pylypiv <ipylypiv@xxxxxxxxxx>
Signed-off-by: Martin K. Petersen <martin.petersen@xxxxxxxxxx>
Signed-off-by: Sasha Levin <sashal@xxxxxxxxxx>
---
drivers/scsi/pm8001/pm8001_sas.c | 32 +++++++++++++++-----------------
1 file changed, 15 insertions(+), 17 deletions(-)

diff --git a/drivers/scsi/pm8001/pm8001_sas.c b/drivers/scsi/pm8001/pm8001_sas.c
index 335cf37e6cb9..2e429e31f1f0 100644
--- a/drivers/scsi/pm8001/pm8001_sas.c
+++ b/drivers/scsi/pm8001/pm8001_sas.c
@@ -684,8 +684,7 @@ int pm8001_dev_found(struct domain_device *dev)

void pm8001_task_done(struct sas_task *task)
{
- if (!del_timer(&task->slow_task->timer))
- return;
+ del_timer(&task->slow_task->timer);
complete(&task->slow_task->completion);
}

@@ -693,9 +692,14 @@ static void pm8001_tmf_timedout(struct timer_list *t)
{
struct sas_task_slow *slow = from_timer(slow, t, timer);
struct sas_task *task = slow->task;
+ unsigned long flags;

- task->task_state_flags |= SAS_TASK_STATE_ABORTED;
- complete(&task->slow_task->completion);
+ spin_lock_irqsave(&task->task_state_lock, flags);
+ if (!(task->task_state_flags & SAS_TASK_STATE_DONE)) {
+ task->task_state_flags |= SAS_TASK_STATE_ABORTED;
+ complete(&task->slow_task->completion);
+ }
+ spin_unlock_irqrestore(&task->task_state_lock, flags);
}

#define PM8001_TASK_TIMEOUT 20
@@ -748,13 +752,10 @@ static int pm8001_exec_internal_tmf_task(struct domain_device *dev,
}
res = -TMF_RESP_FUNC_FAILED;
/* Even TMF timed out, return direct. */
- if ((task->task_state_flags & SAS_TASK_STATE_ABORTED)) {
- if (!(task->task_state_flags & SAS_TASK_STATE_DONE)) {
- pm8001_dbg(pm8001_ha, FAIL,
- "TMF task[%x]timeout.\n",
- tmf->tmf);
- goto ex_err;
- }
+ if (task->task_state_flags & SAS_TASK_STATE_ABORTED) {
+ pm8001_dbg(pm8001_ha, FAIL, "TMF task[%x]timeout.\n",
+ tmf->tmf);
+ goto ex_err;
}

if (task->task_status.resp == SAS_TASK_COMPLETE &&
@@ -834,12 +835,9 @@ pm8001_exec_internal_task_abort(struct pm8001_hba_info *pm8001_ha,
wait_for_completion(&task->slow_task->completion);
res = TMF_RESP_FUNC_FAILED;
/* Even TMF timed out, return direct. */
- if ((task->task_state_flags & SAS_TASK_STATE_ABORTED)) {
- if (!(task->task_state_flags & SAS_TASK_STATE_DONE)) {
- pm8001_dbg(pm8001_ha, FAIL,
- "TMF task timeout.\n");
- goto ex_err;
- }
+ if (task->task_state_flags & SAS_TASK_STATE_ABORTED) {
+ pm8001_dbg(pm8001_ha, FAIL, "TMF task timeout.\n");
+ goto ex_err;
}

if (task->task_status.resp == SAS_TASK_COMPLETE &&
--
2.30.2