[PATCH RESEND 11/25] mpt3sas: fix for driver fails EEH, recovery from injected pci bus error

From: Sreekanth Reddy
Date: Wed Nov 11 2015 - 07:02:15 EST


From: Sreekanth Reddy <sreekanth.reddy@xxxxxxxxxxxxx>

This patch stops the driver to invoke kthread (which remove the dead ioc)
for some time while EEH recovery has started.

This changes are ported from below mpt2sas driver patch
'commit b4730fb6e54a634a145c9c71c5cf856f00beb5cd
("mpt2sas: fix for driver fails EEH, recovery from injected pci bus error")'

Signed-off-by: Sreekanth Reddy <Sreekanth.Reddy@xxxxxxxxxxxxx>
---
drivers/scsi/mpt3sas/mpt3sas_base.c | 19 ++++++++++++++++++-
drivers/scsi/mpt3sas/mpt3sas_base.h | 1 +
2 files changed, 19 insertions(+), 1 deletion(-)

diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.c b/drivers/scsi/mpt3sas/mpt3sas_base.c
index 2b33e48..b5b1eb2 100644
--- a/drivers/scsi/mpt3sas/mpt3sas_base.c
+++ b/drivers/scsi/mpt3sas/mpt3sas_base.c
@@ -157,7 +157,7 @@ _base_fault_reset_work(struct work_struct *work)


spin_lock_irqsave(&ioc->ioc_reset_in_progress_lock, flags);
- if (ioc->shost_recovery)
+ if (ioc->shost_recovery || ioc->pci_error_recovery)
goto rearm_timer;
spin_unlock_irqrestore(&ioc->ioc_reset_in_progress_lock, flags);

@@ -166,6 +166,20 @@ _base_fault_reset_work(struct work_struct *work)
pr_err(MPT3SAS_FMT "SAS host is non-operational !!!!\n",
ioc->name);

+ /* It may be possible that EEH recovery can resolve some of
+ * pci bus failure issues rather removing the dead ioc function
+ * by considering controller is in a non-operational state. So
+ * here priority is given to the EEH recovery. If it doesn't
+ * not resolve this issue, mpt3sas driver will consider this
+ * controller to non-operational state and remove the dead ioc
+ * function.
+ */
+ if (ioc->non_operational_loop++ < 5) {
+ spin_lock_irqsave(&ioc->ioc_reset_in_progress_lock,
+ flags);
+ goto rearm_timer;
+ }
+
/*
* Call _scsih_flush_pending_cmds callback so that we flush all
* pending commands back to OS. This call is required to aovid
@@ -193,6 +207,8 @@ _base_fault_reset_work(struct work_struct *work)
return; /* don't rearm timer */
}

+ ioc->non_operational_loop = 0;
+
if ((doorbell & MPI2_IOC_STATE_MASK) != MPI2_IOC_STATE_OPERATIONAL) {
rc = mpt3sas_base_hard_reset_handler(ioc, CAN_SLEEP,
FORCE_BIG_HAMMER);
@@ -5162,6 +5178,7 @@ mpt3sas_base_attach(struct MPT3SAS_ADAPTER *ioc)
if (r)
goto out_free_resources;

+ ioc->non_operational_loop = 0;
return 0;

out_free_resources:
diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.h b/drivers/scsi/mpt3sas/mpt3sas_base.h
index 08f46a7..a0d1f13 100644
--- a/drivers/scsi/mpt3sas/mpt3sas_base.h
+++ b/drivers/scsi/mpt3sas/mpt3sas_base.h
@@ -845,6 +845,7 @@ struct MPT3SAS_ADAPTER {
u16 cpu_msix_table_sz;
u32 ioc_reset_count;
MPT3SAS_FLUSH_RUNNING_CMDS schedule_dead_ioc_flush_running_cmds;
+ u32 non_operational_loop;

/* internal commands, callback index */
u8 scsi_io_cb_idx;
--
2.0.2

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/