[PATCH 1/5] PCI/switchtec: Error out MRPC execution when no GAS access

From: kelvin.cao
Date: Fri Sep 24 2021 - 00:15:53 EST


From: Kelvin Cao <kelvin.cao@xxxxxxxxxxxxx>

After a firmware hard reset, MRPC command executions, which are based
on reads and writes of the PCI BAR (which Microchip refers to as GAS),
will hang indefinitely. This is because after a reset the host fails
all GAS reads (they return all 1s), in which case the driver never
gets a valid MRPC status.

Add a GAS read check when an MRPC command execution doesn't respond in
a timely manner, and error out if the check fails.

Signed-off-by: Kelvin Cao <kelvin.cao@xxxxxxxxxxxxx>
---
drivers/pci/switch/switchtec.c | 59 ++++++++++++++++++++++++++++++----
1 file changed, 52 insertions(+), 7 deletions(-)

diff --git a/drivers/pci/switch/switchtec.c b/drivers/pci/switch/switchtec.c
index 0b301f8be9ed..092653487021 100644
--- a/drivers/pci/switch/switchtec.c
+++ b/drivers/pci/switch/switchtec.c
@@ -45,6 +45,7 @@ enum mrpc_state {
MRPC_QUEUED,
MRPC_RUNNING,
MRPC_DONE,
+ MRPC_IO_ERROR,
};

struct switchtec_user {
@@ -66,6 +67,13 @@ struct switchtec_user {
int event_cnt;
};

+static int check_access(struct switchtec_dev *stdev)
+{
+ u32 device = ioread32(&stdev->mmio_sys_info->device_id);
+
+ return stdev->pdev->device == device;
+}
+
static struct switchtec_user *stuser_create(struct switchtec_dev *stdev)
{
struct switchtec_user *stuser;
@@ -113,6 +121,7 @@ static void stuser_set_state(struct switchtec_user *stuser,
[MRPC_QUEUED] = "QUEUED",
[MRPC_RUNNING] = "RUNNING",
[MRPC_DONE] = "DONE",
+ [MRPC_IO_ERROR] = "IO_ERROR",
};

stuser->state = state;
@@ -184,6 +193,21 @@ static int mrpc_queue_cmd(struct switchtec_user *stuser)
return 0;
}

+static void mrpc_cleanup_cmd(struct switchtec_dev *stdev)
+{
+ /* requires the mrpc_mutex to already be held when called */
+ struct switchtec_user *stuser = list_entry(stdev->mrpc_queue.next,
+ struct switchtec_user, list);
+
+ stuser->cmd_done = true;
+ wake_up_interruptible(&stuser->cmd_comp);
+ list_del_init(&stuser->list);
+ stuser_put(stuser);
+ stdev->mrpc_busy = 0;
+
+ mrpc_cmd_submit(stdev);
+}
+
static void mrpc_complete_cmd(struct switchtec_dev *stdev)
{
/* requires the mrpc_mutex to already be held when called */
@@ -223,13 +247,7 @@ static void mrpc_complete_cmd(struct switchtec_dev *stdev)
memcpy_fromio(stuser->data, &stdev->mmio_mrpc->output_data,
stuser->read_len);
out:
- stuser->cmd_done = true;
- wake_up_interruptible(&stuser->cmd_comp);
- list_del_init(&stuser->list);
- stuser_put(stuser);
- stdev->mrpc_busy = 0;
-
- mrpc_cmd_submit(stdev);
+ mrpc_cleanup_cmd(stdev);
}

static void mrpc_event_work(struct work_struct *work)
@@ -246,6 +264,23 @@ static void mrpc_event_work(struct work_struct *work)
mutex_unlock(&stdev->mrpc_mutex);
}

+static void mrpc_error_complete_cmd(struct switchtec_dev *stdev)
+{
+ /* requires the mrpc_mutex to already be held when called */
+
+ struct switchtec_user *stuser;
+
+ if (list_empty(&stdev->mrpc_queue))
+ return;
+
+ stuser = list_entry(stdev->mrpc_queue.next,
+ struct switchtec_user, list);
+
+ stuser_set_state(stuser, MRPC_IO_ERROR);
+
+ mrpc_cleanup_cmd(stdev);
+}
+
static void mrpc_timeout_work(struct work_struct *work)
{
struct switchtec_dev *stdev;
@@ -257,6 +292,11 @@ static void mrpc_timeout_work(struct work_struct *work)

mutex_lock(&stdev->mrpc_mutex);

+ if (!check_access(stdev)) {
+ mrpc_error_complete_cmd(stdev);
+ goto out;
+ }
+
if (stdev->dma_mrpc)
status = stdev->dma_mrpc->status;
else
@@ -544,6 +584,11 @@ static ssize_t switchtec_dev_read(struct file *filp, char __user *data,
if (rc)
return rc;

+ if (stuser->state == MRPC_IO_ERROR) {
+ mutex_unlock(&stdev->mrpc_mutex);
+ return -EIO;
+ }
+
if (stuser->state != MRPC_DONE) {
mutex_unlock(&stdev->mrpc_mutex);
return -EBADE;
--
2.25.1