[PATCH 5.10 091/299] platform/x86: ISST: Account for increased timeout in some cases

From: Greg Kroah-Hartman
Date: Mon May 10 2021 - 07:00:42 EST


From: Srinivas Pandruvada <srinivas.pandruvada@xxxxxxxxxxxxxxx>

[ Upstream commit 5c782817a981981917ec3c647cf521022ee07143 ]

In some cases when firmware is busy or updating, some mailbox commands
still timeout on some newer CPUs. To fix this issue, change how we
process timeout.

With this change, replaced timeout from using simple count with real
timeout in micro-seconds using ktime. When the command response takes
more than average processing time, yield to other tasks. The worst case
timeout is extended upto 1 milli-second.

Signed-off-by: Srinivas Pandruvada <srinivas.pandruvada@xxxxxxxxxxxxxxx>
Link: https://lore.kernel.org/r/20210330220840.3113959-1-srinivas.pandruvada@xxxxxxxxxxxxxxx
Signed-off-by: Hans de Goede <hdegoede@xxxxxxxxxx>
Signed-off-by: Sasha Levin <sashal@xxxxxxxxxx>
---
.../intel_speed_select_if/isst_if_mbox_pci.c | 33 +++++++++++++------
1 file changed, 23 insertions(+), 10 deletions(-)

diff --git a/drivers/platform/x86/intel_speed_select_if/isst_if_mbox_pci.c b/drivers/platform/x86/intel_speed_select_if/isst_if_mbox_pci.c
index 95f01e7a87d5..da958aa8468d 100644
--- a/drivers/platform/x86/intel_speed_select_if/isst_if_mbox_pci.c
+++ b/drivers/platform/x86/intel_speed_select_if/isst_if_mbox_pci.c
@@ -21,12 +21,16 @@
#define PUNIT_MAILBOX_BUSY_BIT 31

/*
- * The average time to complete some commands is about 40us. The current
- * count is enough to satisfy 40us. But when the firmware is very busy, this
- * causes timeout occasionally. So increase to deal with some worst case
- * scenarios. Most of the command still complete in few us.
+ * The average time to complete mailbox commands is less than 40us. Most of
+ * the commands complete in few micro seconds. But the same firmware handles
+ * requests from all power management features.
+ * We can create a scenario where we flood the firmware with requests then
+ * the mailbox response can be delayed for 100s of micro seconds. So define
+ * two timeouts. One for average case and one for long.
+ * If the firmware is taking more than average, just call cond_resched().
*/
-#define OS_MAILBOX_RETRY_COUNT 100
+#define OS_MAILBOX_TIMEOUT_AVG_US 40
+#define OS_MAILBOX_TIMEOUT_MAX_US 1000

struct isst_if_device {
struct mutex mutex;
@@ -35,11 +39,13 @@ struct isst_if_device {
static int isst_if_mbox_cmd(struct pci_dev *pdev,
struct isst_if_mbox_cmd *mbox_cmd)
{
- u32 retries, data;
+ s64 tm_delta = 0;
+ ktime_t tm;
+ u32 data;
int ret;

/* Poll for rb bit == 0 */
- retries = OS_MAILBOX_RETRY_COUNT;
+ tm = ktime_get();
do {
ret = pci_read_config_dword(pdev, PUNIT_MAILBOX_INTERFACE,
&data);
@@ -48,11 +54,14 @@ static int isst_if_mbox_cmd(struct pci_dev *pdev,

if (data & BIT_ULL(PUNIT_MAILBOX_BUSY_BIT)) {
ret = -EBUSY;
+ tm_delta = ktime_us_delta(ktime_get(), tm);
+ if (tm_delta > OS_MAILBOX_TIMEOUT_AVG_US)
+ cond_resched();
continue;
}
ret = 0;
break;
- } while (--retries);
+ } while (tm_delta < OS_MAILBOX_TIMEOUT_MAX_US);

if (ret)
return ret;
@@ -74,7 +83,8 @@ static int isst_if_mbox_cmd(struct pci_dev *pdev,
return ret;

/* Poll for rb bit == 0 */
- retries = OS_MAILBOX_RETRY_COUNT;
+ tm_delta = 0;
+ tm = ktime_get();
do {
ret = pci_read_config_dword(pdev, PUNIT_MAILBOX_INTERFACE,
&data);
@@ -83,6 +93,9 @@ static int isst_if_mbox_cmd(struct pci_dev *pdev,

if (data & BIT_ULL(PUNIT_MAILBOX_BUSY_BIT)) {
ret = -EBUSY;
+ tm_delta = ktime_us_delta(ktime_get(), tm);
+ if (tm_delta > OS_MAILBOX_TIMEOUT_AVG_US)
+ cond_resched();
continue;
}

@@ -96,7 +109,7 @@ static int isst_if_mbox_cmd(struct pci_dev *pdev,
mbox_cmd->resp_data = data;
ret = 0;
break;
- } while (--retries);
+ } while (tm_delta < OS_MAILBOX_TIMEOUT_MAX_US);

return ret;
}
--
2.30.2