Re: [PATCH v4 1/1] applesmc: Re-work SMC comms

From: Guenter Roeck
Date: Wed Nov 11 2020 - 00:56:43 EST


On 11/10/20 7:38 PM, Brad Campbell wrote:
> Commit fff2d0f701e6 ("hwmon: (applesmc) avoid overlong udelay()")
> introduced an issue whereby communication with the SMC became
> unreliable with write errors like :
>
> [ 120.378614] applesmc: send_byte(0x00, 0x0300) fail: 0x40
> [ 120.378621] applesmc: LKSB: write data fail
> [ 120.512782] applesmc: send_byte(0x00, 0x0300) fail: 0x40
> [ 120.512787] applesmc: LKSB: write data fail
>
> The original code appeared to be timing sensitive and was not reliable
> with the timing changes in the aforementioned commit.
>
> This patch re-factors the SMC communication to remove the timing
> dependencies and restore function with the changes previously
> committed.
>
> Tested on : MacbookAir6,2 MacBookPro11,1 iMac12,2, MacBookAir1,1,
> MacBookAir3,1
>
> Fixes: fff2d0f701e6 ("hwmon: (applesmc) avoid overlong udelay()")
> Reported-by: Andreas Kemnade <andreas@xxxxxxxxxxxx>
> Tested-by: Andreas Kemnade <andreas@xxxxxxxxxxxx> # MacBookAir6,2
> Acked-by: Arnd Bergmann <arnd@xxxxxxxx>
> Signed-off-by: Brad Campbell <brad@xxxxxxxxxxxxxxx>
> Signed-off-by: Henrik Rydberg <rydberg@xxxxxxxxxxx>
>
> ---
> Changelog :
> v1 : Initial attempt
> v2 : Address logic and coding style
> v3 : Removed some debug hangover. Added tested-by. Modifications for MacBookAir1,1
> v4 : Re-factored logic based on Apple driver. Simplified wait_status loop
> Index: linux-stable/drivers/hwmon/applesmc.c
> ===================================================================
> --- linux-stable.orig/drivers/hwmon/applesmc.c
> +++ linux-stable/drivers/hwmon/applesmc.c
> @@ -32,6 +32,7 @@
> #include <linux/hwmon.h>
> #include <linux/workqueue.h>
> #include <linux/err.h>
> +#include <linux/bits.h>
>
> /* data port used by Apple SMC */
> #define APPLESMC_DATA_PORT 0x300
> @@ -42,10 +43,14 @@
>
> #define APPLESMC_MAX_DATA_LENGTH 32
>
> -/* wait up to 128 ms for a status change. */
> -#define APPLESMC_MIN_WAIT 0x0010
> -#define APPLESMC_RETRY_WAIT 0x0100
> -#define APPLESMC_MAX_WAIT 0x20000
> +/* Apple SMC status bits */
> +#define SMC_STATUS_AWAITING_DATA BIT(0) /* SMC has data waiting to be read */
> +#define SMC_STATUS_IB_CLOSED BIT(1) /* Will ignore any input */
> +#define SMC_STATUS_BUSY BIT(2) /* Command in progress */
> +
> +/* Exponential delay boundaries */
> +#define APPLESMC_MIN_WAIT 0x0008
> +#define APPLESMC_MAX_WAIT 0x100000

This is a substantial increase in wait time which should be documented.
0x20000 was explained (it translated to 128 ms), but this isn't,
and no reason is provided why it was increased to one second.
Is there any evidence that this is needed ? The only "benefit" I
can see is that a stuck SMC will now hang everything 8 times longer.

There really should be some evidence suggesting that the longer
timeout is really needed, better than "the apple driver does it".
The timeout was increased to 128 ms back in 2012, according to
the commit because timeouts were observed on MacBookPro6,1.
I would expect something similar here. In other words, describe
the circumstances of observed timeouts and the affected system(s).

>
> #define APPLESMC_READ_CMD 0x10
> #define APPLESMC_WRITE_CMD 0x11
> @@ -151,65 +156,73 @@ static unsigned int key_at_index;
> static struct workqueue_struct *applesmc_led_wq;
>
> /*
> - * wait_read - Wait for a byte to appear on SMC port. Callers must
> - * hold applesmc_lock.
> + * Wait for specific status bits with a mask on the SMC
> + * Used before all transactions
> */
> -static int wait_read(void)
> +
> +static int wait_status(u8 val, u8 mask)
> {
> - unsigned long end = jiffies + (APPLESMC_MAX_WAIT * HZ) / USEC_PER_SEC;
> u8 status;
> int us;
>
> - for (us = APPLESMC_MIN_WAIT; us < APPLESMC_MAX_WAIT; us <<= 1) {
> - usleep_range(us, us * 16);
> + for (us = APPLESMC_MIN_WAIT; us <= APPLESMC_MAX_WAIT; us <<= 1) {
> status = inb(APPLESMC_CMD_PORT);
> - /* read: wait for smc to settle */
> - if (status & 0x01)
> + if ((status & mask) == val)
> return 0;
> - /* timeout: give up */
> - if (time_after(jiffies, end))
> - break;
> + usleep_range(APPLESMC_MIN_WAIT, us);

Fighting with indentation again... and does that range really make sense ?
Seems to me it makes the whole loop quite unpredictable in its behavior.

> }
> -
> - pr_warn("wait_read() fail: 0x%02x\n", status);
> return -EIO;
> }
>
> -/*
> - * send_byte - Write to SMC port, retrying when necessary. Callers
> - * must hold applesmc_lock.
> - */
> +/* send_byte - Write to SMC data port. Callers must hold applesmc_lock. */
> +
> static int send_byte(u8 cmd, u16 port)
> {
> - u8 status;
> - int us;
> - unsigned long end = jiffies + (APPLESMC_MAX_WAIT * HZ) / USEC_PER_SEC;
> + int status;
>
> + status = wait_status(0, SMC_STATUS_IB_CLOSED);
> + if (status)
> + return status;
> + status = wait_status(SMC_STATUS_BUSY, SMC_STATUS_BUSY);
> + if (status)
> + return status;

I think this warrants an explanation/comment.

> outb(cmd, port);
> - for (us = APPLESMC_MIN_WAIT; us < APPLESMC_MAX_WAIT; us <<= 1) {
> - usleep_range(us, us * 16);
> - status = inb(APPLESMC_CMD_PORT);
> - /* write: wait for smc to settle */
> - if (status & 0x02)
> - continue;
> - /* ready: cmd accepted, return */
> - if (status & 0x04)
> - return 0;
> - /* timeout: give up */
> - if (time_after(jiffies, end))
> - break;
> - /* busy: long wait and resend */
> - udelay(APPLESMC_RETRY_WAIT);
> - outb(cmd, port);
> - }
> -
> - pr_warn("send_byte(0x%02x, 0x%04x) fail: 0x%02x\n", cmd, port, status);
> - return -EIO;
> + return 0;
> }
>
> +/* send_command - Write a command to the SMC. Callers must hold applesmc_lock. */
> +
> static int send_command(u8 cmd)
> {
> - return send_byte(cmd, APPLESMC_CMD_PORT);
> + int ret;
> +
> + ret = wait_status(0, SMC_STATUS_IB_CLOSED);
> + if (ret)
> + return ret;
> + outb(cmd, APPLESMC_CMD_PORT);
> + return 0;
> +}
> +
> +/* Based on logic from the Apple driver. This is issued before any interaction
> + * If busy is stuck high, issue a read command to reset the SMC state
> + * machine. If busy is stuck high after the command then the SMC is
> + * jammed.
> + */

This is not the networking subsystem. Please use standard multi-line comments.

> +
> +static int smc_sane(void)
> +{
> + int ret;
> +
> + ret = wait_status(0, SMC_STATUS_BUSY);
> + if (!ret)
> + return ret;
> + ret = send_command(APPLESMC_READ_CMD);
> + if (ret)
> + return ret;
> + ret = wait_status(0, SMC_STATUS_BUSY);
> + if (!ret)
> + return ret;
> + return -EIO;

return wait_status(0, SMC_STATUS_BUSY);

> }
>
> static int send_argument(const char *key)
> @@ -226,6 +239,11 @@ static int read_smc(u8 cmd, const char *
> {
> u8 status, data = 0;
> int i;
> + int ret;
> +
> + ret = smc_sane();
> + if (ret)
> + return ret;
>
> if (send_command(cmd) || send_argument(key)) {
> pr_warn("%.4s: read arg fail\n", key);
> @@ -239,7 +257,8 @@ static int read_smc(u8 cmd, const char *
> }
>
> for (i = 0; i < len; i++) {
> - if (wait_read()) {
> + if (wait_status(SMC_STATUS_AWAITING_DATA | SMC_STATUS_BUSY,
> + SMC_STATUS_AWAITING_DATA | SMC_STATUS_BUSY) < 0) {
> pr_warn("%.4s: read data[%d] fail\n", key, i);
> return -EIO;
> }
> @@ -250,19 +269,24 @@ static int read_smc(u8 cmd, const char *
> for (i = 0; i < 16; i++) {
> udelay(APPLESMC_MIN_WAIT);
> status = inb(APPLESMC_CMD_PORT);
> - if (!(status & 0x01))
> + if (!(status & SMC_STATUS_AWAITING_DATA))
> break;
> data = inb(APPLESMC_DATA_PORT);
> }
> if (i)
> pr_warn("flushed %d bytes, last value is: %d\n", i, data);
>
> - return 0;
> + return wait_status(0, SMC_STATUS_BUSY);
> }
>
> static int write_smc(u8 cmd, const char *key, const u8 *buffer, u8 len)
> {
> int i;
> + int ret;
> +
> + ret = smc_sane();
> + if (ret)
> + return ret;
>
> if (send_command(cmd) || send_argument(key)) {
> pr_warn("%s: write arg fail\n", key);
> @@ -281,7 +305,7 @@ static int write_smc(u8 cmd, const char
> }
> }
>
> - return 0;
> + return wait_status(0, SMC_STATUS_BUSY);
> }
>
> static int read_register_count(unsigned int *count)
>