Re: [PATCH v2 09/10] spi: aspeed: Calibrate read timings
From: Joel Stanley
Date: Wed Mar 02 2022 - 17:44:39 EST
On Wed, 2 Mar 2022 at 17:32, Cédric Le Goater <clg@xxxxxxxx> wrote:
>
> To accommodate the different response time of SPI transfers on different
> boards and different SPI NOR devices, the Aspeed controllers provide a
> set of Read Timing Compensation registers to tune the timing delays
> depending on the frequency being used. The AST2600 SoC has one of
> these registers per device. On the AST2500 and AST2400 SoCs, the
> timing register is shared by all devices which is a bit problematic to
> get good results other than for one device.
>
> The algorithm first reads a golden buffer at low speed and then performs
> reads with different clocks and delay cycle settings to find a breaking
> point. This selects a default good frequency for the CEx control register.
> The current settings are bit optimistic as we pick the first delay giving
typo: are a bit
> good results. A safer approach would be to determine an interval and
> choose the middle value.
>
> Calibration is performed when the direct mapping for reads is created.
> Since the underlying spi-nor object needs to be initialized to create
> the spi_mem operation for direct mapping, we should be fine. Having a
> specific API would clarify the requirements though.
>
> Cc: Pratyush Yadav <p.yadav@xxxxxx>
> Signed-off-by: Cédric Le Goater <clg@xxxxxxxx>
Reviewed-by: Joel Stanley <joel@xxxxxxxxx>
> ---
> drivers/spi/spi-aspeed-smc.c | 281 +++++++++++++++++++++++++++++++++++
> 1 file changed, 281 insertions(+)
>
> diff --git a/drivers/spi/spi-aspeed-smc.c b/drivers/spi/spi-aspeed-smc.c
> index 8c6d7f79d97f..dce25dfe6913 100644
> --- a/drivers/spi/spi-aspeed-smc.c
> +++ b/drivers/spi/spi-aspeed-smc.c
> @@ -35,6 +35,8 @@
> #define CTRL_IO_ADDRESS_4B BIT(13) /* AST2400 SPI only */
> #define CTRL_IO_DUMMY_SET(dummy) \
> (((((dummy) >> 2) & 0x1) << 14) | (((dummy) & 0x3) << 6))
> +#define CTRL_FREQ_SEL_SHIFT 8
> +#define CTRL_FREQ_SEL_MASK GENMASK(11, CTRL_FREQ_SEL_SHIFT)
> #define CTRL_CE_STOP_ACTIVE BIT(2)
> #define CTRL_IO_MODE_CMD_MASK GENMASK(1, 0)
> #define CTRL_IO_MODE_NORMAL 0x0
> @@ -47,6 +49,9 @@
> /* CEx Address Decoding Range Register */
> #define CE0_SEGMENT_ADDR_REG 0x30
>
> +/* CEx Read timing compensation register */
> +#define CE0_TIMING_COMPENSATION_REG 0x94
> +
> enum aspeed_spi_ctl_reg_value {
> ASPEED_SPI_BASE,
> ASPEED_SPI_READ,
> @@ -72,10 +77,15 @@ struct aspeed_spi_data {
> bool hastype;
> u32 mode_bits;
> u32 we0;
> + u32 timing;
> + u32 hclk_mask;
> + u32 hdiv_max;
>
> u32 (*segment_start)(struct aspeed_spi *aspi, u32 reg);
> u32 (*segment_end)(struct aspeed_spi *aspi, u32 reg);
> u32 (*segment_reg)(struct aspeed_spi *aspi, u32 start, u32 end);
> + int (*calibrate)(struct aspeed_spi_chip *chip, u32 hdiv,
> + const u8 *golden_buf, u8 *test_buf);
> };
>
> #define ASPEED_SPI_MAX_NUM_CS 5
> @@ -519,6 +529,8 @@ static int aspeed_spi_chip_adjust_window(struct aspeed_spi_chip *chip,
> return 0;
> }
>
> +static int aspeed_spi_do_calibration(struct aspeed_spi_chip *chip);
> +
> static int aspeed_spi_dirmap_create(struct spi_mem_dirmap_desc *desc)
> {
> struct aspeed_spi *aspi = spi_controller_get_devdata(desc->mem->spi->master);
> @@ -567,6 +579,8 @@ static int aspeed_spi_dirmap_create(struct spi_mem_dirmap_desc *desc)
> chip->ctl_val[ASPEED_SPI_READ] = ctl_val;
> writel(chip->ctl_val[ASPEED_SPI_READ], chip->ctl);
>
> + ret = aspeed_spi_do_calibration(chip);
> +
> dev_info(aspi->dev, "CE%d read buswidth:%d [0x%08x]\n",
> chip->cs, op->data.buswidth, chip->ctl_val[ASPEED_SPI_READ]);
>
> @@ -814,6 +828,249 @@ static u32 aspeed_spi_segment_ast2600_reg(struct aspeed_spi *aspi,
> ((end - 1) & AST2600_SEG_ADDR_MASK);
> }
>
> +/*
> + * Read timing compensation sequences
> + */
> +
> +#define CALIBRATE_BUF_SIZE SZ_16K
> +
> +static bool aspeed_spi_check_reads(struct aspeed_spi_chip *chip,
> + const u8 *golden_buf, u8 *test_buf)
> +{
> + int i;
> +
> + for (i = 0; i < 10; i++) {
> + memcpy_fromio(test_buf, chip->ahb_base, CALIBRATE_BUF_SIZE);
> + if (memcmp(test_buf, golden_buf, CALIBRATE_BUF_SIZE) != 0) {
> +#if defined(VERBOSE_DEBUG)
> + print_hex_dump_bytes(DEVICE_NAME " fail: ", DUMP_PREFIX_NONE,
> + test_buf, 0x100);
> +#endif
> + return false;
> + }
> + }
> + return true;
> +}
> +
> +#define FREAD_TPASS(i) (((i) / 2) | (((i) & 1) ? 0 : 8))
> +
> +/*
> + * The timing register is shared by all devices. Only update for CE0.
> + */
> +static int aspeed_spi_calibrate(struct aspeed_spi_chip *chip, u32 hdiv,
> + const u8 *golden_buf, u8 *test_buf)
> +{
> + struct aspeed_spi *aspi = chip->aspi;
> + const struct aspeed_spi_data *data = aspi->data;
> + int i;
> + int good_pass = -1, pass_count = 0;
> + u32 shift = (hdiv - 1) << 2;
> + u32 mask = ~(0xfu << shift);
> + u32 fread_timing_val = 0;
> +
> + /* Try HCLK delay 0..5, each one with/without delay and look for a
> + * good pair.
> + */
> + for (i = 0; i < 12; i++) {
> + bool pass;
> +
> + if (chip->cs == 0) {
> + fread_timing_val &= mask;
> + fread_timing_val |= FREAD_TPASS(i) << shift;
> + writel(fread_timing_val, aspi->regs + data->timing);
> + }
> + pass = aspeed_spi_check_reads(chip, golden_buf, test_buf);
> + dev_dbg(aspi->dev,
> + " * [%08x] %d HCLK delay, %dns DI delay : %s",
> + fread_timing_val, i / 2, (i & 1) ? 0 : 4,
> + pass ? "PASS" : "FAIL");
> + if (pass) {
> + pass_count++;
> + if (pass_count == 3) {
> + good_pass = i - 1;
> + break;
> + }
> + } else {
> + pass_count = 0;
> + }
> + }
> +
> + /* No good setting for this frequency */
> + if (good_pass < 0)
> + return -1;
> +
> + /* We have at least one pass of margin, let's use first pass */
> + if (chip->cs == 0) {
> + fread_timing_val &= mask;
> + fread_timing_val |= FREAD_TPASS(good_pass) << shift;
> + writel(fread_timing_val, aspi->regs + data->timing);
> + }
> + dev_dbg(aspi->dev, " * -> good is pass %d [0x%08x]",
> + good_pass, fread_timing_val);
> + return 0;
> +}
> +
> +static bool aspeed_spi_check_calib_data(const u8 *test_buf, u32 size)
> +{
> + const u32 *tb32 = (const u32 *)test_buf;
> + u32 i, cnt = 0;
> +
> + /* We check if we have enough words that are neither all 0
> + * nor all 1's so the calibration can be considered valid.
> + *
> + * I use an arbitrary threshold for now of 64
> + */
> + size >>= 2;
> + for (i = 0; i < size; i++) {
> + if (tb32[i] != 0 && tb32[i] != 0xffffffff)
> + cnt++;
> + }
> + return cnt >= 64;
> +}
> +
> +static const u32 aspeed_spi_hclk_divs[] = {
> + 0xf, /* HCLK */
> + 0x7, /* HCLK/2 */
> + 0xe, /* HCLK/3 */
> + 0x6, /* HCLK/4 */
> + 0xd, /* HCLK/5 */
> +};
> +
> +#define ASPEED_SPI_HCLK_DIV(i) \
> + (aspeed_spi_hclk_divs[(i) - 1] << CTRL_FREQ_SEL_SHIFT)
> +
> +static int aspeed_spi_do_calibration(struct aspeed_spi_chip *chip)
> +{
> + struct aspeed_spi *aspi = chip->aspi;
> + const struct aspeed_spi_data *data = aspi->data;
> + u32 ahb_freq = aspi->clk_freq;
> + u32 max_freq = chip->clk_freq;
> + u32 ctl_val;
> + u8 *golden_buf = NULL;
> + u8 *test_buf = NULL;
> + int i, rc, best_div = -1;
> +
> + dev_dbg(aspi->dev, "calculate timing compensation - AHB freq: %d MHz",
> + ahb_freq / 1000000);
> +
> + /*
> + * use the related low frequency to get check calibration data
> + * and get golden data.
> + */
> + ctl_val = chip->ctl_val[ASPEED_SPI_READ] & data->hclk_mask;
> + writel(ctl_val, chip->ctl);
> +
> + test_buf = kzalloc(CALIBRATE_BUF_SIZE * 2, GFP_KERNEL);
> + if (!test_buf)
> + return -ENOMEM;
> +
> + golden_buf = test_buf + CALIBRATE_BUF_SIZE;
> +
> + memcpy_fromio(golden_buf, chip->ahb_base, CALIBRATE_BUF_SIZE);
> + if (!aspeed_spi_check_calib_data(golden_buf, CALIBRATE_BUF_SIZE)) {
> + dev_info(aspi->dev, "Calibration area too uniform, using low speed");
> + goto no_calib;
> + }
> +
> +#if defined(VERBOSE_DEBUG)
> + print_hex_dump_bytes(DEVICE_NAME " good: ", DUMP_PREFIX_NONE,
> + golden_buf, 0x100);
> +#endif
> +
> + /* Now we iterate the HCLK dividers until we find our breaking point */
> + for (i = ARRAY_SIZE(aspeed_spi_hclk_divs); i > data->hdiv_max - 1; i--) {
> + u32 tv, freq;
> +
> + freq = ahb_freq / i;
> + if (freq > max_freq)
> + continue;
> +
> + /* Set the timing */
> + tv = chip->ctl_val[ASPEED_SPI_READ] | ASPEED_SPI_HCLK_DIV(i);
> + writel(tv, chip->ctl);
> + dev_dbg(aspi->dev, "Trying HCLK/%d [%08x] ...", i, tv);
> + rc = data->calibrate(chip, i, golden_buf, test_buf);
> + if (rc == 0)
> + best_div = i;
> + }
> +
> + /* Nothing found ? */
> + if (best_div < 0) {
> + dev_warn(aspi->dev, "No good frequency, using dumb slow");
> + } else {
> + dev_dbg(aspi->dev, "Found good read timings at HCLK/%d", best_div);
> +
> + /* Record the freq */
> + for (i = 0; i < ASPEED_SPI_MAX; i++)
> + chip->ctl_val[i] = (chip->ctl_val[i] & data->hclk_mask) |
> + ASPEED_SPI_HCLK_DIV(best_div);
> + }
> +
> +no_calib:
> + writel(chip->ctl_val[ASPEED_SPI_READ], chip->ctl);
> + kfree(test_buf);
> + return 0;
> +}
> +
> +#define TIMING_DELAY_DI BIT(3)
> +#define TIMING_DELAY_HCYCLE_MAX 5
> +#define TIMING_REG_AST2600(chip) \
> + ((chip)->aspi->regs + (chip)->aspi->data->timing + \
> + (chip)->cs * 4)
> +
> +static int aspeed_spi_ast2600_calibrate(struct aspeed_spi_chip *chip, u32 hdiv,
> + const u8 *golden_buf, u8 *test_buf)
> +{
> + struct aspeed_spi *aspi = chip->aspi;
> + int hcycle;
> + u32 shift = (hdiv - 2) << 3;
> + u32 mask = ~(0xfu << shift);
> + u32 fread_timing_val = 0;
> +
> + for (hcycle = 0; hcycle <= TIMING_DELAY_HCYCLE_MAX; hcycle++) {
> + int delay_ns;
> + bool pass = false;
> +
> + fread_timing_val &= mask;
> + fread_timing_val |= hcycle << shift;
> +
> + /* no DI input delay first */
> + writel(fread_timing_val, TIMING_REG_AST2600(chip));
> + pass = aspeed_spi_check_reads(chip, golden_buf, test_buf);
> + dev_dbg(aspi->dev,
> + " * [%08x] %d HCLK delay, DI delay none : %s",
> + fread_timing_val, hcycle, pass ? "PASS" : "FAIL");
> + if (pass)
> + return 0;
> +
> + /* Add DI input delays */
> + fread_timing_val &= mask;
> + fread_timing_val |= (TIMING_DELAY_DI | hcycle) << shift;
> +
> + for (delay_ns = 0; delay_ns < 0x10; delay_ns++) {
> + fread_timing_val &= ~(0xf << (4 + shift));
> + fread_timing_val |= delay_ns << (4 + shift);
> +
> + writel(fread_timing_val, TIMING_REG_AST2600(chip));
> + pass = aspeed_spi_check_reads(chip, golden_buf, test_buf);
> + dev_dbg(aspi->dev,
> + " * [%08x] %d HCLK delay, DI delay %d.%dns : %s",
> + fread_timing_val, hcycle, (delay_ns + 1) / 2,
> + (delay_ns + 1) & 1 ? 5 : 5, pass ? "PASS" : "FAIL");
> + /*
> + * TODO: This is optimistic. We should look
> + * for a working interval and save the middle
> + * value in the read timing register.
> + */
> + if (pass)
> + return 0;
> + }
> + }
> +
> + /* No good setting for this frequency */
> + return -1;
> +}
> +
> /*
> * Platform definitions
> */
> @@ -822,6 +1079,10 @@ static const struct aspeed_spi_data ast2400_fmc_data = {
> .hastype = true,
> .we0 = 16,
> .ctl0 = CE0_CTRL_REG,
> + .timing = CE0_TIMING_COMPENSATION_REG,
> + .hclk_mask = 0xfffff0ff,
> + .hdiv_max = 1,
> + .calibrate = aspeed_spi_calibrate,
> .segment_start = aspeed_spi_segment_start,
> .segment_end = aspeed_spi_segment_end,
> .segment_reg = aspeed_spi_segment_reg,
> @@ -832,6 +1093,10 @@ static const struct aspeed_spi_data ast2400_spi_data = {
> .hastype = false,
> .we0 = 0,
> .ctl0 = 0x04,
> + .timing = 0x14,
> + .hclk_mask = 0xfffff0ff,
> + .hdiv_max = 1,
> + .calibrate = aspeed_spi_calibrate,
> /* No segment registers */
> };
>
> @@ -840,6 +1105,10 @@ static const struct aspeed_spi_data ast2500_fmc_data = {
> .hastype = true,
> .we0 = 16,
> .ctl0 = CE0_CTRL_REG,
> + .timing = CE0_TIMING_COMPENSATION_REG,
> + .hclk_mask = 0xfffff0ff,
> + .hdiv_max = 1,
> + .calibrate = aspeed_spi_calibrate,
> .segment_start = aspeed_spi_segment_start,
> .segment_end = aspeed_spi_segment_end,
> .segment_reg = aspeed_spi_segment_reg,
> @@ -850,6 +1119,10 @@ static const struct aspeed_spi_data ast2500_spi_data = {
> .hastype = false,
> .we0 = 16,
> .ctl0 = CE0_CTRL_REG,
> + .timing = CE0_TIMING_COMPENSATION_REG,
> + .hclk_mask = 0xfffff0ff,
> + .hdiv_max = 1,
> + .calibrate = aspeed_spi_calibrate,
> .segment_start = aspeed_spi_segment_start,
> .segment_end = aspeed_spi_segment_end,
> .segment_reg = aspeed_spi_segment_reg,
> @@ -861,6 +1134,10 @@ static const struct aspeed_spi_data ast2600_fmc_data = {
> .mode_bits = SPI_RX_QUAD | SPI_RX_QUAD,
> .we0 = 16,
> .ctl0 = CE0_CTRL_REG,
> + .timing = CE0_TIMING_COMPENSATION_REG,
> + .hclk_mask = 0xf0fff0ff,
> + .hdiv_max = 2,
> + .calibrate = aspeed_spi_ast2600_calibrate,
> .segment_start = aspeed_spi_segment_ast2600_start,
> .segment_end = aspeed_spi_segment_ast2600_end,
> .segment_reg = aspeed_spi_segment_ast2600_reg,
> @@ -872,6 +1149,10 @@ static const struct aspeed_spi_data ast2600_spi_data = {
> .mode_bits = SPI_RX_QUAD | SPI_RX_QUAD,
> .we0 = 16,
> .ctl0 = CE0_CTRL_REG,
> + .timing = CE0_TIMING_COMPENSATION_REG,
> + .hclk_mask = 0xf0fff0ff,
> + .hdiv_max = 2,
> + .calibrate = aspeed_spi_ast2600_calibrate,
> .segment_start = aspeed_spi_segment_ast2600_start,
> .segment_end = aspeed_spi_segment_ast2600_end,
> .segment_reg = aspeed_spi_segment_ast2600_reg,
> --
> 2.34.1
>