Re: [PATCH v3 2/2] Implement SDHCI CQE support for DesignWare SDHCI.
From: Adrian Hunter
Date: Fri Oct 20 2023 - 05:23:45 EST
On 16/10/23 14:42, Sergey Khimich wrote:
> Hello Adrian!
>
> Thanks for the review and comments! There are a few questions I'd like to clarify:
>
> On 09.10.2023 18:39, Adrian Hunter wrote:
>> On 2/10/23 14:33, Sergey Khimich wrote:
>>> From: Sergey Khimich <serghox@xxxxxxxxx>
>>>
>>> For enabling CQE support just set 'supports-cqe' in your DevTree file
>>> for appropriate mmc node.
>>>
>>> Signed-off-by: Sergey Khimich <serghox@xxxxxxxxx>
>>> ---
>>> drivers/mmc/host/Kconfig | 1 +
>>> drivers/mmc/host/sdhci-of-dwcmshc.c | 233 +++++++++++++++++++++++++++-
>>> 2 files changed, 232 insertions(+), 2 deletions(-)
>>>
>>> diff --git a/drivers/mmc/host/Kconfig b/drivers/mmc/host/Kconfig
>>> index 554e67103c1a..f3380b014ca9 100644
>>> --- a/drivers/mmc/host/Kconfig
>>> +++ b/drivers/mmc/host/Kconfig
>>> @@ -233,6 +233,7 @@ config MMC_SDHCI_OF_DWCMSHC
>>> depends on MMC_SDHCI_PLTFM
>>> depends on OF
>>> depends on COMMON_CLK
>>> + select MMC_CQHCI
>>> help
>>> This selects Synopsys DesignWare Cores Mobile Storage Controller
>>> support.
>>> diff --git a/drivers/mmc/host/sdhci-of-dwcmshc.c b/drivers/mmc/host/sdhci-of-dwcmshc.c
>>> index 3a3bae6948a8..7d43ae011811 100644
>>> --- a/drivers/mmc/host/sdhci-of-dwcmshc.c
>>> +++ b/drivers/mmc/host/sdhci-of-dwcmshc.c
>>> @@ -20,6 +20,7 @@
>>> #include <linux/sizes.h>
>>> #include "sdhci-pltfm.h"
>>> +#include "cqhci.h"
>>> #define SDHCI_DWCMSHC_ARG2_STUFF GENMASK(31, 16)
>>> @@ -36,6 +37,9 @@
>>> #define DWCMSHC_ENHANCED_STROBE BIT(8)
>>> #define DWCMSHC_EMMC_ATCTRL 0x40
>>> +/* DWC IP vendor area 2 pointer */
>>> +#define DWCMSHC_P_VENDOR_AREA2 0xea
>>> +
>>> /* Rockchip specific Registers */
>>> #define DWCMSHC_EMMC_DLL_CTRL 0x800
>>> #define DWCMSHC_EMMC_DLL_RXCLK 0x804
>>> @@ -75,6 +79,10 @@
>>> #define BOUNDARY_OK(addr, len) \
>>> ((addr | (SZ_128M - 1)) == ((addr + len - 1) | (SZ_128M - 1)))
>>> +#define DWCMSHC_SDHCI_CQE_TRNS_MODE (SDHCI_TRNS_MULTI | \
>>> + SDHCI_TRNS_BLK_CNT_EN | \
>>> + SDHCI_TRNS_DMA)
>>> +
>>> enum dwcmshc_rk_type {
>>> DWCMSHC_RK3568,
>>> DWCMSHC_RK3588,
>>> @@ -90,7 +98,8 @@ struct rk35xx_priv {
>>> struct dwcmshc_priv {
>>> struct clk *bus_clk;
>>> - int vendor_specific_area1; /* P_VENDOR_SPECIFIC_AREA reg */
>>> + int vendor_specific_area1; /* P_VENDOR_SPECIFIC_AREA1 reg */
>>> + int vendor_specific_area2; /* P_VENDOR_SPECIFIC_AREA2 reg */
>>> void *priv; /* pointer to SoC private stuff */
>>> };
>>> @@ -210,6 +219,147 @@ static void dwcmshc_hs400_enhanced_strobe(struct mmc_host *mmc,
>>> sdhci_writel(host, vendor, reg);
>>> }
>>> +static u32 dwcmshc_cqe_irq_handler(struct sdhci_host *host, u32 intmask)
>>> +{
>>> + int cmd_error = 0;
>>> + int data_error = 0;
>>> +
>>> + if (!sdhci_cqe_irq(host, intmask, &cmd_error, &data_error))
>>> + return intmask;
>>> +
>>> + cqhci_irq(host->mmc, intmask, cmd_error, data_error);
>>> +
>>> + return 0;
>>> +}
>>> +
>>> +static void dwcmshc_sdhci_cqe_enable(struct mmc_host *mmc)
>>> +{
>>> + struct sdhci_host *host = mmc_priv(mmc);
>>> + u32 pstate;
>>> + u8 ctrl;
>>> + int count = 10;
>>> +
>>> + /*
>>> + * CQE gets stuck if it sees Buffer Read Enable bit set, which can be
>>> + * the case after tuning, so ensure the buffer is drained.
>>> + */
>>> + pstate = sdhci_readl(host, SDHCI_PRESENT_STATE);
>>> + while (pstate & SDHCI_DATA_AVAILABLE) {
>>> + sdhci_readl(host, SDHCI_BUFFER);
>>> + pstate = sdhci_readl(host, SDHCI_PRESENT_STATE);
>>> + if (count-- == 0) {
>>> + dev_warn(mmc_dev(host->mmc),
>>> + "CQE may get stuck because the Buffer Read Enable bit is set\n");
>>> + break;
>>> + }
>>> + mdelay(1);
>>> + }
>> An alternative, which might be easier, is to do a
>> data reset, which may also help allow the device to
>> subsequently enter low power states.
>> Refer to commit f8870ae6e2d6be75b1accc2db981169fdfbea7ab
>> and commit 7b7d57fd1b773d25d8358c6017592b4928bf76ce
>
> Thanks, I'll fix it in the next version of the patch.
>
>>
>>> +
>>> + sdhci_writew(host, DWCMSHC_SDHCI_CQE_TRNS_MODE, SDHCI_TRANSFER_MODE);
>>> +
>>> + sdhci_cqe_enable(mmc);
>>> +
>>> + /*
>>> + * The "DesignWare Cores Mobile Storage Host Controller
>>> + * DWC_mshc / DWC_mshc_lite Databook" says:
>>> + * when Host Version 4 Enable" is 1 in Host Control 2 register,
>>> + * SDHCI_CTRL_ADMA32 bit means ADMA2 is selected.
>>> + * Selection of 32-bit/64-bit System Addressing:
>>> + * either 32-bit or 64-bit system addressing is selected by
>>> + * 64-bit Addressing bit in Host Control 2 register.
>>> + *
>>> + * On the other hand the "DesignWare Cores Mobile Storage Host
>>> + * Controller DWC_mshc / DWC_mshc_lite User Guide" says, that we have to
>>> + * set DMA_SEL to ADMA2 _only_ mode in the Host Control 2 register.
>>> + */
>>> + ctrl = sdhci_readb(host, SDHCI_HOST_CONTROL);
>>> + ctrl &= ~SDHCI_CTRL_DMA_MASK;
>>> + ctrl |= SDHCI_CTRL_ADMA32;
>>> + sdhci_writeb(host, ctrl, SDHCI_HOST_CONTROL);
>>> +}
>>> +
>>> +static void dwcmshc_sdhci_cqe_disable(struct mmc_host *mmc, bool recovery)
>>> +{
>>> + /*
>>> + * If an ioctl was issued, cqe_disable will be called.
>>> + * For CQE of sdhci-of-dwcmshc, the previous in-flight cmd will be lost quietly.
>>> + * So wait for mmc idle state.
>> This sounds like it should be fixed in the mmc block driver.
>> Can you provide an example of when this happens?
> Unfortunately I can't provide an example.
> But this part of the patch was updated by me on the advice of Shawn Lin after he reviewed V1 of the patch.
> Please find his message here:
> https://patchwork.kernel.org/project/linux-mmc/patch/20230825143525.869906-2-serghox@xxxxxxxxx/
>
> Just in case, I also quote here the part of his comment that refers to this part of the patch:
> "And another issue was found when Rockchip added CQE support for
> sdhci-of-dwcmshc internally, is that if a ioctl was issued, cqe_disable
> will be called. For CQE of sdhci-of-dwcmshc, the previous in-flight cmd
> will be lost quietly. So a mmc->cqe_ops->cqe_wait_for_idle(mmc) should
> be added before sdhci_cqe_disable(), so you need a dwcmshc specified
> cqe_disable hook in sdhci-of-dwcmshc."
The mmc block driver already does a wait for idle before ioctl commands,
refer to mmc_blk_mq_issue_rq() case MMC_ISSUE_SYNC.
Without more information we cannot assume the upstream kernel has
a problem with this.
>
>>
>>> + */
>>> + mmc->cqe_ops->cqe_wait_for_idle(mmc);
>>> +
>>> + return sdhci_cqe_disable(mmc, recovery);
>>> +}
>>> +
>>> +static void dwcmshc_cqhci_set_tran_desc(u8 *desc, dma_addr_t addr, int len, bool end,
>>> + bool dma64)
>>> +{
>>> + __le32 *attr = (__le32 __force *)desc;
>>> +
>>> + *attr = (CQHCI_VALID(1) |
>>> + CQHCI_END(end ? 1 : 0) |
>>> + CQHCI_INT(0) |
>>> + CQHCI_ACT(0x4) |
>>> + CQHCI_DAT_LENGTH(len));
>>> +
>>> + if (dma64) {
>>> + __le64 *dataddr = (__le64 __force *)(desc + 4);
>>> +
>>> + dataddr[0] = cpu_to_le64(addr);
>>> + } else {
>>> + __le32 *dataddr = (__le32 __force *)(desc + 4);
>>> +
>>> + dataddr[0] = cpu_to_le32(addr);
>>> + }
>>> +}
>> This is the same as cqhci_set_tran_desc(). Might as well export that
>> instead.
> Thanks, I'll fix it in the next version of the patch.
>>> +
>>> +static void dwcmshc_cqhci_prep_tran_desc(struct mmc_data *data,
>>> + struct cqhci_host *cq_host,
>>> + u8 *desc, int sg_count)
>>> +{
>>> + int i, len, tmplen, offset;
>>> + bool end = false;
>>> + bool dma64 = cq_host->dma64;
>>> + dma_addr_t addr;
>>> + struct scatterlist *sg;
>>> +
>>> + for_each_sg(data->sg, sg, sg_count, i) {
>>> + addr = sg_dma_address(sg);
>>> + len = sg_dma_len(sg);
>>> +
>>> + /*
>>> + * According to the "DesignWare Cores Mobile Storage Host Controller
>>> + * DWC_mshc / DWC_mshc_lite Databook" the host memory data buffer size
>>> + * and start address must not exceed 128 Mb. If it exceeds,
>>> + * the data buffer must be split using two descritors.
>>> + */
>>> +
>>> + if (likely(BOUNDARY_OK(addr, len))) {
>>> + if ((i + 1) == sg_count)
>>> + end = true;
>>> + dwcmshc_cqhci_set_tran_desc(desc, addr, len, end, dma64);
>>> + desc += cq_host->trans_desc_len;
>>> + } else {
>>> + offset = addr & (SZ_128M - 1);
>>> + tmplen = SZ_128M - offset;
>>> + dwcmshc_cqhci_set_tran_desc(desc, addr, tmplen, end, dma64);
>>> + desc += cq_host->trans_desc_len;
>>> +
>>> + if ((i + 1) == sg_count)
>>> + end = true;
>>> +
>>> + addr += tmplen;
>>> + len -= tmplen;
>>> + dwcmshc_cqhci_set_tran_desc(desc, addr, len, end, dma64);
>>> + desc += cq_host->trans_desc_len;
>>> + }
>>> + }
>>> +}
>> Could this be done more like dwcmshc_adma_write_desc()
> Actually I'm not sure what you mean. I reused the boundary-checking construction with the
> "BOUNDARY_OK" macro and used the same variable names. I would appreciate it if you could clarify
> what "more like dwcmshc_adma_write_desc()" means.
Provide a hook for cqhci_set_tran_desc() instead of cqhci_prep_tran_desc().
You'll need to check the details, but something like:
diff --git a/drivers/mmc/host/cqhci-core.c b/drivers/mmc/host/cqhci-core.c
index b3d7d6d8d654..98e7e9d3030d 100644
--- a/drivers/mmc/host/cqhci-core.c
+++ b/drivers/mmc/host/cqhci-core.c
@@ -522,7 +522,10 @@ static int cqhci_prep_tran_desc(struct mmc_request *mrq,
if ((i+1) == sg_count)
end = true;
- cqhci_set_tran_desc(desc, addr, len, end, dma64);
+ if (cq_host->ops->set_tran_desc)
+ cq_host->ops->set_tran_desc(&desc, addr, len, end, dma64);
+ else
+ cqhci_set_tran_desc(desc, addr, len, end, dma64);
desc += cq_host->trans_desc_len;
}
And:
/*
 * Sketch of a per-descriptor hook for the proposed cq_host->ops->set_tran_desc
 * callback: writes one transfer descriptor for the buffer [addr, addr + len),
 * or two descriptors when the buffer does not satisfy BOUNDARY_OK() (i.e. it
 * crosses a 128M-aligned boundary).
 *
 * @desc:  in/out pointer to the current descriptor slot; advanced by one slot
 *         here only when the buffer is split. The caller sketch above still
 *         advances its own pointer by trans_desc_len after the hook returns.
 * @addr:  DMA address of the buffer
 * @len:   buffer length in bytes
 * @end:   passed through to the last descriptor written for this buffer
 * @dma64: passed through unchanged to cqhci_set_tran_desc()
 *
 * NOTE(review): cq_host is used below but is neither a parameter nor a local
 * of this function; the hook signature would need to carry it (or the
 * descriptor length) for this to compile -- confirm when implementing.
 */
static void dwcmshc_set_tran_desc(u8 **desc, dma_addr_t addr, int len, bool end, bool dma64)
{
int tmplen, offset;
/* Common case: zero length or no boundary crossing -> a single descriptor. */
if (likely(!len || BOUNDARY_OK(addr, len))) {
cqhci_set_tran_desc(*desc, addr, len, end, dma64);
return;
}
/* Split point: bytes remaining from addr up to the next 128M boundary. */
offset = addr & (SZ_128M - 1);
tmplen = SZ_128M - offset;
/* First part is never the end of the chain, so 'end' is forced to false. */
cqhci_set_tran_desc(*desc, addr, tmplen, false, dma64);
addr += tmplen;
len -= tmplen;
/* Step to the next descriptor slot for the remainder of the buffer. */
*desc += cq_host->trans_desc_len;
/* Second part carries the caller's 'end' flag. */
cqhci_set_tran_desc(*desc, addr, len, end, dma64);
}
>>
>>> +
>>> +static void dwcmshc_cqhci_dumpregs(struct mmc_host *mmc)
>>> +{
>>> + sdhci_dumpregs(mmc_priv(mmc));
>>> +}
>>> +
>>> static void dwcmshc_rk3568_set_clock(struct sdhci_host *host, unsigned int clock)
>>> {
>>> struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
>>> @@ -345,6 +495,7 @@ static const struct sdhci_ops sdhci_dwcmshc_ops = {
>>> .get_max_clock = dwcmshc_get_max_clock,
>>> .reset = sdhci_reset,
>>> .adma_write_desc = dwcmshc_adma_write_desc,
>>> + .irq = dwcmshc_cqe_irq_handler,
>>> };
>>> static const struct sdhci_ops sdhci_dwcmshc_rk35xx_ops = {
>>> @@ -379,6 +530,70 @@ static const struct sdhci_pltfm_data sdhci_dwcmshc_rk35xx_pdata = {
>>> SDHCI_QUIRK2_CLOCK_DIV_ZERO_BROKEN,
>>> };
>>> +static const struct cqhci_host_ops dwcmshc_cqhci_ops = {
>>> + .enable = dwcmshc_sdhci_cqe_enable,
>>> + .disable = dwcmshc_sdhci_cqe_disable,
>>> + .dumpregs = dwcmshc_cqhci_dumpregs,
>>> + .prep_tran_desc = dwcmshc_cqhci_prep_tran_desc,
>>> +};
>>> +
>>> +static void dwcmshc_cqhci_init(struct sdhci_host *host, struct platform_device *pdev)
>>> +{
>>> + struct cqhci_host *cq_host;
>>> + struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
>>> + struct dwcmshc_priv *priv = sdhci_pltfm_priv(pltfm_host);
>>> + bool dma64 = false;
>>> + u16 clk;
>>> + int err;
>>> +
>>> + host->mmc->caps2 |= MMC_CAP2_CQE | MMC_CAP2_CQE_DCMD;
>>> + cq_host = devm_kzalloc(&pdev->dev, sizeof(*cq_host), GFP_KERNEL);
>>> + if (!cq_host) {
>>> + dev_err(mmc_dev(host->mmc), "Unable to setup CQE: not enough memory\n");
>>> + return;
>>> + }
>>> +
>>> + /*
>>> + * For dwcmshc host controller we have to enable internal clock
>>> + * before access to some registers from Vendor Specific Aria 2.
>>> + */
>>> + clk = sdhci_readw(host, SDHCI_CLOCK_CONTROL);
>>> + clk |= SDHCI_CLOCK_INT_EN;
>>> + sdhci_writew(host, clk, SDHCI_CLOCK_CONTROL);
>>> + clk = sdhci_readw(host, SDHCI_CLOCK_CONTROL);
>>> + if (!(clk & SDHCI_CLOCK_INT_EN)) {
>>> + dev_err(mmc_dev(host->mmc), "Unable to setup CQE: internal clock enable error\n");
>>> + goto free_cq_host;
>>> + }
>>> +
>>> + cq_host->mmio = host->ioaddr + priv->vendor_specific_area2;
>>> + cq_host->ops = &dwcmshc_cqhci_ops;
>>> +
>>> + /* Enable using of 128-bit task descriptors */
>>> + dma64 = host->flags & SDHCI_USE_64_BIT_DMA;
>>> + if (dma64) {
>>> + dev_dbg(mmc_dev(host->mmc), "128-bit task descriptors\n");
>>> + cq_host->caps |= CQHCI_TASK_DESC_SZ_128;
>>> + }
>>> + err = cqhci_init(cq_host, host->mmc, dma64);
>>> + if (err) {
>>> + dev_err(mmc_dev(host->mmc), "Unable to setup CQE: error %d\n", err);
>>> + goto int_clok_disable;
>>> + }
>>> +
>>> + dev_dbg(mmc_dev(host->mmc), "CQE init done\n");
>>> +
>>> + return;
>>> +
>>> +int_clok_disable:
>> 'clok' is an odd abbreviation of 'clock'. Perhaps 'clk' or just 'clock'
> Thanks, I'll fix it in the next version of the patch.
>>
>>> + clk = sdhci_readw(host, SDHCI_CLOCK_CONTROL);
>>> + clk &= ~SDHCI_CLOCK_INT_EN;
>>> + sdhci_writew(host, clk, SDHCI_CLOCK_CONTROL);
>>> +
>>> +free_cq_host:
>>> + devm_kfree(&pdev->dev, cq_host);
>>> +}
>>> +
>>> static int dwcmshc_rk35xx_init(struct sdhci_host *host, struct dwcmshc_priv *dwc_priv)
>>> {
>>> int err;
>>> @@ -471,7 +686,7 @@ static int dwcmshc_probe(struct platform_device *pdev)
>>> struct rk35xx_priv *rk_priv = NULL;
>>> const struct sdhci_pltfm_data *pltfm_data;
>>> int err;
>>> - u32 extra;
>>> + u32 extra, caps;
>>> pltfm_data = device_get_match_data(&pdev->dev);
>>> if (!pltfm_data) {
>>> @@ -519,6 +734,8 @@ static int dwcmshc_probe(struct platform_device *pdev)
>>> priv->vendor_specific_area1 =
>>> sdhci_readl(host, DWCMSHC_P_VENDOR_AREA1) & DWCMSHC_AREA1_MASK;
>>> + priv->vendor_specific_area2 =
>>> + sdhci_readw(host, DWCMSHC_P_VENDOR_AREA2);
>>> host->mmc_host_ops.request = dwcmshc_request;
>>> host->mmc_host_ops.hs400_enhanced_strobe = dwcmshc_hs400_enhanced_strobe;
>>> @@ -547,6 +764,10 @@ static int dwcmshc_probe(struct platform_device *pdev)
>>> sdhci_enable_v4_mode(host);
>>> #endif
>>> + caps = sdhci_readl(host, SDHCI_CAPABILITIES);
>>> + if (caps & SDHCI_CAN_64BIT_V4)
>>> + sdhci_enable_v4_mode(host);
>>> +
>>> host->mmc->caps |= MMC_CAP_WAIT_WHILE_BUSY;
>>> pm_runtime_get_noresume(dev);
>>> @@ -557,6 +778,14 @@ static int dwcmshc_probe(struct platform_device *pdev)
>>> if (err)
>>> goto err_rpm;
>>> + /* Setup Command Queue Engine if enabled */
>>> + if (device_property_read_bool(&pdev->dev, "supports-cqe")) {
>>> + if (caps & SDHCI_CAN_64BIT_V4)
>>> + dwcmshc_cqhci_init(host, pdev);
>>> + else
>>> + dev_warn(dev, "Cannot enable CQE without V4 mode support\n");
>>> + }
>>> +
>>> if (rk_priv)
>>> dwcmshc_rk35xx_postinit(host, priv);
>>>